In [7]:
import os
import json
import requests

class Singleton(type):
    """Metaclass that hands out one shared instance per class.

    The first call to a class using this metaclass constructs the instance
    with the given arguments; every later call returns that same object and
    ignores its arguments.
    """

    # Maps each class using this metaclass to its single instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        registry[cls] = super().__call__(*args, **kwargs)
        return registry[cls]

class llm_wrap(metaclass=Singleton):
    """Shared HTTP client for the LLM server described by a JSON config file.

    The config file must provide the keys read by this class: ``LLM``,
    ``LOAD_MODEL_API_PATH``, ``CHAT_API_PATH``, ``GENERATE_API_PATH`` and
    ``TEMPERATURE``. Because the class uses the ``Singleton`` metaclass, the
    constructor runs only once; later ``llm_wrap(...)`` calls return the
    existing instance and ignore their arguments.
    """

    def __init__(self, config_file="ollama_config.json"):
        self.loaded = False
        self.complete_message = ""
        self.config_data = self.load_config(config_file)
        self.load_response = self.load_model(self.config_data["LLM"])
        # Only a 200 answer to the load request marks the model as usable.
        if self.load_response.status_code == 200:
            self.loaded = True

    def load_config(self, config_file):
        """
        Reads the JSON config file, exports every top-level key as an
        environment variable (values converted with ``str``) and returns
        the parsed dictionary.
        """
        with open(config_file, 'r') as f:
            config_data = json.load(f)
        for key, value in config_data.items():
            os.environ[key] = str(value)
        return config_data

    def load_model(self, model_name: str):
        """
        Asks the server to load ``model_name`` by POSTing a non-streaming
        request to ``LOAD_MODEL_API_PATH``.

        Prints the status code and body of the answer and returns the
        ``requests.Response`` so the caller can inspect it.
        """
        url = self.config_data["LOAD_MODEL_API_PATH"]
        payload = {
            "model": model_name,
            "stream": False
        }

        response = requests.post(url, json=payload)
        print("Status code:", response.status_code)
        print("Response:", response.text)

        return response

    def process_prompt(self, prompt: str, chat = True):
        """
        Streams the model's answer to ``prompt``.

        This is a generator: after every received chunk it yields the text
        accumulated so far (also kept in ``self.complete_message``). Each raw
        chunk is printed as it arrives. Iteration stops when the server
        reports ``done``, when a line cannot be decoded as JSON, or when the
        server answers with an ``error`` object.

        ``chat=True`` posts a chat-style payload (``messages``) to
        ``CHAT_API_PATH``; ``chat=False`` posts a completion-style payload
        (``prompt``) to ``GENERATE_API_PATH``.

        If the model was not loaded successfully, a message is printed and
        nothing is yielded.
        """
        if not self.loaded:
            print("Failed to load the model")
            return

        payload = {
            "model": self.config_data["LLM"],
            "stream": True,
            "options": {
                "temperature": self.config_data["TEMPERATURE"]
            }
        }
        if chat:
            url = self.config_data["CHAT_API_PATH"]
            payload["messages"] = [{"role": "user", "content": prompt}]
        else:
            # The generate endpoint takes a bare prompt and answers in
            # chunk["response"], not in chunk["message"]["content"].
            url = self.config_data["GENERATE_API_PATH"]
            payload["prompt"] = prompt

        self.complete_message = ""
        # The context manager releases the streamed connection even when the
        # generator stops early or is abandoned by its consumer.
        with requests.post(url, json=payload, stream=True) as response:
            for line in response.iter_lines(decode_unicode=True):
                if not line:
                    continue
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    print("Could not decode chunk:", line)
                    return False

                print(chunk)
                if "error" in chunk:
                    # Error answers carry no "done" flag; stop cleanly
                    # instead of failing with a KeyError below.
                    print("Server returned an error:", chunk["error"])
                    return False
                if chunk["done"]:
                    return self.complete_message

                if chat:
                    piece = chunk.get("message", {}).get("content", "")
                else:
                    piece = chunk.get("response", "")
                self.complete_message += piece
                yield self.complete_message


In [None]:
# Drain the generator; process_prompt prints each raw chunk while it streams.
prompt_stream = llm_wrap().process_prompt("think and say something")
for response in prompt_stream:
    pass

In [None]:
import gradio as gr
import random
import time

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Append the user's message and a random canned reply to the history.

        Returns an empty string (to clear the textbox) and the updated history.
        """
        canned_replies = ["How are you?", "Today is a great day", "I'm very hungry"]
        bot_message = random.choice(canned_replies)
        chat_history.extend([
            {"role": "user", "content": message},
            {"role": "assistant", "content": bot_message},
        ])
        # Artificial delay before the reply is shown.
        time.sleep(2)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()

In [1]:
import numpy as np
import gradio as gr

# Note names of one octave starting at C; "A" sits at index 9.
notes = "C C# D D# E F F# G G# A A# B".split()

def generate_tone(note, octave, duration):
    """Synthesize a sine tone for a note of the equal-tempered scale.

    Args:
        note: Index of the note within the octave (0 = C ... 11 = B), so that
            index 9 in octave 4 is the 440 Hz reference.
        octave: Octave number; octave 4 contains the 440 Hz reference.
        duration: Length in seconds, as a number or numeric string.
            Fractional values are supported.

    Returns:
        Tuple ``(sample_rate, samples)`` where ``sample_rate`` is 48000 and
        ``samples`` is an ``int16`` array with peak amplitude 20000.

    Raises:
        ValueError: If ``duration`` is negative or not numeric.
    """
    sr = 48000
    a4_freq = 440
    tones_from_a4 = 12 * (octave - 4) + (note - 9)
    frequency = a4_freq * 2 ** (tones_from_a4 / 12)

    seconds = float(duration)
    if seconds < 0:
        raise ValueError("duration must be non-negative")
    n_samples = int(round(seconds * sr))

    # Sample k sits at exactly k / sr seconds. An endpoint-inclusive linspace
    # would stretch the time axis by N / (N - 1) and slightly detune the tone.
    t = np.arange(n_samples) / sr
    audio = (20000 * np.sin(t * (2 * np.pi * frequency))).astype(np.int16)
    return sr, audio

# One input component per generate_tone parameter, in signature order
# (note, octave, duration). A single gr.Audio() input would call the
# function with one argument and fail on every submit.
demo = gr.Interface(
    generate_tone,
    [
        gr.Dropdown(notes, type="index"),  # passes the note's list index
        gr.Slider(4, 6, step=1),
        gr.Textbox(value="1", label="Duration in seconds"),
    ],
    "audio",
)
if __name__ == "__main__":
    demo.launch()

  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


In [None]:
import gradio as gr
from gradio import ChatMessage
from prompt_handler import handle_prompt
import gradio as gr

class SingletonMeta(type):
    """Metaclass ensuring that each class built with it is instantiated once.

    Constructor arguments are only used on the first call; subsequent calls
    return the stored instance unchanged.
    """

    # One entry per class: class object -> its sole instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            instance = super().__call__(*args, **kwargs)
            cls._instances[cls] = instance
        return cls._instances[cls]

class ChatInterface(metaclass=SingletonMeta):
    """Single shared Gradio chat UI that is driven by ``chat_function``."""

    def __init__(self):
        # Multimodal textbox accepting microphone recordings and file uploads.
        chat_options = dict(
            title="Verbal Communication Skills Trainer 💪",
            type="messages",
            multimodal=True,
            textbox=gr.MultimodalTextbox(sources=["microphone", "upload"]),
        )
        self.demo = gr.ChatInterface(chat_function, **chat_options)

    def launch_interface(self):
        """Start the Gradio server for the chat UI."""
        self.demo.launch()


def chat_function(message, history):
    """Stream the model's answer as a list of chat messages.

    Each value produced by ``handle_prompt(...).get_llm_output()`` is passed
    through ``process_string`` together with the current stage, and the
    message list is yielded after every update:

    * ``"think"``      - the text is shown in a pending "Evaluating" message.
    * ``"transition"`` - that message is marked done, its length is stored in
      ``metadata["thought_len"]`` and an empty answer message is appended.
    * anything else    - the answer message is replaced by the text that
      follows the first ``thought_len`` characters.

    ``history`` is accepted but not used.
    """
    bubbles = [ChatMessage(content="", metadata={})]
    stage = "think"

    llm_stream = handle_prompt(message, chat = True).get_llm_output()
    for raw_text in llm_stream:
        text, stage = process_string(raw_text, stage)
        current = bubbles[-1]

        if stage == "think":
            current.content = text
            current.metadata = {"title": "Evaluating", "id": 0, "status": "pending"}
        elif stage == "transition":
            current.content = text
            current.metadata["status"] = "done"
            current.metadata["thought_len"] = len(text)
            bubbles.append(ChatMessage(content=""))
        else:
            # Skip the part already shown in the thinking message.
            offset = bubbles[-2].metadata["thought_len"]
            bubbles[-1] = ChatMessage(content=text[offset:])
        yield bubbles

def process_string(s, stage):
    """Strip the think tags from ``s`` and compute the next stage.

    The next stage is ``"think"`` while ``s`` starts with ``<think>`` and
    contains no ``</think>``; otherwise it is ``"transition"`` if the previous
    stage was ``"think"`` and ``"respond"`` in every other case.

    Returns:
        Tuple ``(text_without_tags, next_stage)``.
    """
    still_thinking = s.startswith("<think>") and "</think>" not in s
    if still_thinking:
        next_stage = "think"
    else:
        next_stage = "transition" if stage == "think" else "respond"

    cleaned = s
    for tag in ("<think>", "</think>"):
        cleaned = cleaned.replace(tag, "")

    return cleaned, next_stage


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Fetch the shared chat UI instance and start its server.
chat_ui = ChatInterface()
chat_ui.launch_interface()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


In [2]:
import gradio

with gradio.Blocks() as interface:
    recorder = gradio.Audio(sources='microphone', type='filepath', visible=False)
    action_btn = gradio.Button('Start')

    def next_line(action, _):
        """Advance the button label and toggle the recorder's visibility.

        From 'Start' the button becomes 'Next' and the recorder is shown;
        from any other label the button becomes 'Done' and the recorder is
        hidden. The second argument (the recorder's value) is ignored.
        """
        starting = action == 'Start'
        return {
            action_btn: 'Next' if starting else 'Done',
            recorder: gradio.update(visible=starting),
        }

    action_btn.click(next_line, inputs=[action_btn, recorder], outputs=[action_btn, recorder])
interface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://2c71bd42bb272cdf11.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


