In [2]:
import os
from dotenv import load_dotenv
import ollama
import gradio as gr

In [None]:
# System prompt used as the first message by message_llm.
system_message = (
    "You are a helpful assistant who gives precise and to the point answers"
)

In [9]:
def message_llm(prompt):
    """Send one user prompt to the 'llama3.2' model via ollama.chat and return the reply text.

    The request contains two messages: the module-level ``system_message``
    as the system turn, followed by ``prompt`` as the user turn.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": prompt}

    reply = ollama.chat(model='llama3.2', messages=[system_turn, user_turn])
    return reply.message.content

In [None]:
# Adding inbrowser=True in launch() opens up a new browser window automatically
# Adding share=True in launch() means that it can be accessed publicly

# Build the two text areas first so the Interface definition stays compact.
prompt_box = gr.Textbox(label="Enter your message here:", lines=6)
response_box = gr.Textbox(label="Response:", lines=10)

# Simple request/response UI: the whole reply from message_llm appears at once.
view = gr.Interface(
    fn=message_llm,
    inputs=[prompt_box],
    outputs=[response_box],
    flagging_mode="never",
)
view.launch(inbrowser=True, share=True)

Let's add streaming output, like ChatGPT, to the Gradio response.

In [12]:
from openai import OpenAI

# OpenAI client aimed at a local endpoint (127.0.0.1:11434/v1) instead of the
# default API host. NOTE(review): this is presumably a local Ollama server's
# OpenAI-compatible API, in which case "ollama" is only a placeholder key - confirm.
openai = OpenAI(
    base_url="http://127.0.0.1:11434/v1",
    api_key="ollama",
)

In [None]:
def set_system_message(tone):
    """Build the system prompt for a given tone.

    Args:
        tone: Value interpolated into the prompt text (e.g. "Formal").

    Returns:
        The system prompt string asking for precise markdown output in that tone.
    """
    system_prompt = f"You are a helpful assistant that gives precise outputs in markdown based on a {tone} tone"
    return system_prompt

def stream_llm(prompt, tone):
    """Stream a 'llama3.2' chat completion, yielding the reply accumulated so far.

    This is a generator: after each usable chunk it yields the full text
    received up to that point (not just the new fragment), so a consumer can
    simply replace what it displays with the latest yielded value.

    Args:
        prompt: The user's message.
        tone: Tone passed to ``set_system_message`` to build the system prompt.

    Yields:
        The cumulative response text after each streamed chunk.
    """
    messages = [
        {"role": "system", "content": set_system_message(tone)},
        {"role": "user", "content": prompt}
    ]

    stream = openai.chat.completions.create(
        model = "llama3.2",
        messages = messages,
        temperature = 0.7,
        stream = True
    )

    result = ""
    for chunk in stream:
        # A streamed chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        # delta.content can be None (e.g. role-only or final chunk).
        result += chunk.choices[0].delta.content or ""
        yield result

In [None]:
# Tones offered in the dropdown; the selected value is passed to stream_llm as `tone`.
tone_choices = [
    "Formal",
    "Informal",
    "Professional",
    "Friendly",
    "Encouraging",
    "Humorous",
    "Sarcastic",
]

# Streaming UI: stream_llm is a generator, and its output is shown as Markdown.
view = gr.Interface(
    fn=stream_llm,
    inputs=[
        gr.Textbox(label="Your message: "),
        gr.Dropdown(tone_choices, label="Select tone", value="Sarcastic"),
    ],
    outputs=[gr.Markdown(label="Response: ")],
    flagging_mode="never",
)
view.launch()

In [28]:
def chat(history):
    """Placeholder chat handler that echoes the latest message back.

    No model is called: the assistant reply is the string form of the
    ``content`` of the last entry in ``history``. To use a real model, send a
    system message plus ``history`` to a chat-completions call and use its
    reply text instead.

    Args:
        history: List of message dicts with "role" and "content" keys.

    Returns:
        A new list containing ``history`` plus one assistant message. The
        input list is not modified. Empty or missing history yields ``[]``.
    """
    if not history:
        # Nothing to echo; avoid IndexError on history[-1].
        return []

    reply = f"{history[-1].get('content')}"
    # Build a new list rather than mutating the caller's list in place.
    return [*history, {"role": "assistant", "content": reply}]

def do_entry(message, history):
    """Append the submitted text to the history as a user message.

    Returns an empty string (written back to the textbox, clearing it)
    together with the updated history.
    """
    history += [{"role":"user", "content":message}]
    return "", history


# Layout: chat window, text entry, audio recorder and a clear button, one per row.
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
    with gr.Row():
        entry = gr.Textbox(label="Chat with the assistant: ")
    with gr.Row():
        # This component is displayed but not connected to any event handler here.
        audio_input = gr.Audio(type="filepath", label="Speak your question")
    with gr.Row():
        clear = gr.Button("Clear")

    # Step 1: record the user's message and empty the textbox.
    on_submit = entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot])
    # Step 2: once that finishes, let chat() append the assistant reply.
    on_submit.then(chat, inputs=chatbot, outputs=chatbot)

    # Clearing sends None to the chatbot component.
    clear.click(lambda:None, inputs=None, outputs=chatbot, queue=False)

ui.launch()

* Running on local URL:  http://127.0.0.1:7882

To create a public link, set `share=True` in `launch()`.


