# Gradio UI for using local LLMs

In [1]:
import gradio as gr
import ollama

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
# Default model name used by `message`.
MODEL = "llama3.2"
# Ask the Ollama client for its model listing and keep only each entry's
# "model" field; this list feeds the model dropdown in the UI.
available_models = [entry["model"] for entry in ollama.list()["models"]]

In [3]:
def message(prompt, model=None):
    """Send one prompt to an Ollama model and return the complete reply.

    The conversation consists of a fixed system message asking for markdown
    output, followed by ``prompt`` as the single user message.

    Args:
        prompt: Text sent as the user message.
        model: Name of the Ollama model to query. When omitted (or falsy),
            the module-level ``MODEL`` is used, looked up at call time.

    Returns:
        The ``content`` field of the ``message`` entry in the chat response.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that responds in markdown",
        },
        {"role": "user", "content": prompt},
    ]
    # Resolve the default lazily so rebinding MODEL in a later cell still
    # takes effect for calls that do not pass `model`.
    response = ollama.chat(model=model or MODEL, messages=messages)
    return response["message"]["content"]

In [24]:
def stream_llama(prompt, model):
    """Stream a model's reply, yielding the text accumulated so far.

    The conversation consists of a fixed system message asking for markdown
    output, followed by ``prompt`` as the single user message. The chat call
    is made with ``stream=True`` and its chunks are concatenated in order.

    Args:
        prompt: Text sent as the user message.
        model: Name of the Ollama model to query.

    Yields:
        The cumulative reply text after each received chunk (not just the
        newest fragment), so a consumer can redraw the whole reply each time.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that responds in markdown",
        },
        {"role": "user", "content": prompt},
    ]
    stream = ollama.chat(model=model, messages=messages, stream=True)
    result = ""
    for chunk in stream:
        # NOTE(review): the ollama client appears to type `content` as
        # optional; treat a missing/None fragment as empty text so `+=`
        # cannot raise TypeError mid-stream.
        result += chunk["message"]["content"] or ""
        yield result

In [25]:
# Assemble the UI around `stream_llama` and launch it in one expression:
# a text box supplies the prompt, a dropdown supplies the model name, and
# the function's output is shown in a Markdown component.
gr.Interface(
    fn=stream_llama,
    inputs=[
        gr.Textbox(label="Message"),
        gr.Dropdown(choices=available_models, label="Select Model"),
    ],
    outputs=gr.Markdown(label="Response"),
    flagging_mode="never",
).launch()

* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.


