# Personal ChatBot Assistant

Here I built a tool that takes a question and responds with an explanation.

You can choose between a locally running model (Llama 3.2) and a hosted API (GPT-4o-mini or Claude 3 Haiku).

A Gradio UI makes the tool more user-friendly.

In [1]:
# imports
from dotenv import load_dotenv
import os
from openai import OpenAI
import ollama
from IPython.display import Markdown, display
import gradio as gr
import anthropic

In [2]:
# Constants: model identifiers passed as `model=` to each backend
MODEL_GPT = "gpt-4o-mini"  # used by the OpenAI chat-completions call
MODEL_LLAMA = "llama3.2"  # used by the local ollama call
MODEL_ANTHROPIC = "claude-3-haiku-20240307"  # used by the Anthropic messages call

In [3]:
# Set up environment: load API keys and record which backends can be offered
load_dotenv(override=True)
openai_api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')

# The local model is always listed; hosted models are added only when
# their API key is present.
list_available = ["LLaMA (local run)"]

if not openai_api_key:
    print("OpenAI API Key not set")
else:
    print("OpenAI API Key found")
    openai = OpenAI()
    list_available.append("GPT")

if not anthropic_api_key:
    print("Anthropic API Key not set")
else:
    print("Anthropic API Key found")
    claude = anthropic.Anthropic()
    list_available.append("Claude")



OpenAI API Key found
Anthropic API Key found


In [4]:
# This function returns the messages template
def get_messages(system_prompt, history, user_prompt):
    """Build the message list for a chat request.

    Args:
        system_prompt: Text placed in the leading ``system`` message.
        history: Previous turns as dicts with ``role`` and ``content`` keys.
            ``None`` or an empty list means there are no previous turns.
        user_prompt: Text of the new ``user`` message, placed last.

    Returns:
        A new list: the system message, one ``{"role", "content"}`` dict per
        history entry, then the user message. ``history`` is not modified.
    """
    # Keep only role/content from each past turn, the same normalisation
    # claude_stream applies. NOTE(review): the UI's history entries may carry
    # extra keys (e.g. metadata) that a backend could reject -- confirm.
    past_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
    ]
    return [
        {"role": "system", "content": system_prompt},
        *past_turns,
        {"role": "user", "content": user_prompt},
    ]

In [5]:
# This function calls OpenAI API
def openai_stream(system_prompt, history, user_prompt):
    """Stream a reply from the OpenAI chat-completions API.

    Args:
        system_prompt: Text for the system message.
        history: Previous turns, forwarded to ``get_messages``.
        user_prompt: The new user message.

    Yields:
        The reply accumulated so far, once per received chunk that carries
        a choice, so each yielded string extends the previous one.
    """
    stream = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=get_messages(system_prompt, history, user_prompt),
        stream=True
    )
    response = ""
    for chunk in stream:
        # A streamed chunk may have an empty `choices` list; indexing [0]
        # on it would raise IndexError, so skip chunks with nothing to add.
        if not chunk.choices:
            continue
        # delta.content can be None (e.g. on the final chunk); treat as "".
        response += chunk.choices[0].delta.content or ""
        yield response

In [21]:
def claude_stream(system_prompt, history, user_prompt):
    """Stream a reply from the Anthropic messages API.

    Args:
        system_prompt: Passed as the request's ``system`` argument.
        history: Previous turns; only each entry's ``role`` and ``content``
            are sent.
        user_prompt: The new user message, appended after the history.

    Yields:
        The reply accumulated so far, once per text fragment received.
    """
    # Reduce each past turn to the two keys that are sent to the API.
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]} for turn in history
    ]
    conversation = prior_turns + [{"role": "user", "content": user_prompt}]

    accumulated = ""
    with claude.messages.stream(
        model=MODEL_ANTHROPIC,
        max_tokens=1000,
        temperature=0.7,
        system=system_prompt,
        messages=conversation,
    ) as stream:
        for piece in stream.text_stream:
            accumulated += piece or ""
            yield accumulated

In [22]:
# This function queries the local running model
def ollama_stream(system_prompt, history, user_prompt):
    """Stream a reply from the local model through ``ollama.chat``.

    Args:
        system_prompt: Text for the system message.
        history: Previous turns, forwarded to ``get_messages``.
        user_prompt: The new user message.

    Yields:
        The reply accumulated so far, once per received chunk.
    """
    conversation = get_messages(system_prompt, history, user_prompt)
    accumulated = ""
    for part in ollama.chat(model=MODEL_LLAMA, messages=conversation, stream=True):
        accumulated += part['message']['content'] or ""
        yield accumulated

In [69]:
def chat(user_prompt, history, model):
    """Route a chat turn to the selected backend and stream its reply.

    Args:
        user_prompt: The new user message.
        history: Previous turns of the conversation.
        model: Backend label; ``"GPT"`` and ``"Claude"`` select the hosted
            APIs, any other value falls back to the local model.

    Yields:
        Whatever the chosen backend generator yields (the partial reply).
    """
    system_prompt = (
        "You are a powerful assistant that takes questions and responds with an explanation. "
        "You are provided with questions from a user. "
        "Respond in markdown. "
        "In case of mathematical formulations, write the formula in latex using the $ characters."
    )

    # Pick the streaming function first, then delegate to it once.
    if model == "GPT":
        streamer = openai_stream
    elif model == "Claude":
        streamer = claude_stream
    else:
        streamer = ollama_stream

    yield from streamer(system_prompt, history, user_prompt)

In [70]:
with gr.Blocks() as demo:
    # Top row: model selector listing only the backends found at setup time
    with gr.Row():
        dropdown = gr.Dropdown(
            choices=list_available,
            value="LLaMA (local run)",
            label="Select Model",
            interactive=True,
        )

    # Chat area: the dropdown is registered as an additional input of `chat`.
    # The chatbot renders $$...$$ as display math and $...$ as inline math.
    view = gr.ChatInterface(
        fn=chat,
        additional_inputs=[dropdown],
        chatbot=gr.Chatbot(
            latex_delimiters=[
                {"left": "$$", "right": "$$", "display": True},
                {"left": "$", "right": "$", "display": False},
            ],
        ),
        type="messages",
    )

demo.launch()



* Running on local URL:  http://127.0.0.1:7905

To create a public link, set `share=True` in `launch()`.


