# Gradio Day!

Today we will build User Interfaces using the outrageously simple Gradio framework.

Prepare for joy!

Please note: your Gradio screens may appear in 'dark mode' or 'light mode' depending on your computer settings.

In [1]:
# imports
import os
import requests
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
import gradio as gr # oh yeah!

In [2]:
# Load environment variables from a file called .env
# This has to run before the clients below are created.
load_dotenv()

# Connect to OpenAI, Anthropic and Google; comment out the Claude or Google lines if you're not using them
# No credentials are passed explicitly here, so each client presumably picks up its
# API key from the environment loaded above - confirm against each SDK's documentation.
openai = OpenAI()
claude = anthropic.Anthropic()
google.generativeai.configure()

class LLM_Model:
    """Wrapper around one chat model served through an OpenAI-style client.

    Each instance remembers which environment variable holds its API key,
    the model name, and the base URL, and owns an `OpenAI` client built
    from those values.
    """

    # The client annotation is quoted so the class body does not need to
    # resolve the name at definition time.
    api_key: str
    model: str
    base_url: str
    openai: "OpenAI"

    def __init__(self, key, model, url):
        """Create the client for one model.

        Args:
            key: Name of the environment variable that holds the API key.
            model: Model identifier sent with every request.
            url: Base URL of the endpoint.

        Raises:
            ValueError: If the environment variable is unset or empty.
        """
        self.api_key_name = key
        self.model = model
        self.base_url = url
        self.api_key = os.getenv(key)
        if not self.api_key:
            raise ValueError("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
        self.openai = OpenAI(base_url=url, api_key=self.api_key)

    def getResponse(self, messages, streaming=False, tools=None):
        """Request a chat completion for `messages`.

        Args:
            messages: List of role/content dicts forwarded as-is.
            streaming: When truthy, ask for a streamed response.
            tools: Optional tool definitions; forwarded only when provided.

        Returns:
            The reply text (a `str`) when `streaming` is falsy. Otherwise a
            generator that yields the accumulated reply after each chunk.
        """
        request = {"model": self.model, "messages": messages, "stream": streaming}
        # Leave the argument out entirely rather than forwarding None.
        if tools is not None:
            request["tools"] = tools
        response = self.openai.chat.completions.create(**request)

        if not streaming:
            return response.choices[0].message.content
        # The generator lives in a separate helper: a `yield` in this method
        # would turn the non-streaming `return` above into an empty generator.
        return self._accumulate(response)

    @staticmethod
    def _accumulate(chunks):
        """Yield the reply accumulated so far after each streamed chunk."""
        result = ""
        for chunk in chunks:
            # Chunks that carry no choices have no text to add.
            if not chunk.choices:
                continue
            result += chunk.choices[0].delta.content or ""
            yield result
        
# One LLM_Model per model; `key` is the name of the environment variable holding the API key
deepseek = LLM_Model(key="DEEPSEEK_API_KEY", model="deepseek-chat", url="https://api.deepseek.com")
deepseekR1 = LLM_Model(key="DEEPSEEK_API_KEY", model="deepseek-reasoner", url="https://api.deepseek.com")
llama = LLM_Model(key="OPENAI_API_KEY", model="llama3.2", url="http://localhost:11434/v1")
gpt_4o = LLM_Model(key="OPENAI_API_KEY", model="gpt-4o-mini", url="https://api.openai.com/v1")
gpt_o3 = LLM_Model(key="OPENAI_API_KEY", model="o3-mini", url="https://api.openai.com/v1")
qwen = LLM_Model(key="QWEN_API_KEY", model="qwen-max-2025-01-25", url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1")

# Display name -> model wrapper (gpt_o3 is not listed here)
llms = {
    "DeepSeek": deepseek,
    "DeepSeek R1": deepseekR1,
    "Llama": llama,
    "ChatGpt": gpt_4o,
    "Qwen": qwen,
}


# A generic system message - no more snarky adversarial AIs!
system_message = "You are a helpful assistant"

In [3]:
# Let's create a call that streams back results
# If you'd like a refresher on Generators (the "yield" keyword),
# Please take a look at the Intermediate Python notebook in the week1 folder.

def stream_openai(prompt, model):
    """Ask `model` for a streamed reply to `prompt`.

    Builds a two-message conversation (the shared system message followed by
    the user's prompt) and returns whatever `model.getResponse` returns when
    called with streaming switched on.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": prompt}
    return model.getResponse([system_turn, user_turn], True)


In [4]:
def stream_claude(prompt):
    """Stream a Claude reply to `prompt`, yielding the text accumulated so far.

    Opens a streaming request on the module-level `claude` client using the
    shared system message, then yields the growing reply once per text piece
    received.
    """
    request = dict(
        model="claude-3-haiku-20240307",
        max_tokens=1000,
        temperature=0.7,
        system=system_message,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    so_far = ""
    with claude.messages.stream(**request) as stream:
        for piece in stream.text_stream:
            so_far += piece or ""
            yield so_far

In [5]:
# Dropdown label -> model wrapper, in the order the labels are offered
dict_models = dict(
    [
        ("GPT-4o-mini", gpt_4o),
        ("GPT-o3-mini", gpt_o3),
        ("QWen-2.5-max", qwen),
        ("DeepSeekV3", deepseek),
        ("DeepSeekR1", deepseekR1),
    ]
)

def stream_model(prompt, model):
    """Stream a reply to `prompt` from the model registered under `model`.

    Args:
        prompt: The user's message.
        model: A key of `dict_models`.

    Yields:
        Whatever `stream_openai` yields for the selected model.

    Raises:
        ValueError: If `model` is not a key of `dict_models`. Because this is
            a generator, the error surfaces when iteration starts.
    """
    # .get() rather than indexing: an unknown label must reach the ValueError
    # below instead of escaping as a KeyError.
    llm = dict_models.get(model)
    if llm is None:
        raise ValueError("Unknown model")
    yield from stream_openai(prompt, llm)

In [None]:
# Single-turn UI: a prompt box and a model picker feed stream_model; the reply is shown as Markdown
view = gr.Interface(
    fn=stream_model,
    inputs=[
        gr.Textbox(lines=6, label="Your message:"),
        gr.Dropdown(choices=dict_models.keys(), value="GPT-4o-mini", label="Select model"),
    ],
    outputs=[gr.Markdown(label="Response:")],
    flagging_mode="never",
)
view.launch(share=True)

In [36]:
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

# set up the conversation memory for the chat
# (this single buffer is handed to every chain below, so all of them share it)
memory = ConversationBufferMemory()

# create one ConversationChain per dropdown label
# Each row is (label, model wrapper, extra ChatOpenAI keyword arguments);
# only the GPT-4o-mini row passes streaming=True.
dict_models2 = {
    label: ConversationChain(
        memory=memory,
        llm=ChatOpenAI(
            model=cfg.model,
            disable_streaming=False,
            api_key=cfg.api_key,
            base_url=cfg.base_url,
            **extra_llm_kwargs,
        ),
    )
    for label, cfg, extra_llm_kwargs in (
        ("GPT-4o-mini", gpt_4o, {"streaming": True}),
        ("GPT-o3-mini", gpt_o3, {}),
        ("QWen-2.5-max", qwen, {}),
        ("DeepSeekV3", deepseek, {}),
        ("DeepSeekR1", deepseekR1, {}),
    )
}

# chain used by chat()
conversation = dict_models2["GPT-4o-mini"]

def chat(message):
    """Run `message` through the module-level `conversation` chain and return its reply."""
    return conversation.predict(input=message)

# Define a function to handle streaming responses
def chat_with_ai(user_input, history, model):
    """Stream one chat turn, yielding the full transcript after each chunk.

    Args:
        user_input: The message to send.
        history: Prior (user, reply) pairs, or a falsy value for a new chat.
        model: A key of `dict_models2` selecting which chain answers.

    Yields:
        The prior history plus one (user_input, reply-so-far) pair.
    """
    chain = dict_models2[model]
    prior_turns = history if history else []  # a falsy history starts a new transcript
    reply_so_far = ""
    for piece in chain.stream(input=user_input):
        reply_so_far = reply_so_far + piece["response"]
        yield prior_turns + [(user_input, reply_so_far)]


In [None]:
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# LangChain Chatbot with Streaming")
    chatbot = gr.Chatbot(label="Conversation")
    user_input = gr.Textbox(label="Your Message")
    # Offer exactly the labels chat_with_ai can look up (it indexes dict_models2).
    models = gr.Dropdown(list(dict_models2),
                    label="Select model",
                    value="GPT-4o-mini")
    clear_button = gr.Button("Clear")

    def respond(user_input, history, model):
        """Stream the updated transcript to the chatbot while emptying the input box."""
        for updated_history in chat_with_ai(user_input, history, model):
            yield updated_history, ""  # second output clears the input field

    def clear_conversation():
        """Reset the visible transcript and the chains' shared memory together.

        Clearing only the chatbot would leave the earlier turns in `memory`,
        so the model would keep answering with context the user can no
        longer see.
        """
        memory.clear()
        return None

    user_input.submit(
        respond,  # Function to call
        [user_input, chatbot, models],  # Inputs
        [chatbot, user_input],  # Outputs (chat transcript and the input box)
        show_progress=True,  # Show progress while streaming
    )
    clear_button.click(clear_conversation, None, chatbot, queue=False)  # Clear chat history

# Launch the Gradio app
demo.queue().launch()