In [None]:
import os
import openai
import google.generativeai as genai
import anthropic
import requests
import ollama
import gradio as gr

In [None]:
# Set up keys from environment variables so no secret is hardcoded here
# (you can load from env or file securely).
# os.getenv returns None for an unset variable, so a missing key is not
# detected in this cell.
# The Anthropic key is not configured here; ask_anthropic reads
# ANTHROPIC_API_KEY itself when it builds its client.
openai.api_key = os.getenv("OPENAI_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [None]:

# OPENAI FUNCTIONS
def ask_openai(message):
    """Send a single user message to OpenAI and return the reply text.

    This is a one-shot call: no system prompt and no earlier turns are sent.

    Args:
        message: The user's prompt.

    Returns:
        The content of the first choice in the completion.
    """
    single_turn = [{"role": "user", "content": message}]
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",  # or "gpt-4"
        messages=single_turn,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def stream_openai(message, history):
    """Stream an OpenAI chat reply, yielding the growing chat history.

    Uses the same v1 client entry point as ask_openai
    (openai.chat.completions.create); the legacy openai.ChatCompletion
    interface is not available in openai>=1.0.

    Args:
        message: The new user message.
        history: Prior turns as a list of [user, assistant] pairs.

    Yields:
        A new list equal to ``history`` plus ``[message, partial_reply]``,
        once per received text fragment. ``history`` itself is not mutated.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for user, bot in history:
        messages.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": bot}
        ])
    messages.append({"role": "user", "content": message})

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        stream=True,
    )

    collected = ""
    for chunk in response:
        # Some chunks carry no choices at all; skip them.
        if not chunk.choices:
            continue
        # In the v1 client, delta is an object, not a dict; content is None
        # on chunks that carry only metadata (e.g. the role or finish reason).
        token = chunk.choices[0].delta.content
        if token:
            collected += token
            yield history + [[message, collected]]

In [None]:
# GEMINI FUNCTIONS
# Model name passed to genai.GenerativeModel by ask_gemini and stream_gemini.
GEMINI_MODEL = "gemini-2.0-flash"
def ask_gemini(message):
    """Send a single message to Gemini in a fresh chat and return the reply text.

    Args:
        message: The user's prompt.

    Returns:
        The ``text`` of the response to ``message``.
    """
    session = genai.GenerativeModel(GEMINI_MODEL).start_chat(history=[])
    reply = session.send_message(message)
    # resolve() is called before reading the text, as in the streaming API.
    reply.resolve()
    return reply.text

def stream_gemini(message, history):
    """Stream a Gemini chat reply, yielding the growing chat history.

    Prior turns are handed to ``start_chat(history=...)`` rather than being
    appended to the session's ``history`` attribute after creation, so the
    library receives them through its documented entry point.

    Args:
        message: The new user message.
        history: Prior turns as a list of [user, assistant] pairs.

    Yields:
        A new list equal to ``history`` plus ``[message, partial_reply]``,
        once per streamed chunk. ``history`` itself is not mutated.
    """
    # Gemini names the assistant role "model".
    prior_turns = []
    for user, bot in history:
        prior_turns.append({"role": "user", "parts": [{"text": user}]})
        prior_turns.append({"role": "model", "parts": [{"text": bot}]})

    model = genai.GenerativeModel(GEMINI_MODEL)
    chat = model.start_chat(history=prior_turns)

    stream = chat.send_message(message, stream=True)
    collected = ""
    for chunk in stream:
        collected += chunk.text
        yield history + [[message, collected]]

In [None]:
# ANTHROPIC FUNCTIONS
def ask_anthropic(message):
    """Send a single user message to Claude and return the reply text.

    A client is built on each call from the ANTHROPIC_API_KEY environment
    variable. No system prompt and no earlier turns are sent.

    Args:
        message: The user's prompt.

    Returns:
        The ``text`` of the first content block in the response.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    client = anthropic.Anthropic(api_key=api_key)
    reply = client.messages.create(
        messages=[{"role": "user", "content": message}],
        max_tokens=512,
        model="claude-3-opus-20240229",
    )
    first_block = reply.content[0]
    return first_block.text

def stream_anthropic(message, history):
    """Stream a Claude chat reply, yielding the growing chat history.

    The client is created here from ANTHROPIC_API_KEY, the same way
    ask_anthropic does it, so the function does not depend on a module-level
    client object. The system prompt is passed through the ``system``
    parameter instead of being sent as a user turn.

    Args:
        message: The new user message.
        history: Prior turns as a list of [user, assistant] pairs.

    Yields:
        A new list equal to ``history`` plus ``[message, partial_reply]``,
        once per received text fragment. ``history`` itself is not mutated.
    """
    messages = []
    for user, bot in history:
        messages.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": bot}
        ])
    messages.append({"role": "user", "content": message})

    client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        system="You are a helpful assistant.",
        messages=messages,
        max_tokens=1024,
        stream=True,
    )

    collected = ""
    for chunk in response:
        # Only content_block_delta events carry reply text; other event types
        # (message start/stop, block start/stop, ...) are skipped.
        if chunk.type != "content_block_delta":
            continue
        # Not every delta is guaranteed to have a .text attribute.
        token = getattr(chunk.delta, "text", None)
        if token:
            collected += token
            yield history + [[message, collected]]

In [None]:
# OLLAMA FUNCTIONS
# HTTP headers for Ollama requests; referenced by ask_ollama.
HEADERS = {"Content-Type": "application/json"}
def ask_ollama(message):
    """Send a single user message to a local Ollama server and return the reply.

    Posts a non-streaming chat request to http://localhost:11434/api/chat.
    HEADERS is sent as real HTTP headers rather than as a field inside the
    JSON body, and the response body is parsed once.

    Args:
        message: The user's prompt.

    Returns:
        The assistant message content from the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    res = requests.post(
        "http://localhost:11434/api/chat",
        json={
            "model": "llama3.2",
            "messages": [{"role": "user", "content": message}],
            "stream": False,
        },
        headers=HEADERS,
    )
    # Fail with the HTTP status instead of a KeyError on an error payload.
    res.raise_for_status()
    payload = res.json()
    return payload["message"]["content"]

def stream_ollama(message, history):
    """Stream a reply from a local Ollama model, yielding the growing history.

    Args:
        message: The new user message.
        history: Prior turns as a list of (user, assistant) pairs.

    Yields:
        A new list equal to ``history`` plus ``[message, partial_reply]``,
        once per streamed chunk. ``history`` itself is not mutated.
    """
    conversation = [{"role": "system", "content": "You are a helpful assistant."}]
    for past_user, past_assistant in history:
        conversation.append({"role": "user", "content": past_user})
        conversation.append({"role": "assistant", "content": past_assistant})
    conversation.append({"role": "user", "content": message})

    # Log the full prompt that is about to be sent.
    print("Streaming Ollama:", conversation)

    stream = ollama.chat(model="llama3.2", messages=conversation, stream=True)

    so_far = ""
    for part in stream:
        so_far += part["message"]["content"]
        yield history + [[message, so_far]]

In [None]:
# Master response handler
def response(message, history, model_choice):
    """Route a single message to the selected provider and return its reply.

    Args:
        message: The user's prompt.
        history: Accepted for interface compatibility; not forwarded to the
            provider functions.
        model_choice: One of "OpenAI", "Gemini", "Anthropic", "Ollama".

    Returns:
        The provider's reply text, "Model not supported." for any other
        choice, or an error string if the provider call raises.
    """
    # Each entry is (label, thunk); the thunk defers the provider lookup and
    # call until that label is actually selected.
    routes = (
        ("OpenAI", lambda: ask_openai(message)),
        ("Gemini", lambda: ask_gemini(message)),
        ("Anthropic", lambda: ask_anthropic(message)),
        ("Ollama", lambda: ask_ollama(message)),
    )
    try:
        for label, call_provider in routes:
            if model_choice == label:
                return call_provider()
        return "Model not supported."
    except Exception as err:
        return f"⚠️ Error: {err}"
    
# for streaming responses
def handle_message(message, history, model_choice):
    """Stream a reply from the selected provider for the Gradio chat UI.

    Args:
        message: The text the user submitted.
        history: Prior turns as a list of [user, assistant] pairs.
        model_choice: One of "OpenAI", "Gemini", "Anthropic", "Ollama".

    Yields:
        ``(chatbot_value, state_value)`` tuples. While streaming, both
        elements are the updated history. For a blank message, an unknown
        model, or a provider error, a notice is shown in the chatbot value
        and the state value is the unchanged ``history``.
    """
    if not message or not message.strip():
        yield history + [["-", "Please enter a message"]], history
        # Stop here so a blank message is never sent to a provider.
        return

    generators = {
        "OpenAI": stream_openai,
        "Gemini": stream_gemini,
        "Anthropic": stream_anthropic,
        "Ollama": stream_ollama,
    }

    generator = generators.get(model_choice)
    if generator is None:
        yield history + [[message, "❌ Invalid model selected"]], history
        # Stop here; there is nothing to call for an unknown model.
        return

    try:
        for updated_history in generator(message, history):
            yield updated_history, updated_history  # Update both chatbot + state
    except Exception as e:
        # Mirror response(): surface provider failures in the chat instead of
        # letting the exception escape; the failed turn is not stored in state.
        yield history + [[message, f"⚠️ Error: {e}"]], history

In [None]:
# Simple Gradio Interface without streaming:
# each send calls response() once and displays the complete reply.
with gr.Blocks() as demo:
    # Provider selector; the chosen label is passed to response() as model_choice.
    model_dropdown = gr.Dropdown(choices=["OpenAI", "Gemini", "Anthropic", "Ollama"],
                                  value="Gemini", label="Select Model")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask a question...")
    send_btn = gr.Button("Send")

    # Conversation so far, as a list of (message, reply) pairs.
    history = gr.State([])

    def chat_flow(message, chat_history, model_choice):
        """Get one complete reply and record the turn.

        Calls response() for the selected model, appends the
        (message, reply) pair to chat_history in place, and returns the same
        list twice: once for the chatbot display and once for the state.
        """
        response_text = response(message, chat_history, model_choice)
        chat_history.append((message, response_text))
        return chat_history, chat_history

    # The button click and pressing Enter in the textbox share one handler.
    send_btn.click(chat_flow, inputs=[msg, history, model_dropdown], outputs=[chatbot, history])
    msg.submit(chat_flow, inputs=[msg, history, model_dropdown], outputs=[chatbot, history])

# NOTE(review): debug=True presumably keeps this cell running until it is
# interrupted, which would stop later cells from executing on Run All — confirm.
demo.launch(debug=True)

In [None]:
# Gradio with streaming: replies are rendered incrementally via handle_message.
# NOTE(review): this rebinds `demo`, `chatbot` and `msg`, which the
# non-streaming UI cell also assigns.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Multi-LLM Chatbot with Streaming + Markdown")
    gr.Markdown("Select a model below and chat with Gemini, GPT, Claude, or Ollama!")

    # Provider selector; the chosen label is passed to handle_message as model_choice.
    model_selector = gr.Dropdown(
        choices=["OpenAI", "Gemini", "Anthropic", "Ollama"],
        value="Gemini",
        label="Choose a Model"
    )

    # avatar_images is a pair of image URLs — presumably (user, bot); confirm against the Gradio docs.
    chatbot = gr.Chatbot(render_markdown=True, avatar_images=("https://cdn-icons-png.flaticon.com/512/9131/9131478.png","https://cdn-icons-png.flaticon.com/512/13330/13330989.png"))
    msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
    send = gr.Button("Send", variant="huggingface")
    clear = gr.Button("Clear Chat", variant="stop")
    # Conversation so far; handle_message receives it and yields its replacement.
    state = gr.State([])
    # The button click and pressing Enter in the textbox share one handler.
    # Each value handle_message yields is a (chatbot, state) pair.
    send.click(handle_message, [msg, state, model_selector], [chatbot, state], concurrency_limit=1)
    msg.submit(handle_message, [msg, state, model_selector], [chatbot, state], concurrency_limit=1)

    def reset():
        """Return two empty lists, used to clear the chatbot display and the state."""
        return [], []

    clear.click(reset, None, [chatbot, state])

# NOTE(review): debug=True presumably keeps this cell running until it is
# interrupted — confirm.
demo.launch(debug=True)
