# Multi-Model Streaming Response Interface: OpenAI, Claude, Gemini, and Ollama with Gradio

In [1]:
from openai import OpenAI
import google.generativeai
import anthropic
import ollama
from dotenv import load_dotenv
import os
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


## Configure API connections

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Keep the raw keys under their own names. Previously they were stored in
# `openai` / `claude` / `gemini` and then immediately overwritten by the client
# objects below, so the values were read but never used.
openai_api_key = os.getenv("OPENAI_API_KEY")
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")

# Model identifiers used by the streaming helpers.
openai_model = "gpt-4o-mini"
claude_model = "claude-3-haiku-20240307"
gemini_model = "gemini-1.5-flash"
ollama_model = "llama3.2"

# Hand the keys to the clients explicitly so the dependency on the environment
# variables is visible here.
# NOTE(review): when a key is None the SDKs are expected to fall back to their
# own environment lookup, matching the previous no-argument calls -- confirm
# against the installed SDK versions.
openai = OpenAI(api_key=openai_api_key)
claude = anthropic.Anthropic(api_key=anthropic_api_key)

# configure() sets up the google.generativeai module globally; it does not
# return a client. `gemini` stays bound to None, exactly as before, so any
# existing reference to the name still resolves.
google.generativeai.configure(api_key=google_api_key)
gemini = None

## Multi-model response (streaming mode)

In [3]:
# System instruction shared by every model backend. Nothing is interpolated
# into it, so a plain string literal is used instead of an f-string.
system_prompt = "You are a chat that responds in a very friendly tone"

def stream_openai(user_prompt):
    """
    Stream a reply from the OpenAI chat model for the given user prompt.

    Args:
    user_prompt (str): The user's input to be processed by OpenAI.

    Yields:
    str: The response text accumulated so far, once for every received chunk
    that carries at least one choice.
    """
    # Request a streamed chat completion: the shared system prompt sets the
    # tone, the user prompt carries the actual question.
    stream = openai.chat.completions.create(
        model=openai_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        stream=True,
    )

    ans = ""
    for chunk in stream:
        # A chunk may arrive with an empty `choices` list (for example a
        # usage-only chunk); indexing [0] on it would raise IndexError and
        # abort the stream, so such chunks are skipped.
        if not chunk.choices:
            continue
        # `delta.content` can be None (e.g. role-only or final chunk).
        ans += chunk.choices[0].delta.content or ""
        yield ans

def stream_claude(user_prompt):
    """
    Stream a reply from the Claude model for the given user prompt.

    Args:
    user_prompt (str): The user's input to be processed by Claude.

    Yields:
    str: The response text accumulated so far, once per streamed text piece.
    """
    # Collect the request parameters first so the streaming call stays short.
    request = {
        "model": claude_model,
        "system": system_prompt,  # shared system prompt
        "messages": [{"role": "user", "content": user_prompt}],
        "max_tokens": 500,  # upper bound on the length of the reply
    }

    partial = ""
    # The stream object is used as a context manager around the iteration.
    with claude.messages.stream(**request) as response:
        for piece in response.text_stream:
            partial += piece or ""
            yield partial

def stream_gemini(user_prompt):
    """
    Stream a reply from the Gemini model for the given user prompt.

    The system prompt is not sent separately: it is prepended to the user
    prompt, separated by a newline, and the combined text is sent as one prompt.

    Args:
    user_prompt (str): The user's input to be processed by Gemini.

    Yields:
    str: The response text accumulated so far, once per streamed chunk.
    """
    # Build a model handle for the configured Gemini model name.
    gemini_client = google.generativeai.GenerativeModel(model_name=gemini_model)

    accumulated = ""
    # Send system prompt + user prompt as a single text and iterate the stream.
    for part in gemini_client.generate_content(
        f"{system_prompt}\n{user_prompt}",
        stream=True,
    ):
        accumulated += part.text or ""
        yield accumulated

def stream_ollama(user_prompt):
    """
    Stream a reply from the Ollama model for the given user prompt.

    Args:
    user_prompt (str): The user's input to be processed by Ollama.

    Yields:
    str: The response text accumulated so far, once per streamed chunk.
    """
    # Conversation sent to the model: shared system prompt, then the user turn.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    reply = ""
    # With stream=True the call is iterated chunk by chunk.
    for part in ollama.chat(model=ollama_model, messages=conversation, stream=True):
        reply += part["message"]["content"] or ""
        yield reply

def model(user_prompt, model):
    """
    Dispatch the prompt to the selected backend and relay its streamed output.

    Args:
    user_prompt (str): The user's input to be processed.
    model (str): Backend selector: "gpt", "claude", "gemini", or "ollama".

    Yields:
    str: Whatever the selected streaming function yields, unchanged.

    Raises:
    ValueError: If `model` is not one of the supported selectors. Because this
    is a generator, the error surfaces when iteration starts.
    """
    # Reject unsupported selectors up front.
    if model not in ("gpt", "claude", "gemini", "ollama"):
        raise ValueError("Unknown Model")

    # Map each selector to the streaming function that serves it.
    backends = {
        "gpt": stream_openai,
        "claude": stream_claude,
        "gemini": stream_gemini,
        "ollama": stream_ollama,
    }
    streamer = backends[model]

    # Relay every partial response from the chosen backend.
    yield from streamer(user_prompt)

## Chat interface

In [4]:
# Build the Gradio components first, then assemble them into the interface.
message_input = gr.Textbox(label="Your message:")  # free-text user prompt
model_selector = gr.Dropdown(
    ["gpt", "claude", "gemini", "ollama"],
    label="Select a model:",
    value="gpt",  # preselected backend
)
response_output = gr.Markdown(label="Response:")  # reply rendered as markdown

# `model` is the function invoked with the two inputs, in this order.
view = gr.Interface(
    fn=model,
    inputs=[message_input, model_selector],
    outputs=[response_output],
)

# Start the web UI.
view.launch()

--------


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


