<a href="https://colab.research.google.com/github/Rajeevku1/AIAgentSamples/blob/main/chatusinggradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import requests
import gradio as gr
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import userdata
import json
import math


In [None]:
def _get_secret(name):
    """Return the Colab secret `name`, or None when it is unavailable.

    `userdata.get` raises instead of returning None when a secret does not
    exist or the notebook has not been granted access to it. Those errors are
    translated to None here so the optional-client checks below can do their
    job instead of the whole cell failing.
    """
    try:
        return userdata.get(name)
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        return None


openai_api_key = _get_secret('OPENAI_API_KEY')
anthropic_api_key = _get_secret('ANTHROPIC_API_KEY')
TAVILY_API_KEY = _get_secret('TAVILY_API_KEY')

# Each client is None when its key is missing, so callers can test for it.
openai_client = OpenAI(api_key=openai_api_key) if openai_api_key else None

# The Anthropic endpoint is driven through the OpenAI SDK by overriding the base URL.
anthropic_url = "https://api.anthropic.com/v1/"
anthropic_client = (
    OpenAI(api_key=anthropic_api_key, base_url=anthropic_url)
    if anthropic_api_key
    else None
)

In [None]:
# Default system prompt, read as a module-level global by stream_claude.
system_message: str = "You are a helpful assistant that responds in markdown without code blocks"

# Initial module-level confidence value.
mean_confidence: float = 0

In [None]:
def stream_claude(prompt):
    """Send `prompt` to Claude and return the complete reply as one string.

    The request is made with `stream=True` through the module-level
    `anthropic_client`; the streamed text deltas are concatenated before
    returning, so the caller receives the full reply rather than partial
    chunks.

    Args:
        prompt: The user's message text.

    Returns:
        The assembled reply text, or an error string when no Anthropic client
        is configured (the same way `stream_gpt` reports a missing key).
    """
    if not anthropic_client:
        return "Error: Anthropic API key not set"

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    stream = anthropic_client.chat.completions.create(
        model="claude-sonnet-4-5-20250929",
        messages=messages,
        stream=True,
    )

    pieces = []
    for chunk in stream:
        # A chunk may arrive with no choices at all; indexing [0] would raise.
        if not chunk.choices:
            continue
        # delta.content can be None on chunks that carry no text.
        pieces.append(chunk.choices[0].delta.content or "")
    return "".join(pieces)

In [None]:
def stream_model(prompt, model):
    """Route `prompt` to the backend selected by the `model` label.

    Args:
        prompt: The user's message text.
        model: One of "GPT", "GPT Reasoning", "GPT Internet" or "Claude".

    Returns:
        Whatever the selected backend function returns.

    Raises:
        ValueError: If `model` is not one of the labels above.
    """
    if model == "Claude":
        return stream_claude(prompt)
    # All GPT variants share one entry point, which branches on the label itself.
    if model in ("GPT", "GPT Reasoning", "GPT Internet"):
        return stream_gpt(prompt, model)
    raise ValueError("Unknown model")

In [None]:
def _mean_token_confidence(response):
    """Return the mean per-token probability of the first choice, or None.

    None means the response carries no logprob data at all (for example when
    logprobs were not requested). A response that has logprob data but no
    usable token entries yields 0.
    """
    logprobs = response.choices[0].logprobs
    if logprobs is None or logprobs.content is None:
        return None
    token_probs = [
        math.exp(item.logprob)
        for item in logprobs.content
        if item.logprob is not None
    ]
    if not token_probs:
        return 0
    return sum(token_probs) / len(token_probs)


def stream_gpt(prompt, model):
    """Answer `prompt` with one of the OpenAI-backed variants.

    Args:
        prompt: The user's message text.
        model: Variant label. "GPT Reasoning" calls gpt-5-mini without
            logprobs; "GPT Internet" first wraps the prompt with web search
            results via `ask_with_internet` and calls gpt-4o-mini; any other
            value calls gpt-4.

    Returns:
        The reply text. When the response includes token logprobs, a
        "Mean confidence" line (mean of exp(logprob) over the tokens) is
        appended. Returns an error string when no OpenAI client is configured.
    """
    if not openai_client:
        return "Error: OpenAI API key not set"

    # Local prompt; it shadows the module-level `system_message` inside this function.
    system_message = "You are a helpful assistant can use internet to answer queries"
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]

    if model == "GPT Reasoning":
        # logprobs are not requested on this path, so no confidence is reported.
        response = openai_client.chat.completions.create(
            model="gpt-5-mini", messages=messages, reasoning_effort="minimal"
        )
    elif model == "GPT Internet":
        # Keep the system message first; only the user content is replaced by
        # the search-augmented prompt.
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": ask_with_internet(prompt)},
        ]
        response = openai_client.chat.completions.create(
            model="gpt-4o-mini", messages=messages, logprobs=True, top_logprobs=1
        )
    else:  # plain "GPT"
        response = openai_client.chat.completions.create(
            model="gpt-4", messages=messages, logprobs=True, top_logprobs=1
        )

    # message.content may be None; fall back to "" so concatenation cannot fail.
    answer = response.choices[0].message.content or ""
    mean_confidence = _mean_token_confidence(response)
    if mean_confidence is None:
        return answer
    return answer + f"\n\nMean confidence: {mean_confidence}"

In [None]:
def chatWithAIusingGrido():
    """Build and launch the Gradio chat UI.

    The interface has a multi-line message box and a model dropdown as inputs,
    and a text box for the reply. Each submission is passed to `stream_model`
    as (message, model label).

    The UI is launched with `inbrowser=True, debug=True`. On Colab, Gradio
    reports that `debug=True` keeps the cell running until it is interrupted.
    """
    # No system prompt is set here: assigning `system_message` inside this
    # function would only create an unused local, and the backend functions
    # pick their own prompts.
    message_input = gr.Textbox(label="Your message:", info="Enter a message", lines=7)
    model_Name = gr.Dropdown(
        ["GPT", "GPT Reasoning", "Claude", "GPT Internet"],
        label="Select model",
        value="GPT",
    )
    message_output = gr.Textbox(label="Response:", lines=12)

    view = gr.Interface(
        fn=stream_model,
        title="AI Chat Bot",
        inputs=[message_input, model_Name],
        outputs=[message_output],
        examples=[
            ["Explain the Transformer architecture to a layperson", "GPT"],
            ["Explain the Transformer architecture to an aspiring AI engineer", "Claude"],
        ],
        flagging_mode="never",
    )
    view.launch(inbrowser=True, debug=True)

In [None]:
def tavily_search(query, max_results=3, timeout=30):
    """Run a Tavily web search and return the decoded JSON response.

    Args:
        query: Search text.
        max_results: Value sent as the request's `max_results` field.
        timeout: Seconds to wait for the HTTP request before giving up;
            without it a stalled connection would block indefinitely.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If Tavily answers with an error status, so the
            failure surfaces here rather than as a missing key downstream.
    """
    url = "https://api.tavily.com/search"
    payload = {
        "api_key": TAVILY_API_KEY,
        "query": query,
        "max_results": max_results,
    }
    response = requests.post(url, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()



In [None]:
def ask_with_internet(question):
    """Build a prompt that embeds web search results for `question`.

    Calls `tavily_search`, joins the `content` field of each returned result
    with newlines, and inserts that text plus the question into a fixed
    prompt template.

    Args:
        question: The user's question; used both as the search query and in
            the prompt.

    Returns:
        The prompt string. A response without a "results" list, or entries
        without "content", contribute no context instead of raising KeyError.
    """
    # Step 1: Search
    results = tavily_search(question)

    # Step 2: Extract text, tolerating error payloads and partial entries
    hits = results.get("results") or []
    context = "\n".join(hit["content"] for hit in hits if hit.get("content"))

    # Step 3: Ask LLM with context
    prompt = f"""
    Use the following web search results to answer the question.

    Search Results:
    {context}

    Question: {question}

    Answer using the search results.
    """
    return prompt


In [None]:
# Smoke test: send one fixed question through the plain "GPT" path and print the reply.
# Disabled alternative that only builds the search-augmented prompt:
#prompt = ask_with_internet("who is president of USA")
response = stream_gpt(prompt="who is president of USA", model="GPT")
print(response)

In [None]:
# Launch the Gradio chat UI defined above.
chatWithAIusingGrido()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://aece3101dbb28580a7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/gradio/queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 1698, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/anyio/to_thread.py", line 63, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^