In [1]:
# ---------------------------------------------------------------------------------------
# Imports
# ---------------------------------------------------------------------------------------
# Importing necessary libraries for environment management, API interactions, 
# and UI creation.

import json
import os
from datetime import datetime

import anthropic
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

In [2]:
# ---------------------------------------------------------------------------------------
# Initialization
# ---------------------------------------------------------------------------------------
# Load variables from a .env file into the process environment.
# override=True is passed to load_dotenv (presumably so .env values win over
# variables already set in the environment — confirm against python-dotenv docs).
load_dotenv(override=True)

# Read the OpenAI API key from the environment. The value is not referenced again
# in the visible cells; OpenAI() below is constructed without arguments
# (presumably the SDK picks the key up from the environment itself — TODO confirm).
openai_api_key = os.getenv('OPENAI_API_KEY')

# Name of the chat model passed to every chat-completions request in this notebook.
MODEL = "gpt-4o-mini"

# Shared OpenAI client used by the chat and audio helpers below.
openai = OpenAI()

In [3]:
# ---------------------------------------------------------------------------------------
# System Message Setup
# ---------------------------------------------------------------------------------------
# System prompt for the "FlightAI" assistant: sets the persona and asks for
# short, courteous, accurate one-sentence answers.
system_message = (
    "You are a helpful assistant for an Airline called FlightAI. "
    "Give short, courteous answers, no more than 1 sentence. "
    "Always be accurate. If you don't know the answer, say so."
)

In [4]:
# ---------------------------------------------------------------------------------------
# Ticket Price Retrieval Function
# ---------------------------------------------------------------------------------------
# Return-ticket prices keyed by lower-case destination city name.
ticket_prices = {"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"}

def get_ticket_price(destination_city):
    """
    Retrieves the price for a return ticket to a specified city.

    The lookup ignores letter case and surrounding whitespace, and a missing
    (None/empty) city is treated as an unknown destination instead of raising.

    Args:
        destination_city (str): The city the customer wants to travel to.

    Returns:
        tuple: A tuple containing:
            - (str) The ticket price for the destination city (or 'Unknown' if not found).
            - (str) A formatted response string indicating the price.
    """
    # Log each tool invocation so it is visible in the notebook output.
    print(f"Tool get_ticket_price called for {destination_city}")

    # Normalise the key: the argument comes from the model, so it may carry
    # stray whitespace, odd casing, or be missing altogether.
    city = str(destination_city or "").strip().lower()

    # Fall back to "Unknown" for cities that are not in the price table.
    price = ticket_prices.get(city, "Unknown")

    # The response echoes the city exactly as it was supplied.
    response_str = f'The price for {destination_city} is {price}'
    return price, response_str

In [5]:
# ---------------------------------------------------------------------------------------
# Tool Definition for get_ticket_price
# ---------------------------------------------------------------------------------------
# This dictionary defines the metadata for the 'get_ticket_price' function (tool).
# The assistant can call this function when it needs to know the ticket price.
# Tool schema advertised to the model for get_ticket_price: one required
# string argument, no extra properties allowed.
price_function = dict(
    name="get_ticket_price",
    description=(
        "Get the price of a return ticket to the destination city. "
        "Call this whenever you need to know the ticket price, "
        "for example when a customer asks 'How much is a ticket to this city?'"
    ),
    parameters=dict(
        type="object",
        properties=dict(
            destination_city=dict(
                type="string",
                description="The city that the customer wants to travel to",
            ),
        ),
        required=["destination_city"],
        additionalProperties=False,
    ),
)

In [6]:
# ---------------------------------------------------------------------------------------
# Flight Booking Function
# ---------------------------------------------------------------------------------------
def book_flight(origin, destination, date):
    """
    Books a flight by writing flight details to a .txt file named after the given date.

    The date is validated before it is used as a file name: the arguments come
    from the model, so an unchecked value such as "../x" would otherwise let the
    booking be written outside the working directory. When the date is not a
    valid YYYY-MM-DD string nothing is written and the returned message says so.

    Args:
        origin (str): The departure city for the flight.
        destination (str): The arrival city for the flight.
        date (str): The date of the flight in YYYY-MM-DD format.

    Returns:
        tuple: A tuple containing:
            - (str) The destination city
            - (str) The origin city
            - (str) The date of the flight
            - (str) A confirmation string, or an explanation of why the
              booking was not made.
    """
    try:
        # Only used as a format check; the parsed value itself is not needed.
        datetime.strptime(date, "%Y-%m-%d")
    except (TypeError, ValueError):
        response_str = (
            f"The flight for {destination} from {origin} could not be booked: "
            f"{date!r} is not a valid date in YYYY-MM-DD format."
        )
        return destination, origin, date, response_str

    # Create or overwrite the file for this date; the context manager closes it
    # even if the write fails, and UTF-8 keeps non-ASCII city names portable.
    with open(f"{date}.txt", "w", encoding="utf-8") as booking_file:
        booking_file.write(f"{origin}, {destination}, {date}")

    # Provide a simple confirmation message.
    response_str = f"The flight for {destination} from {origin} on {date} is booked."
    return destination, origin, date, response_str

In [7]:
# ---------------------------------------------------------------------------------------
# Tool Definition for book_flight
# ---------------------------------------------------------------------------------------
# This dictionary defines the metadata for the 'book_flight' function (tool).
# The assistant can call this function to record flight booking details in a .txt file.
# Tool schema advertised to the model for book_flight: three required string
# arguments (origin, destination, date), no extra properties allowed.
book_flight_function = dict(
    name="book_flight",
    description=(
        "Books the flight by writing down the flight details in a .txt file. "
        "Call this whenever the user decides to book a flight and provides "
        "the information about the flight destination, flight origin, and flight date."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            origin=dict(
                type="string",
                description="The city that the customer wants to travel from",
            ),
            destination=dict(
                type="string",
                description="The city that the customer wants to travel to",
            ),
            date=dict(
                type="string",
                description="The date of the flight in YYYY-MM-DD format",
            ),
        ),
        required=["origin", "destination", "date"],
        additionalProperties=False,
    ),
)

In [8]:
# ---------------------------------------------------------------------------------------
# Tools Collection
# ---------------------------------------------------------------------------------------
# A list of available tool definitions that the model can invoke.
# Wrap each tool schema in the {"type": "function", ...} envelope, booking first.
tools = [
    {"type": "function", "function": tool_schema}
    for tool_schema in (book_flight_function, price_function)
]

def call_function(name, args):
    """
    Dispatches a model-requested tool call to the matching local function.

    Args:
        name (str): The tool name ("book_flight" or "get_ticket_price").
        args (dict): Keyword arguments for the tool, expanded with ``**``.

    Returns:
        Any: Whatever the selected tool returns, or None when ``name`` does not
        match a known tool.
    """
    outcome = None
    if name == "book_flight":
        outcome = book_flight(**args)
    elif name == "get_ticket_price":
        outcome = get_ticket_price(**args)
    return outcome

In [9]:
# ---------------------------------------------------------------------------------------
# Chat Function (Legacy)
# ---------------------------------------------------------------------------------------
# NOTE: This function is superseded by the "new chat function" below but is kept here 
# for reference/compatibility. 
def chat(message, history):
    """
    Original chat function that takes a user message and conversation history,
    passes them to the OpenAI model, and returns the assistant's response.
    If the model invokes tools, every requested tool is run locally and the
    model is then called once more with all of the tool results.

    Args:
        message (str): The latest user message.
        history (list): A list of previous messages in the conversation,
                        where each message is a dict with 'role' and 'content'.

    Returns:
        str: The assistant's response message as plain text.
    """
    # Combine system message, conversation history, and user message.
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]

    # Get initial response from the model, providing tools so it can decide to call them.
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
    print(response.choices[0].message.content)

    # If the model's finish_reason indicates a tool call, handle tool invocation.
    if response.choices[0].finish_reason == "tool_calls":
        assistant_message = response.choices[0].message
        messages.append(assistant_message)

        for tool_call in assistant_message.tool_calls:
            # Extract the function name and arguments from the tool call.
            name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)

            # Call the corresponding tool function with the parsed arguments.
            result = call_function(name, args)

            # Append the tool's result into the conversation as a 'tool' role message.
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": str(result)
            })

        # Re-call the model once, after the loop: the request must carry a
        # result for every tool_call_id of the assistant message, so asking
        # again after only the first result breaks multi-tool turns.
        response = openai.chat.completions.create(model=MODEL, messages=messages)

    # Return the final assistant response.
    return response.choices[0].message.content

In [10]:
# ---------------------------------------------------------------------------------------
# Anthropic Claude Setup
# ---------------------------------------------------------------------------------------
# Read the Anthropic API key from the environment. The value is not referenced
# again in the visible cells; the client below is built without arguments
# (presumably the SDK reads the key from the environment itself — TODO confirm).
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
# Shared Anthropic client used by the translation helper.
claude = anthropic.Anthropic()

# System prompt passed to Claude on every translation request.
translation_system_message = "Translate the sequence into Russian language and return only the translation"   

In [11]:
# ---------------------------------------------------------------------------------------
# New chat function (Updated)
# ---------------------------------------------------------------------------------------
# This new function has similar logic to the old 'chat', but it is consolidated
# and includes clearer variable naming and logic structure.

def chat(message, history):
    """
    Sends the conversation to the OpenAI model, runs any tools it requests,
    and returns the final response.

    The request is built from the system message, the prior history and the
    new user message; ``history`` itself is not modified. When the model asks
    for tools, every requested tool is executed and the model is called once
    more with all of the results.

    Args:
        message (str): The latest user message to add to the conversation.
        history (list): The conversation history,
                        as a list of dicts with 'role' and 'content'.

    Returns:
        str: The assistant's final message content after any tool calls.
    """
    # Prepare the conversation for the model by including the system message at the start.
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]

    # Create a model completion request.
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)

    # If the model indicates it wants to call a tool:
    if response.choices[0].finish_reason == "tool_calls":
        # Append the assistant's tool call message to the conversation.
        assistant_message = response.choices[0].message
        messages.append(assistant_message)

        # Process each tool call
        for tool_call in assistant_message.tool_calls:
            # Get the function name and arguments from the tool call.
            name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)

            # Execute the function locally.
            result = call_function(name, args)

            # Append the tool's result as a message with 'tool' role.
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": str(result)
            })

        # Call the model again only after the loop: the follow-up request must
        # include a result for every tool_call_id, so re-calling after each
        # individual result breaks turns with more than one tool call.
        response = openai.chat.completions.create(model=MODEL, messages=messages)

    # Return the final content from the model.
    return response.choices[0].message.content

In [12]:
# ---------------------------------------------------------------------------------------
# Translation Function for the Right Chat Window
# ---------------------------------------------------------------------------------------
def translation(message, history):
    """
    Asks Claude to translate ``message``, optionally preceded by prior turns.

    Args:
        message (str): The new text to translate; sent as the final user turn.
        history (list): Earlier turns to send first. Only entries that have
                        both 'role' and 'content' are forwarded, reduced to
                        those two keys. May be empty or None.

    Returns:
        str: The text of the first content item in Claude's reply.
    """
    # Strip every prior turn down to the two fields forwarded to the API.
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
        if "role" in turn and "content" in turn
    ]
    request_messages = prior_turns + [{"role": "user", "content": message}]

    # The translation instruction travels in the system prompt.
    reply = claude.messages.create(
        model="claude-3-5-sonnet-latest",
        max_tokens=200,
        system=translation_system_message,
        messages=request_messages,
    )
    # Only the first content item of the reply is used.
    return reply.content[0].text


In [13]:
# ---------------------------------------------------------------------------------------
# Helper for the Left Chat Interface
# ---------------------------------------------------------------------------------------
def do_chat(user_message, chat_history):
    """
    Event handler for the left chat pane.

    Steps:
    1) Calls ``chat`` with the message and the history as it stands.
    2) Appends the user message and the assistant reply to ``chat_history``
       (the list is modified in place).
    3) Returns an empty string (to clear the textbox) and the same list.

    Args:
        user_message (str): The user's latest input.
        chat_history (list): The ongoing conversation history.

    Returns:
        tuple:
            (str) An empty string to clear the text input in the UI.
            (list) The updated chat history.
    """
    # The reply is requested before the new turn is recorded, so chat()
    # receives the history without the current message.
    assistant_reply = chat(user_message, chat_history)
    chat_history.extend([
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": assistant_reply},
    ])
    return "", chat_history


In [14]:
# ---------------------------------------------------------------------------------------
# Helper for the Right Chat Interface
# ---------------------------------------------------------------------------------------
def do_translation(chat_history, translation_history):
    """
    Mirrors the left-hand conversation into its translated counterpart.

    Messages that were already translated on an earlier turn are reused from
    ``translation_history``; only the messages added since then are sent to
    ``translation``. Previously the whole conversation was re-translated on
    every turn, so the number of translation requests grew with each message.
    If the two histories do not line up (different roles, or more translated
    messages than chat messages) everything is translated again.

    Args:
        chat_history (list): The conversation from the left chat,
                             each message is a dict with keys 'role' and 'content'.
        translation_history (list): The translated conversation currently shown
                                    in the right chat (may be empty or None).

    Returns:
        list: A new conversation history of translated messages
              (each with 'role' and 'content').
    """
    chat_history = chat_history or []
    previous = translation_history or []

    # Trust the existing translations only when they form a role-for-role
    # prefix of the current conversation.
    reusable = 0
    if len(previous) <= len(chat_history) and all(
        isinstance(done, dict) and done.get("role") == msg["role"]
        for done, msg in zip(previous, chat_history)
    ):
        reusable = len(previous)

    # Start from a copy so the caller's list is never modified.
    translated_history = list(previous[:reusable])

    # Translate the remaining messages, preserving each role.
    for msg in chat_history[reusable:]:
        translated_text = translation(msg["content"], [])
        translated_history.append({"role": msg["role"], "content": translated_text})

    return translated_history

In [15]:
# ---------------------------------------------------------------------------------------
# Audio Transcription
# ---------------------------------------------------------------------------------------
def transcribe(audio):
    """
    Converts an audio file to text with OpenAI's "whisper-1" model.

    The request goes to the ``audio.translations`` endpoint (presumably this
    returns English text whatever the spoken language — confirm against the
    OpenAI audio API reference).

    Args:
        audio (str): The file path to the audio file.

    Returns:
        str: The resulting text, or a string starting with "Error: " if the
        file cannot be opened, the request fails, or the response has an
        unexpected shape.
    """
    try:
        # The context manager closes the file even when the request fails;
        # previously the handle was left open after every call.
        with open(audio, 'rb') as audio_file:
            response = openai.audio.translations.create(model="whisper-1", file=audio_file)

        # Accept the response shapes the client may return.
        if hasattr(response, 'text'):
            return response.text
        if isinstance(response, dict) and 'text' in response:
            return response['text']
        if isinstance(response, str):
            return response
        return f"Error: Unexpected response format: {response}"
    except Exception as e:
        # Report failures as text so the UI callback does not raise.
        return f"Error: {str(e)}"

def record_and_send(audio, chat_history):
    """
    UI callback: transcribes a recording and sends the text through do_chat.

    When there is no recording (for example the audio component was cleared
    and the change event fired with None) nothing is sent. Previously the
    resulting "Error: ..." string from transcribe was posted to the assistant
    as if the user had typed it.

    Args:
        audio (str): The file path to the recorded audio file, or None.
        chat_history (list): The current conversation history of the chat.

    Returns:
        tuple:
            - (str) An empty string to clear the text input in the UI.
            - (list) The chat history, updated when a recording was processed.
    """
    # Nothing recorded: leave the conversation untouched.
    if not audio:
        return "", chat_history

    # Convert the audio to text.
    text = transcribe(audio)
    # Pass the text to the do_chat function, which returns the updated chat history.
    return do_chat(text, chat_history)

In [None]:
# ---------------------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------------------
# Build the Gradio app with two Chatbots (left and right) and an audio recorder.
with gr.Blocks() as demo:
    # Create two chatbots side by side:
    # Left is the AI Assistant, right is the translated version.
    with gr.Row():
        chatbot = gr.Chatbot(label="AI Assistant (Left)", height=500, type="messages")
        translator_chatbot = gr.Chatbot(label="Translated Output (Right)", height=500, type="messages")

    # Create a row containing a text box and an audio recording button.
    with gr.Row():
        entry = gr.Textbox(label="Enter your message:", placeholder="Type your message or use the mic")
        record_button = gr.Audio(type="filepath", label="🎤", elem_id="small-record-button", interactive=True)

    # Create a row with a clear button to reset both chat windows.
    with gr.Row():
        clear = gr.Button("Clear")

    # When the audio value changes, call record_and_send(...).
    # Its outputs go to (entry, chatbot); the chained do_translation(...) step
    # then refreshes translator_chatbot.
    record_button.change(record_and_send, inputs=[record_button, chatbot], outputs=[entry, chatbot]) \
                 .then(fn=do_translation, inputs=[chatbot, translator_chatbot], outputs=translator_chatbot)

    # When the user submits text, call do_chat(...).
    # Outputs go to (entry, chatbot). Then chain to do_translation(...).
    entry.submit(fn=do_chat, inputs=[entry, chatbot], outputs=[entry, chatbot]) \
         .then(fn=do_translation, inputs=[chatbot, translator_chatbot], outputs=translator_chatbot)

    # Clear button resets both chatbots' histories. Two output components need
    # two return values; a single [] is read as "zero values returned".
    clear.click(fn=lambda: ([], []), inputs=None, outputs=[chatbot, translator_chatbot], queue=False)

# Launch the Gradio interface with debug mode enabled; inbrowser=False means
# no browser tab is opened automatically.
demo.launch(inbrowser=False, debug=True)

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Assistant: Hello! How can I assist you today?
Assistant: Sure! Please provide the origin city, destination city, and the date of the flight in YYYY-MM-DD format.


In [None]:
# ---------------------------------------------------------------------------------------
# Alternate Transcribe & Interface
# ---------------------------------------------------------------------------------------
# The code below is an alternative approach to audio transcription 
# and demonstration of a separate Gradio interface.

def transcribe(audio):
    """
    Alternative transcription function that uses OpenAI's "whisper-1" model
    through the ``audio.transcriptions`` endpoint, requesting plain text.

    Note: this redefines the ``transcribe`` helper from an earlier cell.

    Args:
        audio (str): The file path to the audio file.

    Returns:
        str: The transcribed text, or a string starting with "Error: " if the
        file cannot be opened or the request fails.
    """
    try:
        # The context manager closes the file even when the request fails;
        # previously the handle was left open after every call.
        with open(audio, 'rb') as audio_file:
            # response_format="text" makes the client return the text directly.
            return openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
    except Exception as e:
        return f"Error: {str(e)}"

# Path of the most recent recording handed to record_and_transcribe.
# It is written there but not read anywhere else in the visible cells.
recorded_audio = None

def record_and_transcribe(audio):
    """
    Remembers the latest recording and returns its transcription.

    When there is no recording (for example the audio input was cleared and
    the live interface called back with None) an empty string is returned
    instead of the "Error: ..." text transcribe would produce for a missing path.

    Args:
        audio (str): The file path to the recorded audio file, or None.

    Returns:
        str: The transcribed text from the audio, or "" when nothing was recorded.
    """
    global recorded_audio
    recorded_audio = audio
    if not audio:
        return ""
    return transcribe(audio)

# Stand-alone speech-to-text demo: one audio input wired to a text output.
# live=True re-runs the function whenever the input changes.
iface = gr.Interface(
    record_and_transcribe,
    gr.Audio(label="Record your voice", type="filepath"),
    "text",
    live=True,
    title="Speech-to-Text Transcription",
    description="Record your voice and get a text transcription.",
)

# Start the demo with default launch settings.
iface.launch()

In [None]:

##############
# Your new chat function
##############
def chat(message, history):
    """
    Sends the system message, prior history and new user message to the model,
    runs any tools it requests, and returns the final reply text.

    Note: this redefines the ``chat`` function from earlier cells.

    Args:
        message (str): The latest user message.
        history (list): Prior messages as dicts with 'role' and 'content';
                        the list itself is not modified.

    Returns:
        str: The content of the model's last response.
    """
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
    print("Assistant:", response.choices[0].message.content)

    # If the assistant invoked a tool, handle it
    if response.choices[0].finish_reason == "tool_calls":
        # The assistant's tool-call request is appended
        assistant_message = response.choices[0].message
        messages.append(assistant_message)
        for tool_call in assistant_message.tool_calls:
            name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)
            result = call_function(name, args)

            # Add the tool's response
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": str(result)
            })

        # Make the follow-up request once, after the loop: it must include a
        # result for every tool_call_id, so requesting after each individual
        # result breaks turns with more than one tool call.
        response = openai.chat.completions.create(model=MODEL, messages=messages)

    # Finally, return just the string content of the model's last response
    return response.choices[0].message.content

##############
# Translation function for the right chat window
##############
def translation(message, history):
    """
    Sends ``message`` (after any usable prior turns) to Claude with the
    translation system prompt and returns the reply text.

    Note: this redefines the ``translation`` function from an earlier cell.

    Args:
        message (str): The text to translate; sent as the final user turn.
        history (list): Earlier turns; entries lacking 'role' or 'content' are
                        dropped and the rest reduced to those two keys.
                        May be empty or None.

    Returns:
        str: The text of the first content item in the reply.
    """
    outgoing = []
    for turn in (history or []):
        # Forward only well-formed turns, without any extra fields.
        if "role" in turn and "content" in turn:
            outgoing.append({'role': turn['role'], 'content': turn['content']})
    outgoing.append({"role": "user", "content": message})

    reply = claude.messages.create(
        model="claude-3-5-sonnet-latest",
        max_tokens=200,
        system=translation_system_message,
        messages=outgoing
    )
    return reply.content[0].text


##############
# Helper for the left chat interface
##############
def do_chat(user_message, chat_history):
    """
    Event handler for the left chat pane.

    1) Invokes chat(...) with the message and the history as it stands
    2) Appends the user message and the assistant's reply to ``chat_history``
       in place (a list of {"role": ..., "content": ...} dicts)
    3) Returns an empty string (clears the textbox) and the same list

    Note: this redefines the ``do_chat`` function from an earlier cell.
    """
    assistant_reply = chat(user_message, chat_history)
    for role, content in (("user", user_message), ("assistant", assistant_reply)):
        chat_history.append({"role": role, "content": content})
    return "", chat_history

##############
# Helper for the right chat interface
##############
def do_translation(chat_history, translation_history):
    """
    Mirrors the left-hand conversation (chat_history) into translated form,
    keeping the same order and roles.

    Translations already present in ``translation_history`` are reused and
    only the messages added since then are sent to ``translation``; before,
    the whole conversation was re-translated on every turn. If the two
    histories do not line up role-for-role, everything is translated again.

    Note: this redefines the ``do_translation`` function from an earlier cell.

    Args:
        chat_history (list): Left-chat messages, dicts with 'role' and 'content'.
        translation_history (list): Translated messages currently shown on the
                                    right (may be empty or None).

    Returns:
        list: A new list of translated messages (each with 'role' and 'content').
    """
    chat_history = chat_history or []
    previous = translation_history or []

    # Trust the existing translations only when they form a role-for-role
    # prefix of the current conversation.
    reusable = 0
    if len(previous) <= len(chat_history) and all(
        isinstance(done, dict) and done.get("role") == msg["role"]
        for done, msg in zip(previous, chat_history)
    ):
        reusable = len(previous)

    # Start from a copy so the caller's list is never modified.
    translated_history = list(previous[:reusable])

    for msg in chat_history[reusable:]:
        # Translate both user and assistant messages, preserving the role
        translated_text = translation(msg["content"], [])
        translated_history.append({"role": msg["role"], "content": translated_text})

    return translated_history

def transcribe(audio):
    """
    Converts an audio file to text with OpenAI's "whisper-1" model via the
    ``audio.translations`` endpoint.

    Note: this redefines the ``transcribe`` function from earlier cells.

    Args:
        audio (str): The file path to the audio file.

    Returns:
        str: The resulting text, or a string starting with "Error: " if the
        file cannot be opened, the request fails, or the response has an
        unexpected shape.
    """
    try:
        # Close the file even when the request fails; the handle used to be
        # left open after every call.
        with open(audio, 'rb') as audio_file:
            response = openai.audio.translations.create(model="whisper-1", file=audio_file)

        # Accept the response shapes the client may return.
        if hasattr(response, 'text'):
            return response.text
        if isinstance(response, dict) and 'text' in response:
            return response['text']
        if isinstance(response, str):
            return response
        return f"Error: Unexpected response format: {response}"
    except Exception as e:
        return f"Error: {str(e)}"



def record_and_send(audio, chat_history):
    """
    UI callback: transcribes a recording and sends the text through do_chat.

    When there is no recording (for example the audio component was cleared
    and the change event fired with None) nothing is sent; previously the
    "Error: ..." string from transcribe was posted to the assistant as if the
    user had typed it.

    Note: this redefines the ``record_and_send`` function from an earlier cell.

    Args:
        audio (str): The file path to the recorded audio file, or None.
        chat_history (list): The current conversation history of the chat.

    Returns:
        tuple: ("", chat history), updated when a recording was processed.
    """
    # Nothing recorded: leave the conversation untouched.
    if not audio:
        return "", chat_history

    text = transcribe(audio)
    return do_chat(text, chat_history)

##############
# Gradio UI
##############
# Two chat panes (assistant on the left, translation on the right), a text box,
# an audio recorder and a clear button. Rebuilds `demo` from an earlier cell.
with gr.Blocks() as demo:
    with gr.Row():
        chatbot = gr.Chatbot(label="AI Assistant (Left)", height=500, type="messages")
        translator_chatbot = gr.Chatbot(label="Translated Output (Right)", height=500, type="messages")
    with gr.Row():
        entry = gr.Textbox(label="Enter your message:", placeholder="Type your message or use the mic")
        record_button = gr.Audio(type="filepath", label="🎤", elem_id="small-record-button", interactive=True)
    with gr.Row():
        clear = gr.Button("Clear")

    # Audio change -> chat turn -> refresh of the translated pane.
    record_button.change(record_and_send, inputs=[record_button, chatbot], outputs=[entry, chatbot]).then(fn=do_translation, inputs=[chatbot, translator_chatbot], outputs=translator_chatbot)

    # Text submit -> chat turn -> refresh of the translated pane.
    entry.submit(fn=do_chat, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(fn=do_translation, inputs=[chatbot, translator_chatbot], outputs=translator_chatbot)

    # Two output components need two return values; a single [] is read as
    # "zero values returned", so return one empty history per chatbot.
    clear.click(fn=lambda: ([], []), inputs=None, outputs=[chatbot, translator_chatbot], queue=False)

# debug mode on; inbrowser=False means no browser tab is opened automatically.
demo.launch(inbrowser=False, debug=True)



* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Assistant: Hello! How can I assist you today?


In [13]:
def transcribe(audio):
    """
    Transcribes an audio file with OpenAI's "whisper-1" model through the
    ``audio.transcriptions`` endpoint, requesting plain text.

    Note: this redefines the ``transcribe`` function from earlier cells.

    Args:
        audio (str): The file path to the audio file.

    Returns:
        str: The transcribed text, or a string starting with "Error: " if the
        file cannot be opened or the request fails.
    """
    try:
        # Close the file even when the request fails; the handle used to be
        # left open after every call.
        with open(audio, 'rb') as audio_file:
            # response_format="text" makes the client return the text directly.
            return openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
    except Exception as e:
        return f"Error: {str(e)}"

# Path of the most recent recording handed to record_and_transcribe.
# It is written there but not read anywhere else in the visible cells.
recorded_audio = None

def record_and_transcribe(audio):
    """
    Remembers the latest recording and returns its transcription.

    When there is no recording (for example the audio input was cleared and
    the live interface called back with None) an empty string is returned
    instead of the "Error: ..." text transcribe would produce for a missing path.

    Note: this redefines the ``record_and_transcribe`` function from an earlier cell.

    Args:
        audio (str): The file path to the recorded audio file, or None.

    Returns:
        str: The transcribed text, or "" when nothing was recorded.
    """
    global recorded_audio
    recorded_audio = audio
    if not audio:
        return ""
    return transcribe(audio)

# Stand-alone speech-to-text demo: one audio input wired to a text output.
# live=True re-runs the function whenever the input changes.
# Rebuilds `iface` from an earlier cell.
iface = gr.Interface(
    record_and_transcribe,
    gr.Audio(label="Record your voice", type="filepath"),
    "text",
    live=True,
    title="Speech-to-Text Transcription",
    description="Record your voice and get a text transcription.",
)

# Start the demo with default launch settings.
iface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


