In [12]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import anthropic

import sounddevice as sd
import soundfile as sf
import numpy as np
import openai
import io
import scipy.io.wavfile

In [13]:
# Initialization: read credentials from the environment / .env file and build the API clients.

load_dotenv(override=True)

openai_api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')

# Only a short prefix of each key is echoed, never the whole secret.
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)
print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set"
)

# OpenAI chat model and client.
# NOTE: this rebinds the name `openai`, shadowing the `openai` module imported earlier in the notebook.
MODEL = "gpt-4o-mini"
openai = OpenAI()

# Anthropic model and client used for the translation step.
MODEL_CLAUDE = "claude-3-7-sonnet-latest"
claude = anthropic.Anthropic()

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-


In [14]:
# System prompt for the FlightAI assistant. The fragments are concatenated verbatim,
# so every fragment except the last must end with a space to keep sentences apart.
system_message = "You are a helpful assistant for an Airline called FlightAI. "
system_message += "Give short, courteous answers, no more than 1 sentence. "
system_message += "Always be accurate. If you don't know the answer, say so. "
system_message += "And never invoke more than one tool at the time. Don't hesitate to ask to the user to precise if it need to know the price of the flight or the room availability information."

# System prompt for the Claude translation step (target language: Japanese).
system_message_claude = "You act as a translator. You will translate the sentence in japanese."

In [15]:
# Ticket price lookup that is exposed to the model as a tool.

ticket_prices = {"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"}

def get_ticket_price(destination_city):
    """Look up the ticket price for a destination city.

    The lookup ignores case and surrounding whitespace.

    Args:
        destination_city: City name as supplied by the model. May be ``None``
            (or any non-string) when the tool call carried no usable argument.

    Returns:
        The price string from ``ticket_prices`` (e.g. ``"$799"``), or
        ``"Unknown"`` when the city is missing, not a string, or not listed.
    """
    # A tool call without a "destination_city" argument arrives here as None;
    # answer "Unknown" instead of raising AttributeError on .lower().
    if not isinstance(destination_city, str):
        return "Unknown"
    city = destination_city.strip().lower()
    return ticket_prices.get(city, "Unknown")

# Bonus: room availability lookup, also exposed to the model as a tool.
Available_room = {"london": "2", "paris": "4", "tokyo": "0", "berlin": "1"}

def fake_booking(destination_city):
    """Look up how many rooms are available in a destination city.

    The lookup ignores case and surrounding whitespace.

    Args:
        destination_city: City name as supplied by the model. May be ``None``
            (or any non-string) when the tool call carried no usable argument.

    Returns:
        The room count from ``Available_room`` as a string (e.g. ``"2"``), or
        ``"Unknown"`` when the city is missing, not a string, or not listed.
    """
    # A tool call without a "destination_city" argument arrives here as None;
    # answer "Unknown" instead of raising AttributeError on .lower().
    if not isinstance(destination_city, str):
        return "Unknown"
    city = destination_city.strip().lower()
    return Available_room.get(city, "Unknown")

In [16]:
# Tool description for get_ticket_price, in the dictionary structure the
# chat-completions `tools` argument expects: name, description, and a JSON-schema
# object describing the single required `destination_city` argument.

price_function = dict(
    name="get_ticket_price",
    description=(
        "Get the price of a return ticket to the destination city. "
        "Call this whenever you need to know the ticket price, "
        "for example when a customer asks 'How much is a ticket to this city'"
    ),
    parameters=dict(
        type="object",
        properties=dict(
            destination_city=dict(
                type="string",
                description="The city that the customer wants to travel to",
            ),
        ),
        required=["destination_city"],
        additionalProperties=False,
    ),
)

# Tool description for fake_booking: same shape as the price tool, with a single
# required `destination_city` string argument.
booking_function = dict(
    name="fake_booking",
    description=(
        "Get the number of available room for one night booking. "
        "Call this whenever you need to know if there are available rooms in the city, "
        "for example when a customer asks 'Is there any available room in this city?'"
    ),
    parameters=dict(
        type="object",
        properties=dict(
            destination_city=dict(
                type="string",
                description="The city that the customer wants to travel to",
            ),
        ),
        required=["destination_city"],
        additionalProperties=False,
    ),
)

In [17]:
# Wrap each function description in the {"type": "function", ...} envelope;
# order matters only for readability: price tool first, booking tool second.
tools = [
    {"type": "function", "function": spec}
    for spec in (price_function, booking_function)
]

In [18]:
def french_translator(user_prompt):
    """Translate text with Claude as instructed by ``system_message_claude``.

    Despite the function name, the target language is decided entirely by the
    module-level ``system_message_claude`` prompt, not by this function.

    Args:
        user_prompt: Text to translate. ``None`` or a blank string is accepted
            and short-circuits without calling the API.

    Returns:
        The text of the first content block of Claude's reply, or ``""`` when
        there was nothing to translate.
    """
    # The chat step can hand over None (no text content) or an empty reply;
    # there is nothing to translate then, so skip the API round-trip.
    if user_prompt is None or (isinstance(user_prompt, str) and not user_prompt.strip()):
        return ""

    message = claude.messages.create(
        model=MODEL_CLAUDE,
        max_tokens=200,
        temperature=0.7,
        system=system_message_claude,
        messages=[
            {"role": "user", "content": user_prompt},
        ],
    )
    return message.content[0].text

In [19]:
def handle_tool_call(message):
    """Execute the first tool call on an assistant message and build the reply.

    Only ``message.tool_calls[0]`` is handled; any further tool calls on the
    same message are not answered here.

    Args:
        message: Assistant message object exposing ``tool_calls``; each entry
            provides ``id``, ``function.name`` and ``function.arguments``
            (a JSON string).

    Returns:
        A ``(response, city)`` tuple. ``response`` is the ``role="tool"``
        message dict to append to the conversation; ``city`` is the
        ``destination_city`` argument (``None`` if absent).
    """
    tool_call = message.tool_calls[0]
    function_name = tool_call.function.name
    # An empty argument string is treated as "no arguments" rather than a JSON error.
    arguments = json.loads(tool_call.function.arguments or "{}")
    city = arguments.get("destination_city")

    if function_name == 'fake_booking':
        payload = {"destination_city": city, "available_room": fake_booking(city)}
    elif function_name == 'get_ticket_price':
        payload = {"destination_city": city, "price": get_ticket_price(city)}
    else:
        # Do not answer an unrecognised tool as if it were a price lookup;
        # tell the model explicitly that the tool does not exist.
        payload = {"error": f"Unknown tool: {function_name}"}

    response = {
        "role": "tool",
        "content": json.dumps(payload),
        "tool_call_id": tool_call.id
    }
    return response, city

In [20]:
def chat(message, history=None):
    """Answer one user message with the FlightAI assistant and translate the reply.

    The request is sent to the OpenAI chat model with the tool definitions in
    ``tools``. If the model stops to call a tool, the call is executed through
    ``handle_tool_call`` (which answers only the first tool call) and the model
    is asked again, this time without tools, to produce the final answer.

    Args:
        message: The user's message text.
        history: Previous conversation turns as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None`` for a new
            conversation. The list passed in is not modified.

    Returns:
        A ``(history, translated_text)`` tuple: a new history list extended
        with this user turn and the assistant reply, and the reply translated
        by ``french_translator``.
    """
    # Work on a copy so the caller's list is never mutated as a side effect.
    prior_turns = list(history) if history else []

    messages = [{"role": "system", "content": system_message}] + prior_turns + [{"role": "user", "content": message}]
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)

    if response.choices[0].finish_reason == "tool_calls":
        tool_call_message = response.choices[0].message
        tool_response, _ = handle_tool_call(tool_call_message)
        # The assistant's tool request must precede the tool result in the transcript.
        messages.append(tool_call_message)
        messages.append(tool_response)
        response = openai.chat.completions.create(model=MODEL, messages=messages)

    assistant_reply = response.choices[0].message.content
    translated_text = french_translator(assistant_reply)

    # The returned history holds the untranslated assistant reply.
    updated_history = prior_turns + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": assistant_reply},
    ]

    return updated_history, translated_text

In [21]:
# Second OpenAI client instance (the chat code uses `openai`); transcribe_audio
# sends its transcription request through this one.
client = OpenAI()

In [22]:
def record_audio(duration=5, sample_rate=44100):
    """Record single-channel audio with sounddevice and return it as an in-memory WAV.

    Args:
        duration: Length of the recording in seconds.
        sample_rate: Sampling rate in Hz, used both for recording and for the WAV header.

    Returns:
        An ``io.BytesIO`` holding the WAV data, positioned at the start.
    """
    print("🎙️ Speak now...")
    frame_count = int(duration * sample_rate)
    recording = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    sd.wait()  # block until the recording started above has finished
    print("✅ Recording complete.")

    buffer = io.BytesIO()
    sf.write(buffer, recording, sample_rate, format='WAV')
    buffer.seek(0)  # rewind so the consumer reads from the first byte
    return buffer

def transcribe_audio(audio_io):
    """Transcribe an audio file object with the ``whisper-1`` model.

    Args:
        audio_io: Binary file-like object containing the audio. Its ``name``
            attribute is overwritten with ``"input.wav"``.

    Returns:
        The transcribed text from the API response.
    """
    # The buffer needs a filename before upload; presumably the API relies on
    # the extension to recognise the audio format.
    audio_io.name = "input.wav"
    transcript = client.audio.transcriptions.create(file=audio_io, model="whisper-1")
    return transcript.text

In [60]:
# Step 1 of the manual check: record a 5-second clip (it is transcribed in the following cell).
audio_io = record_audio(duration=5)

🎙️ Speak now...
✅ Recording complete.


In [61]:
# Step 2 of the manual check: transcribe the recorded clip and show the text.
transcription = transcribe_audio(audio_io)
print("📝 Transcribed text:", transcription)

📝 Transcribed text: I live in Switzerland and I speak French.


In [23]:
# Voice-driven chat UI: the chat history on the left, the translated reply on the right.
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        translated_output = gr.Textbox(label="Assistant (japanese)", lines=10)

    with gr.Row():
        mic = gr.Audio(label="🎤 Speak your question", type="filepath")

    with gr.Row():
        clear = gr.Button("Clear")

    def process_audio(audio_file_path, history):
        """Transcribe a recorded clip and pass the text through the chat pipeline.

        Args:
            audio_file_path: Path of the recorded audio file, or ``None`` when
                the audio component holds no recording.
            history: Current chatbot history.

        Returns:
            ``(history, translated_text)`` for the chatbot and the translation
            box. The history is returned unchanged, with an empty translation,
            when there is no audio or nothing was transcribed.
        """
        if audio_file_path is None:
            # No audio provided yet, just return current history
            return history, ""

        # Load the clip into memory so the file is closed before the network call.
        # transcribe_audio assigns the upload filename on the buffer itself.
        with open(audio_file_path, "rb") as f:
            audio_bytes = io.BytesIO(f.read())
        text = transcribe_audio(audio_bytes)

        # Silence or an unintelligible clip yields blank text; do not send an
        # empty user turn to the chat model.
        if not text or not text.strip():
            return history, ""

        return chat(text, history)

    mic.change(
        fn=process_audio,
        inputs=[mic, chatbot],
        outputs=[chatbot, translated_output]
    )

    # Clear resets the conversation and the translation box together, so no
    # stale translation is left next to an empty chat.
    clear.click(lambda: (None, ""), inputs=None, outputs=[chatbot, translated_output], queue=False)

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


