Load libraries and dependencies

In [7]:
import asyncio
import html
import json
import os

from agents import (
  Agent,
  handoff,
  RunContextWrapper,
  Runner,
  AsyncOpenAI,
  set_default_openai_client,
  set_default_openai_api,
  function_tool
)
from agents.extensions.handoff_prompt import prompt_with_handoff_instructions
from dotenv import load_dotenv

Load environment variables and configure the Azure OpenAI client

In [8]:
# Read variables from a .env file into the process environment.
load_dotenv()

AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_MODEL = os.getenv("AZURE_OPENAI_MODEL")

OPEN_WEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")

OPENAPI_KEY = os.getenv('OPENAI_API_KEY')

# Fail fast: an unset value would otherwise be formatted into the base URL
# below as the literal text "None" and only show up later as an opaque
# request failure.
_missing_azure_settings = [
    setting_name
    for setting_name, setting_value in {
        "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
        "AZURE_OPENAI_DEPLOYMENT_NAME": AZURE_OPENAI_DEPLOYMENT_NAME,
        "AZURE_OPENAI_API_VERSION": AZURE_OPENAI_API_VERSION,
        "AZURE_OPENAI_API_KEY": AZURE_OPENAI_API_KEY,
    }.items()
    if not setting_value
]
if _missing_azure_settings:
    raise RuntimeError(
        "Missing required environment variables: "
        + ", ".join(_missing_azure_settings)
    )

# Client pointed at the Azure deployment; used as the default client for agents.
# The endpoint is stripped of trailing slashes so the URL never contains "//openai".
external_client = AsyncOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    base_url=f"{AZURE_OPENAI_ENDPOINT.rstrip('/')}/openai/deployments/{AZURE_OPENAI_DEPLOYMENT_NAME}",
    default_headers={"api-key": AZURE_OPENAI_API_KEY},
    default_query={"api-version": AZURE_OPENAI_API_VERSION}
)

# Separate client (default base URL) used only for audio transcription.
transcript_client = AsyncOpenAI(
    api_key=OPENAPI_KEY,
)

# Route agent calls through the Azure client via the chat-completions API.
# NOTE(review): use_for_tracing=False presumably keeps this client out of the
# SDK's trace uploads -- confirm against the Agents SDK configuration docs.
set_default_openai_client(external_client, use_for_tracing=False)
set_default_openai_api("chat_completions")

In [9]:
# System prompt for the sales agent. Each trailing backslash joins the next
# source line onto the same prompt line, so paragraphs break only at the
# blank lines.
INSTRUCTIONS = """
You are a sales agent. Given a specific prompt, you extract the intent \
of a prompt. You only respond to the intent related to sales quotation. \
If the intent does not relate to the sales quotation, you reject the prompt. \
When the intent is requesting the sales quotation, you must use get_quotation tool to \
retrieve the sales quotation. \
When the sales quotation is available, display both the intent of the prompt and the quotation. \
Otherwise, you display the intent of the prompt. \

When you reject the prompt, you should tell the REASON of rejecting the prompt. \

"""

@function_tool
async def get_quotation():
  """Retrieve the sales quotation and return its content as formatted JSON text."""
  # The docstring above doubles as the tool description shown to the model
  # (function_tool derives it from the docstring), so keep it short and factual.
  # JSON files are UTF-8; do not depend on the platform's default encoding.
  with open('sample-quotation/soq-1.json', encoding='utf-8') as f:
    data = json.load(f)
  return f"Here is the content of the quotation:\n\n{json.dumps(data, indent=2)}\n"

# The single agent in this notebook: driven by INSTRUCTIONS and allowed to
# call the get_quotation tool.
sales_agent = Agent(name="Sales Agent", tools=[get_quotation], instructions=INSTRUCTIONS)

In [10]:
async def transcribe(audio_path):
  """Transcribe the audio file at ``audio_path`` and return the text.

  The file is uploaded through ``transcript_client`` using the "whisper-1"
  model with a plain-text response format, and is always labelled as
  "audio/wav". The transcript is printed before being returned.
  """
  request_options = {
    "model": "whisper-1",  # alternative tried previously: "gpt-4o-mini-transcribe"
    "response_format": "text",
  }
  with open(audio_path, 'rb') as audio_file:
    upload = (audio_path, audio_file, "audio/wav")
    transcript = await transcript_client.audio.transcriptions.create(
      file=upload,
      **request_options,
    )
  print(f"Transcription: {transcript}")
  return transcript

async def run_agent_with_transcript(transcript: str):
  """Run ``sales_agent`` on ``transcript`` and return the run's final output."""
  return (await Runner.run(sales_agent, transcript)).final_output

In [11]:
async def transcribe_and_route(audio_path):
  """Transcribe the audio at ``audio_path`` and return the agent's reply to it."""
  return await run_agent_with_transcript(await transcribe(audio_path))

def user_msg(msg: str):
  """Return the HTML snippet for a user chat message.

  ``msg`` is placed after a "Me: " label inside a right-aligned ``<div>``.
  The text is HTML-escaped first so that ``<``, ``>`` and ``&`` in a
  transcript are shown literally instead of being parsed as markup.
  """
  # quote=False: the text lands in element content, not inside an attribute.
  safe_msg = html.escape(str(msg), quote=False)
  return f"""
  <div style="text-align: right;">
    Me: {safe_msg}
  </div>
  """

def bot_msg(msg: str):
  """Return the HTML snippet for a bot chat message.

  ``msg`` is shown in bold after a "Bot: " label inside a left-aligned,
  rounded ``<span>``. The text is HTML-escaped first so that ``<``, ``>``
  and ``&`` in the agent's reply are shown literally instead of being
  parsed as markup.
  """
  # quote=False: the text lands in element content, so quotes (common in the
  # JSON quotation) can stay as they are.
  safe_msg = html.escape(str(msg), quote=False)
  return f"""
  <div style="text-align: left;">
      <span style="display: inline-block; padding: 8px 12px; border-radius: 16px; max-width: 70%;">
          Bot: <strong>{safe_msg}</strong>
      </span>
  </div>
  """


async def chat_wrapper(audio_path, history):
  """Handle one voice turn and return the updated chat history.

  The recording is transcribed, the transcript is sent to the sales agent,
  and one ``(user_html, bot_html)`` pair is appended to ``history``.

  Args:
    audio_path: Path of the recorded audio file, or ``None``/empty when
      nothing was recorded.
    history: List of ``(user_html, bot_html)`` pairs; it is extended in
      place. ``None`` is treated as an empty history.

  Returns:
    The history list, unchanged when there was no recording.
  """
  if history is None:
    history = []

  # Submit was clicked without a recording: there is nothing to open or
  # transcribe, so leave the conversation as it is instead of crashing.
  if not audio_path:
    return history

  transcription = await transcribe(audio_path)
  response = await run_agent_with_transcript(transcription)

  # One pair per exchange: the first slot is rendered as the user's message
  # and the second as the bot's reply. Putting role labels in the first slot
  # would render both real messages as bot replies on two separate rows.
  history.append((user_msg(transcription), bot_msg(response)))
  return history

In [12]:
import gradio as gr

# gr.Interface(
#     fn=transcribe_and_route,
#     inputs=gr.Audio(sources="microphone", type="filepath"),
#     outputs=["text"],
#     title="Voice Intent Agent",
#     description="Uses Whisper for transcription and Agent SDK for intent recognition."
# ).launch()

async def process_audio(audio_path, chat_history):
    """Gradio click handler: run one voice turn and return the new chat history.

    Declared ``async`` so Gradio awaits it directly. Calling ``asyncio.run``
    here instead would start and close a fresh event loop on every click,
    while the module-level async API clients are reused across clicks.
    """
    return await chat_wrapper(audio_path, chat_history)


# Voice chat UI: record audio, submit it, and show the exchange in a chatbot.
with gr.Blocks() as app:
  gr.Markdown('Uses Whisper for transcription and Agent SDK for intent recognition.')
  chatbot = gr.Chatbot()
  audio = gr.Audio(sources="microphone", type="filepath")
  btn = gr.Button("Submit")

  # Conversation history carried between clicks.
  state = gr.State([])

  # Step 1: process the recording and show the updated history.
  submitted = btn.click(fn=process_audio, inputs=[audio, state], outputs=chatbot)
  # Step 2: copy what the chatbot shows back into the stored history.
  synced = submitted.then(fn=lambda x: x, inputs=chatbot, outputs=state)
  # Step 3: clear the audio widget for the next recording.
  synced.then(fn=lambda: None, inputs=None, outputs=audio)

app.launch()


  chatbot = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




Transcription: Tell me how is the temperature now?

Transcription: Then can you show me the sales quotation?

Transcription: Ok, can you suggest me one of the famous restaurants in Puchong?

