<a href="https://colab.research.google.com/github/JayNguyen-123/AI_Phone_Agent/blob/main/RealTime_Phone_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import asyncio
import base64
import json
import logging
import os

import websockets
from dotenv import load_dotenv
from fastapi import FastAPI, WebSocket, Request
from fastapi.responses import HTMLResponse
from fastapi.websockets import WebSocketDisconnect
from twilio.twiml.voice_response import VoiceResponse, Connect, Say, Stream

load_dotenv()

In [None]:
# Configure logging. The level constant lives on the logging module; a bare
# `INFO` is an undefined name and raises NameError.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Read from the environment (populated by load_dotenv() when a .env file exists).
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')      # requires OpenAI Realtime API Access
# Port the server listens on; defaults to 5050 when PORT is unset.
PORT = int(os.getenv('PORT', 5050))


In [None]:
# Instructions sent to the model in the session update.
SYSTEM_MESSAGE = "You are a helpful and bubbly AI assistant who answers any questions I ask"

# Voice name sent to the model in the session update.
VOICE = 'alloy'

# Event types from the OpenAI socket that are echoed to stdout when received.
LOG_EVENT_TYPES = [
    'response.content.done',
    'rate_limits.updated',
    'response.done',
    'input_audio_buffer.committed',
    'input_audio_buffer.speech_stopped',
    'input_audio_buffer.speech_started',
    'response.create',
    'session.created',
]

# When True, the timestamp arithmetic used for truncation is printed.
SHOW_TIMING_MATH = False

app = FastAPI()

# Fail fast: nothing below can work without an API key.
if not OPENAI_API_KEY:
  raise ValueError('Missing the OpenAI key. Please set it in the .env file.')


In [None]:
@app.get("/", response_class=HTMLResponse)
async def index_page():
  """Serve a minimal HTML page confirming that the server is up."""
  status_page = "<html><body><h1>Twilio Media Stream is running!</h1></body></html>"
  return status_page

@app.api_route("/incoming-call", methods=["GET", "POST"])
async def handle_incoming_call(request: Request):
  """Handle incoming call and return TwiML response to connect to Media Stream.

  Args:
    request: The incoming HTTP request. Its URL hostname is used to build the
      ``wss://`` URL of this server's ``/media-stream`` endpoint.

  Returns:
    A response with media type ``application/xml`` whose body is the TwiML
    document containing a ``<Connect>`` verb with a nested stream.
  """
  # request.client may be None (no peer address available), so guard the
  # attribute access instead of letting a log line crash the webhook.
  client_host = request.client.host if request.client else "unknown"
  logger.info("Receiving incoming call request from: %s", client_host)

  response = VoiceResponse()
  host = request.url.hostname
  connect = Connect()
  connect.stream(url=f'wss://{host}/media-stream')
  response.append(connect)
  logger.info("Successfully created the TwiML response")

  return HTMLResponse(content=str(response), media_type="application/xml")

@app.websocket("/media-stream")
async def handle_media_stream(websocket: WebSocket):
  """Handle Websocket connections between Twilio and OpenAI.

  Accepts the incoming WebSocket, opens a second WebSocket to the OpenAI
  Realtime API, sends the session configuration, and then runs two loops
  concurrently: one forwarding caller audio to OpenAI and one forwarding model
  audio back to the caller (including interruption handling).

  Args:
    websocket: The accepted-on-entry FastAPI WebSocket for the media stream.
  """
  print("Client connected")
  await websocket.accept()

  # NOTE(review): `extra_headers` and the `.open` attribute used below look like
  # the legacy websockets client API; newer websockets releases may name these
  # differently - confirm against the installed version.
  async with websockets.connect(
      'wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01',
      extra_headers={
          "Authorization": f"Bearer {OPENAI_API_KEY}",
          "OpenAI-Beta": "realtime=v1"
      }
  ) as openai_ws:
    await send_session_update(openai_ws)

    # Connection specific state, shared by the nested coroutines below.
    stream_sid = None
    latest_media_timestamp = 0
    last_assistant_item = None
    mark_queue = []
    response_start_timestamp_twilio = None

    async def receive_from_twilio():
      """Receive audio data from Twilio and send it to the OpenAI Realtime API."""
      # Every name rebound in this coroutine must be declared nonlocal;
      # otherwise the reset on 'start' would only create unused locals.
      nonlocal stream_sid, latest_media_timestamp
      nonlocal last_assistant_item, response_start_timestamp_twilio
      try:
        async for message in websocket.iter_text():
          data = json.loads(message)
          event = data.get('event')
          if event == 'media' and openai_ws.open:
            latest_media_timestamp = int(data['media']['timestamp'])
            audio_append = {
                "type": "input_audio_buffer.append",
                "audio": data['media']['payload']
            }
            await openai_ws.send(json.dumps(audio_append))

          elif event == 'start':
            stream_sid = data['start']['streamSid']
            print(f"Incoming stream has started {stream_sid}")
            # A new stream starts with clean interruption-tracking state.
            response_start_timestamp_twilio = None
            latest_media_timestamp = 0
            last_assistant_item = None

          elif event == 'mark':
            # One queued mark is consumed per mark event received.
            if mark_queue:
              mark_queue.pop(0)

      except WebSocketDisconnect:
        print("CLient disconnect.")
        if openai_ws.open:
          await openai_ws.close()

    async def send_to_twilio():
      """Receive events from the OpenAI Realtime API, send audio back to Twilio."""
      nonlocal stream_sid, last_assistant_item, response_start_timestamp_twilio
      try:
        async for openai_message in openai_ws:
          response = json.loads(openai_message)
          event_type = response.get('type')
          if event_type in LOG_EVENT_TYPES:
            print(f"Receive event: {event_type}", response)

          if event_type == 'response.audio.delta' and 'delta' in response:
            # The delta is already base64 text. Decode then re-encode so the
            # outgoing payload is a clean base64 string; decoding twice would
            # corrupt (or fail on) the audio bytes.
            audio_payload = base64.b64encode(base64.b64decode(response['delta'])).decode('utf-8')
            audio_delta = {
                "event": "media",
                "streamSid": stream_sid,
                "media": {
                    "payload": audio_payload
                }
            }
            await websocket.send_json(audio_delta)

            # Remember the caller-side timestamp at which this response began,
            # so an interruption can compute how much audio had elapsed.
            if response_start_timestamp_twilio is None:
              response_start_timestamp_twilio = latest_media_timestamp
              if SHOW_TIMING_MATH:
                print(f"Setting start timestamp for new response: {response_start_timestamp_twilio}ms")

            # Update last_assistant_item safely
            if response.get('item_id'):
              last_assistant_item = response['item_id']

            await send_mark(websocket, stream_sid)

          # Trigger an interruption. Your case might work better using
          # 'input_audio_buffer.speech_stopped' or combining the two.
          if event_type == 'input_audio_buffer.speech_started':
            print("Speech started detected.")
            if last_assistant_item:
              print(f"Interrupting response with id: {last_assistant_item}")
              await handle_speech_start_event()
      except Exception as e:
        # Best-effort boundary: report the failure and let the loop end.
        print(f"Error in send_to_twilio: {e}")

    async def handle_speech_start_event():
      """Handle interruption when the caller's speech start."""
      nonlocal response_start_timestamp_twilio, last_assistant_item
      print("Handling speech start event.")
      # Only act when a response is in flight: marks are outstanding and the
      # response start time is known.
      if mark_queue and response_start_timestamp_twilio is not None:
        elapse_time = latest_media_timestamp - response_start_timestamp_twilio
        if SHOW_TIMING_MATH:
          print(f"Calculating elapsed time for truncation: {latest_media_timestamp} - {response_start_timestamp_twilio} = {elapse_time}ms")

        if last_assistant_item:
          if SHOW_TIMING_MATH:
            print(f"Trucating item with ID: {last_assistant_item}, Truncated at: {elapse_time}ms")

          # Tell OpenAI where the assistant item's audio was cut off.
          truncate_event = {
              "type": "conversation.item.truncate",
              "item_id": last_assistant_item,
              "content_index": 0,
              "audio_end_ms": elapse_time
          }
          await openai_ws.send(json.dumps(truncate_event))

        # Send a 'clear' event for this stream on the caller-side socket.
        await websocket.send_json({
            "event": "clear",
            "streamSid": stream_sid
        })

        mark_queue.clear()
        last_assistant_item = None
        response_start_timestamp_twilio = None

    async def send_mark(connection, stream_sid):
      """Send a 'mark' event on `connection` and record it in mark_queue.

      Does nothing when `stream_sid` is falsy (stream not started yet).
      """
      if stream_sid:
        mark_event = {
            "event": "mark",
            "streamSid": stream_sid,
            "mark": {"name": "responsePart"}
        }
        await connection.send_json(mark_event)
        mark_queue.append("responsePart")

    await asyncio.gather(receive_from_twilio(), send_to_twilio())


async def send_initial_conversation_item(openai_ws):
  """Send the opening user message, then request a response to it.

  Two JSON messages are sent on `openai_ws`, in order: a
  ``conversation.item.create`` carrying the greeting prompt as user text, and
  a ``response.create``.

  Args:
    openai_ws: Connection object exposing an awaitable ``send(str)``.
  """
  greeting_prompt = "Greet the user with 'Hello there! I am an AI voice assistant that will help you with any questions you may have. Please ask me anything you want to know.'"
  opening_message = {
      "type": "message",
      "role": "user",
      "content": [{"type": "input_text", "text": greeting_prompt}],
  }
  outgoing = (
      {"type": "conversation.item.create", "item": opening_message},
      {"type": "response.create"},
  )
  for payload in outgoing:
    await openai_ws.send(json.dumps(payload))

async def send_session_update(openai_ws):
  """Send session update to OpenAI WebSocket.

  Sends a ``session.update`` message configuring turn detection, audio
  formats, voice, instructions, modalities and temperature, then sends the
  initial conversation item.

  Args:
    openai_ws: Connection object exposing an awaitable ``send(str)``.
  """
  session_update = {
      "type": "session.update",
      "session": {
          "turn_detection": {"type": "server_vad"},
          "input_audio_format": "g711_ulaw",
          "output_audio_format": "g711_ulaw",
          "voice": VOICE,
          "instructions": SYSTEM_MESSAGE,
          "modalities": ["text", "audio"],
          "temperature": 0.8,
      }
  }
  # Serialize once; the same text is logged and sent.
  payload = json.dumps(session_update)
  print("Sending session update: ", payload)
  await openai_ws.send(payload)

  await send_initial_conversation_item(openai_ws)

# Start the server only when this file is executed as a script. The module
# name in that case is "__main__"; comparing against "__name__" never matches.
if __name__ == "__main__":
  import uvicorn
  uvicorn.run(app, host="0.0.0.0", port=PORT)




NameError: name 'app' is not defined