In [5]:
import logging
import os

# Switch the working directory to TARGET_DIRECTORY when that environment
# variable is set; otherwise the current working directory is kept.
try:
    target_directory = os.getenv("TARGET_DIRECTORY", os.getcwd())
    if not os.path.exists(target_directory):
        logging.error(f"Directory does not exist: {target_directory}")
    else:
        os.chdir(target_directory)
        logging.info(f"Successfully changed directory to: {os.getcwd()}")
except Exception as exc:
    # Log the full traceback but keep the notebook running.
    logging.exception(f"An error occurred while changing directory: {exc}")

In [12]:
import os
import json
import asyncio
import contextlib
import uuid
from typing import List, Dict, Any, Optional

# GPT, TTS, tools
from openai import AzureOpenAI
from src.speech.text_to_speech import SpeechSynthesizer
from usecases.browser_RTMedAgent.backend.tools import available_tools
from usecases.browser_RTMedAgent.backend.functions import (
    schedule_appointment,
    refill_prescription,
    lookup_medication_info,
    evaluate_prior_authorization,
    escalate_emergency,
    authenticate_user,
)
from usecases.browser_RTMedAgent.backend.prompt_manager import PromptManager
from utils.ml_logging import get_logger

# ---------------------------------------------------------------------------
# Globals & helpers
# ---------------------------------------------------------------------------
# Phrases that signal the caller wants to end the conversation;
# check_for_stopwords() looks for them as substrings of the lowercased prompt.
STOP_WORDS = ["goodbye", "exit", "see you later", "bye"]
# Logger shared by the helpers defined in this cell.
logger = get_logger()
# Module-level PromptManager instance.
# NOTE(review): ConversationManager creates its own PromptManager unless one is
# passed in, and nothing in the visible cells passes this instance — confirm
# whether it is still needed.
prompt_manager = PromptManager()

# Azure OpenAI chat client; endpoint and key are read from the environment.
az_openai_client = AzureOpenAI(
    api_version="2025-02-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_KEY"),
)

# Text-to-speech client used by speak_and_log().
az_speech_synthesizer_client = SpeechSynthesizer()

# Maps the tool name requested by the model to the callable implementing it.
# handle_tool_call() looks tools up here and awaits the selected callable.
function_mapping = {
    "schedule_appointment": schedule_appointment,
    "refill_prescription": refill_prescription,
    "lookup_medication_info": lookup_medication_info,
    "evaluate_prior_authorization": evaluate_prior_authorization,
    "escalate_emergency": escalate_emergency,
    "authenticate_user": authenticate_user,
}


def check_for_stopwords(prompt: str, stop_words: Optional[List[str]] = None) -> bool:
    """Return True when ``prompt`` contains any stop phrase (case-insensitive).

    Args:
        prompt: The user's utterance. ``None`` or an empty string never matches.
        stop_words: Phrases to look for. When omitted, the module-level
            ``STOP_WORDS`` list is used.

    Returns:
        True if at least one non-empty phrase occurs as a substring of the
        lowercased prompt, otherwise False.
    """
    if not prompt:
        return False

    phrases = STOP_WORDS if stop_words is None else stop_words
    # Lowercase once instead of once per phrase.
    lowered = prompt.lower()
    # Empty phrases are skipped: "" is a substring of every string.
    return any(phrase.lower() in lowered for phrase in phrases if phrase)


async def speak_and_log(text: str):
    """Print the agent's utterance, then pass it to the speech synthesizer.

    Any exception raised while starting synthesis is logged and not re-raised.

    Args:
        text: The sentence(s) the agent should say.
    """
    banner = f"[Agent 🗣️]: {text}"
    print(banner)
    try:
        az_speech_synthesizer_client.start_speaking_text(text)
    except Exception as tts_error:
        logger.error(f"Error synthesizing TTS: {tts_error}")


class ConversationManager:
    """Holds the prompt manager, a short id and the chat history of one conversation.

    Attributes:
        pm: PromptManager used to fetch the system prompt.
        cid: First eight characters of a random UUID4 (presumably a
            conversation id — it is not read elsewhere in the visible code).
        hist: List of chat messages, seeded with a single system message.
    """

    def __init__(self, auth: bool = True, pm: Optional[PromptManager] = None):
        """Seed the history with the authentication or the general system prompt.

        Args:
            auth: When True the ``voice_agent_authentication.jinja`` prompt is
                used, otherwise ``voice_agent_system.jinja``.
            pm: PromptManager to use; a new one is created when omitted.
        """
        self.pm = pm or PromptManager()
        self.cid = uuid.uuid4().hex[:8]
        template_name = (
            "voice_agent_authentication.jinja" if auth else "voice_agent_system.jinja"
        )
        system_prompt = self.pm.get_prompt(template_name)
        self.hist = [{"role": "system", "content": system_prompt}]


# ---------------------------------------------------------------------------
# Shared: GPT streaming response + tool handling
# ---------------------------------------------------------------------------
async def process_gpt_response(convo_manager: ConversationManager, user_prompt: str):
    """Send one user turn to the chat model, speak the streamed reply and run a tool call.

    The user prompt is appended to ``convo_manager.hist`` and the model reply is
    streamed. Assistant text is buffered and handed to ``speak_and_log`` each
    time a chunk ends a sentence; the complete text is then stored in the
    history. If the model requested a tool, the tool-call message is appended
    and ``handle_tool_call`` is awaited.

    Args:
        convo_manager: Conversation whose ``hist`` is sent to the model and
            extended in place.
        user_prompt: The text of the user's turn.

    Returns:
        The value returned by ``handle_tool_call`` when the requested tool is
        ``authenticate_user``; ``None`` in every other case.

    Raises:
        asyncio.CancelledError: Re-raised after logging when the task is cancelled.

    NOTE(review): ``az_openai_client`` is the synchronous ``AzureOpenAI`` client
    and the stream is consumed with a plain ``for`` loop, so the event loop is
    presumably blocked between chunks — confirm before using this under load.
    """
    # A chunk flushes the speech buffer when it ends with one of these
    # characters (optionally followed by closing quotes/brackets) or with a
    # newline. The earlier exact-match test on the stripped chunk could never
    # match "\n" and missed chunks such as '."' or "?\n".
    sentence_end_chars = (".", "!", "?", ";", "。", "！", "？", "；")
    closing_marks = "\"'”’)]"

    tool_call_id = None
    tool_name = None
    function_call_arguments = ""
    pending_speech: List[str] = []
    full_response = ""

    convo_manager.hist.append({"role": "user", "content": user_prompt})

    try:
        response = az_openai_client.chat.completions.create(
            stream=True,
            messages=convo_manager.hist,
            tools=available_tools,
            tool_choice="auto",
            max_tokens=4096,
            temperature=0.5,
            top_p=1.0,
            model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_ID"),
        )

        for chunk in response:
            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta

            # Collect tool-call fragments (id, name and argument pieces).
            if delta.tool_calls:
                for tool_call in delta.tool_calls:
                    # Only a single tool call is supported. Fragments that
                    # belong to a parallel call (index > 0) are skipped so
                    # their arguments are not concatenated into invalid JSON.
                    if (getattr(tool_call, "index", 0) or 0) != 0:
                        continue
                    tool_call_id = tool_call.id or tool_call_id
                    fragment = tool_call.function
                    if fragment is None:
                        continue
                    if fragment.name:
                        tool_name = fragment.name
                    if fragment.arguments:
                        function_call_arguments += fragment.arguments
                continue

            # Regular streamed assistant text.
            if delta.content:
                chunk_text = delta.content
                pending_speech.append(chunk_text)
                full_response += chunk_text

                tail = chunk_text.rstrip(" \t")
                at_sentence_end = tail.endswith("\n") or tail.rstrip(
                    closing_marks
                ).endswith(sentence_end_chars)
                if at_sentence_end:
                    text_to_speak = "".join(pending_speech).strip()
                    if text_to_speak:
                        await speak_and_log(text_to_speak)
                        pending_speech.clear()

        # Speak whatever is left after the stream ends.
        final_text = "".join(pending_speech).strip()
        if final_text:
            await speak_and_log(final_text)

        # Only store assistant text when the model actually produced some.
        if full_response.strip():
            convo_manager.hist.append(
                {"role": "assistant", "content": full_response.strip()}
            )

        # A tool call is complete once both its id and its name are known.
        # Empty arguments are valid for tools without parameters and are sent
        # as an empty JSON object.
        if tool_call_id and tool_name:
            arguments_json = (
                function_call_arguments if function_call_arguments.strip() else "{}"
            )
            convo_manager.hist.append(
                {
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [
                        {
                            "id": tool_call_id,
                            "type": "function",
                            "function": {
                                "name": tool_name,
                                "arguments": arguments_json,
                            },
                        }
                    ],
                }
            )

            tool_result = await handle_tool_call(
                tool_name, tool_call_id, arguments_json, convo_manager
            )
            if tool_name == "authenticate_user":
                return tool_result

    except asyncio.CancelledError:
        logger.info(f"🔚 process_gpt_response cancelled for input: '{user_prompt[:40]}'")
        raise

    return None


# ---------------------------------------------------------------------------
# Handle tool call
# ---------------------------------------------------------------------------
async def handle_tool_call(
    tool_name, tool_id, function_call_arguments, convo_manager: ConversationManager
):
    """Execute the tool requested by the model and record its output in the history.

    A ``role: "tool"`` message answering ``tool_id`` is appended on every path.
    The caller has already stored the assistant ``tool_calls`` message, and a
    tool call left without a matching tool message makes the history invalid
    for the next chat-completions request.

    Args:
        tool_name: Name of the tool; looked up in ``function_mapping``.
        tool_id: Id of the tool call being answered.
        function_call_arguments: JSON string with the tool arguments. An empty
            or whitespace-only string is treated as ``{}``.
        convo_manager: Conversation whose ``hist`` is extended in place.

    Returns:
        The tool result (decoded from JSON when the tool returned a string) on
        success. ``{}`` when the tool is unknown, the arguments are not valid
        JSON, or the tool returned a string that is not valid JSON; no spoken
        follow-up is generated on these failure paths.

    Raises:
        Exception: Whatever the tool raises is re-raised after an error tool
            message has been recorded.
    """
    logger.info(f"tool_name: {tool_name}")
    logger.info(f"tool_id: {tool_id}")
    logger.info(f"function_call_arguments: {function_call_arguments}")

    def _append_tool_message(payload) -> None:
        # default=str keeps serialization from failing on values json cannot
        # encode natively, which would again leave the tool call unanswered.
        convo_manager.hist.append(
            {
                "tool_call_id": tool_id,
                "role": "tool",
                "name": tool_name,
                "content": json.dumps(payload, default=str),
            }
        )

    function_to_call = function_mapping.get(tool_name)
    if function_to_call is None:
        logger.error(f"❌ Unknown tool requested: {tool_name}")
        _append_tool_message({"error": f"Unknown tool '{tool_name}'."})
        return {}

    try:
        parsed_args = json.loads((function_call_arguments or "").strip() or "{}")
    except json.JSONDecodeError as e:
        logger.error(f"❌ Error parsing function arguments: {e}")
        _append_tool_message({"error": "Tool arguments were not valid JSON."})
        return {}

    try:
        result_json = await function_to_call(parsed_args)
    except Exception as e:
        # Keep the history valid, then let the caller see the failure as before.
        logger.error(f"❌ Function '{tool_name}' raised: {e}")
        _append_tool_message({"error": f"Tool '{tool_name}' failed: {e}"})
        raise

    logger.info(f"✅ Function '{tool_name}' executed. Result: {result_json}")

    if isinstance(result_json, str):
        try:
            result = json.loads(result_json)
        except json.JSONDecodeError as e:
            logger.error(f"❌ Function '{tool_name}' returned non-JSON output: {e}")
            _append_tool_message(
                {"error": "Tool returned a non-JSON result.", "raw_output": result_json}
            )
            return {}
    else:
        result = result_json

    _append_tool_message(result)

    await process_tool_followup(convo_manager)
    return result


# ---------------------------------------------------------------------------
# GPT follow-up after tool completes
# ---------------------------------------------------------------------------
async def process_tool_followup(convo_manager: ConversationManager):
    """Ask the model for a spoken follow-up once a tool result is in the history.

    The current ``convo_manager.hist`` is sent without tool definitions. The
    streamed reply is spoken sentence by sentence through ``speak_and_log``,
    and the complete reply is then appended to the history as one assistant
    message.

    Args:
        convo_manager: Conversation whose ``hist`` is sent to the model and
            extended in place.
    """
    # A chunk flushes the speech buffer when it ends with one of these
    # characters (optionally followed by closing quotes/brackets) or with a
    # newline. An exact-match test on the stripped chunk can never match "\n"
    # and misses chunks such as '."' or "?\n".
    sentence_end_chars = (".", "!", "?", ";", "。", "！", "？", "；")
    closing_marks = "\"'”’)]"

    pending_speech: List[str] = []
    full_response = ""

    response = az_openai_client.chat.completions.create(
        stream=True,
        messages=convo_manager.hist,
        temperature=0.5,
        top_p=1.0,
        max_tokens=4096,
        model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_ID"),
    )

    for chunk in response:
        if not chunk.choices:
            continue
        chunk_message = getattr(chunk.choices[0].delta, "content", None)
        if not chunk_message:
            continue

        pending_speech.append(chunk_message)
        full_response += chunk_message

        tail = chunk_message.rstrip(" \t")
        at_sentence_end = tail.endswith("\n") or tail.rstrip(closing_marks).endswith(
            sentence_end_chars
        )
        if at_sentence_end:
            text_to_speak = "".join(pending_speech).strip()
            if text_to_speak:
                await speak_and_log(text_to_speak)
                pending_speech.clear()

    # Speak whatever is left after the stream ends.
    final_text = "".join(pending_speech).strip()
    if final_text:
        await speak_and_log(final_text)

    # Store the whole reply. The unspoken tail alone would drop every sentence
    # already flushed to TTS, and would store nothing when the reply ends on
    # punctuation.
    reply = full_response.strip()
    if reply:
        convo_manager.hist.append({"role": "assistant", "content": reply})

Templates found: ['voice_agent_authentication.jinja', 'voice_agent_system.jinja']


In [13]:
cm = ConversationManager(auth=True)
await speak_and_log(
    (
        "Hello from XMYX Healthcare Company! Before I can assist you, "
        "let’s verify your identity. How may I address you?"
    )
)
cm.hist.append(
    {
        "role": "assistant",
        "content": "Hello from XMYX Healthcare Company! Before I can assist you, "
        "let’s verify your identity. How may I address you?",
    }
)
user_input = "My name is Alice Brown."
await process_gpt_response(cm, user_input)
user_input = "I was born on 1987-04-12"
await process_gpt_response(cm, user_input)
user_input = "phone number is 5552971078"
await process_gpt_response(cm, user_input)
user_input = "Yes"
result = await process_gpt_response(cm, user_input)
if result and result.get("authenticated"):
    print("✅ Authenticated")
else:
    print("❌ Authentication failed")

2025-04-18 12:02:24,261 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Hello from XMYX Healthcare Com... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Hello from XMYX Healthcare Com...


Templates found: ['voice_agent_authentication.jinja', 'voice_agent_system.jinja']
[Agent 🗣️]: Hello from XMYX Healthcare Company! Before I can assist you, let’s verify your identity. How may I address you?


2025-04-18 12:02:24,876 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Thank you, Alice.... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Thank you, Alice....
2025-04-18 12:02:24,884 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Could you please provide your ... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Could you please provide your ...
2025-04-18 12:02:24,906 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Please say it in the format "M... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Please say it in the format "M...


[Agent 🗣️]: Thank you, Alice.
[Agent 🗣️]: Could you please provide your date of birth?
[Agent 🗣️]: Please say it in the format "Month Day, Year."


2025-04-18 12:02:25,346 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Thank you for providing your d... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Thank you for providing your d...
2025-04-18 12:02:25,351 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Let me confirm it with you: Ap... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Let me confirm it with you: Ap...
2025-04-18 12:02:25,354 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Is that correct?... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Is that correct?...


[Agent 🗣️]: Thank you for providing your date of birth.
[Agent 🗣️]: Let me confirm it with you: April 12, 1987.
[Agent 🗣️]: Is that correct?


2025-04-18 12:02:25,913 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Thank you, Alice.... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Thank you, Alice....
2025-04-18 12:02:25,949 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Let me confirm your phone numb... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Let me confirm your phone numb...
2025-04-18 12:02:25,953 - micro - MainProcess - INFO     [🔊] Speaking text (server speaker): Is that correct?... (text_to_speech.py:start_speaking_text:55)
INFO:micro:[🔊] Speaking text (server speaker): Is that correct?...


[Agent 🗣️]: Thank you, Alice.
[Agent 🗣️]: Let me confirm your phone number: 5 5 5 2 9 7 1 0 7 8.
[Agent 🗣️]: Is that correct?


2025-04-18 12:02:26,418 - micro - MainProcess - INFO     tool_name: authenticate_user (2681065656.py:handle_tool_call:167)
INFO:micro:tool_name: authenticate_user
2025-04-18 12:02:26,420 - micro - MainProcess - INFO     tool_id: call_Eq2HrrfloZaVT5cvFnau02QH (2681065656.py:handle_tool_call:168)
INFO:micro:tool_id: call_Eq2HrrfloZaVT5cvFnau02QH
2025-04-18 12:02:26,422 - micro - MainProcess - INFO     function_call_arguments: {"first_name":"Alice","last_name":"Brown","phone_number":"5552971078"} (2681065656.py:handle_tool_call:169)
INFO:micro:function_call_arguments: {"first_name":"Alice","last_name":"Brown","phone_number":"5552971078"}
2025-04-18 12:02:26,425 - micro - MainProcess - INFO     🔎 Checking user: Alice Brown with phone: 5552971078 (functions.py:authenticate_user:135)
INFO:micro:🔎 Checking user: Alice Brown with phone: 5552971078
2025-04-18 12:02:26,427 - micro - MainProcess - INFO     📞 Cleaned stored phone: 5552971078 (functions.py:authenticate_user:149)
INFO:micro:📞 Cleane

[Agent 🗣️]: Thank you for verifying your information, Alice.
[Agent 🗣️]: Let’s continue.
[Agent 🗣️]: How can I assist you today?
✅ Authenticated


In [14]:
# Display the value returned by the last process_gpt_response() call above.
result

{'authenticated': True,
 'message': 'Authenticated Alice Brown.',
 'patient_id': 'P54321'}

In [15]:
# Dump the full conversation history (system prompt, user/assistant turns and
# tool messages) as indented JSON.
print(json.dumps(cm.hist, indent=2))

[
  {
    "role": "system",
    "content": "\n\n\"\"\"Task\"\"\"\nYou are a professional and empathetic healthcare voice agent.  \nYour sole responsibility in this phase is to **verify the identity** of the caller before providing any assistance\u2014except in the case of an emergency.\n\n\"\"\"Purpose\"\"\"\nProtect user privacy and ensure that only verified users receive assistance.  \nDo **not** proceed to any service-related request until identity is confirmed, **unless** the call involves a possible emergency.\n\n---\n\n\"\"\"Instructions\"\"\"\n\n1. **Emergency Check**:\n  - As the caller begins speaking, **carefully listen for any signs of an emergency** (e.g., chest pain, difficulty breathing, suicidal thoughts, etc.).\n  - If an emergency is detected **at any point**, immediately trigger the internal tool:\n    **`escalate_emergency`**\n  - Respond with:\n    > \u201cThis sounds like an urgent medical issue. I\u2019m going to escalate this right away so you can get immediate h