In [1]:
import logging
import os

# set the directory to the location of the script
try:
    os.chdir("../..")
    target_directory = os.getenv(
        "TARGET_DIRECTORY", os.getcwd()
    )  # Use environment variable if available
    if os.path.exists(target_directory):
        os.chdir(target_directory)
        print(f"Changed directory to: {os.getcwd()}")
        logging.info(f"Successfully changed directory to: {os.getcwd()}")
    else:
        logging.error(f"Directory does not exist: {target_directory}")
except Exception as e:
    logging.exception(f"An error occurred while changing directory: {e}")

Changed directory to: c:\Users\pablosal\Desktop\gbb-ai-audio-agent


In [3]:
from src.speech.text_to_speech import SpeechSynthesizer
from src.speech.conversation_recognizer import StreamingConversationTranscriberFromBytes
from openai import AzureOpenAI

if "az_speech_recognizer_stream_client" not in locals():
    az_speech_recognizer_stream_client = StreamingConversationTranscriberFromBytes(
        audio_format="pcm",
        candidate_languages=["en-US", "fr-FR", "de-DE", "es-ES", "it-IT"],
        enable_diarisation=True,
    )

if "az_speach_synthesizer_client" not in locals():
    az_speach_synthesizer_client = SpeechSynthesizer(
        voice="en-US-Ava:DragonHDLatestNeural"
    )

# Ensure Azure OpenAI client is initialized only if not already defined
if "client" not in locals():
    client = AzureOpenAI(
        api_version="2025-02-01-preview",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_OPENAI_KEY"),
    )

[2025-08-07 01:32:32,875] INFO - micro: Azure Monitor tracing initialized for speech synthesizer
INFO:micro:Azure Monitor tracing initialized for speech synthesizer
[2025-08-07 01:32:32,897]  INFO - micro: Azure Monitor tracing initialized for speech synthesizer
INFO:micro:Azure Monitor tracing initialized for speech synthesizer
[2025-08-07 01:32:32,897] INFO - micro: Creating SpeechConfig with API key authentication
INFO:micro:Creating SpeechConfig with API key authentication
[2025-08-07 01:32:32,912]INFO - micro: Creating SpeechConfig with API key authentication
INFO:micro:Creating SpeechConfig with API key authentication
[2025-08-07 01:32:32,912] INFO - micro: Speech synthesizer initialized successfully
INFO:micro:Speech synthesizer initialized successfully
 INFO - micro: Speech synthesizer initialized successfully
INFO:micro:Speech synthesizer initialized successfully


In [4]:
# Define end-of-sentence markers for TTS
TTS_ENDS = {".", "!", "?", ";", "\n"}

# Flags and buffers
is_synthesizing = False
user_buffer = ""
assistant_buffer = ""
tts_thread = None

In [None]:
import os, time, threading


def on_final(text, lang, speaker_id):
    global user_buffer
    user_buffer += text.strip() + "\n"
    print(f"🧾 User {[speaker_id]} (final) in {lang}: {text}")


def assistant_speak(text):
    global is_synthesizing
    print("Hi there, I am a assistant_speak callback!")
    is_synthesizing = True
    print("Syntetixing:", is_synthesizing)
    az_speach_synthesizer_client.start_speaking_text(text)


def on_partial(text, lang, speaker_id):
    global is_synthesizing
    if is_synthesizing:
        # az_speach_synthesizer_client.stop_speaking()
        is_synthesizing = False
    print(f"🗣️ User {[speaker_id]} (partial) in {lang}: {text}")


az_speech_recognizer_stream_client.set_partial_result_callback(on_partial)
az_speech_recognizer_stream_client.set_final_result_callback(on_final)

# Start recognition
az_speech_recognizer_stream_client.start()
print("🎙️ Speak now...")

# Start mic streaming thread
import pyaudio

RATE, CHANNELS, CHUNK = 16000, 1, 1024
audio = pyaudio.PyAudio()
stream = audio.open(
    format=pyaudio.paInt16,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)


def mic_loop():
    while True:
        data = stream.read(CHUNK, exception_on_overflow=False)
        az_speech_recognizer_stream_client.write_bytes(data)


threading.Thread(target=mic_loop, daemon=True).start()

messages = [{"role": "system", "content": "You are a helpful assistant."}]

user_buffer = ""  # This should be filled in by your STT callback as before

while True:
    if user_buffer:
        full_conversation = (
            "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in messages])
            + f"\nUser: {user_buffer}"
        )
        messages.append({"role": "user", "content": user_buffer})
        user_buffer = ""  # clear after using

        completion = client.chat.completions.create(
            stream=True,
            messages=messages,
            max_tokens=4096,
            temperature=1.0,
            top_p=1.0,
            model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_ID"),
        )

        collected_messages = []
        last_tts_request = None

        for chunk in completion:
            if chunk.choices and hasattr(chunk.choices[0].delta, "content"):
                chunk_text = chunk.choices[0].delta.content
                if chunk_text:
                    collected_messages.append(chunk_text)
                    print(chunk_text, end="", flush=True)
                    # Check for sentence end to stream to TTS
                    if chunk_text in TTS_ENDS:
                        text = "".join(collected_messages).strip()
                        last_tts_request = assistant_speak(text)
                        collected_messages.clear()
        print()  # finish line after streaming LLM response

    time.sleep(0.1)

🎙️ Speak now...

🧾 User ['Unknown'] (final) in en-US: 
🧾 User ['Unknown'] (final) in en-US: 
Hello!Hi there, I am a assistant_speak callback!Hello!Hi there, I am a assistant_speak callback!
Syntetixing: True
Syntetixing: True



[2025-08-07 01:32:55,361] INFO INFO - micro - micro: [🔊] Starting streaming speech synthesis for text: Hello!...
INFO:micro:[🔊] Starting streaming speech synthesis for text: Hello!...
: [🔊] Starting streaming speech synthesis for text: Hello!...
INFO:micro:[🔊] Starting streaming speech synthesis for text: Hello!...


 It looks looks like like your message is your message is empty empty.Hi there, I am a assistant_speak callback!
.Hi there, I am a assistant_speak callback!
Syntetixing: True
Syntetixing: True


[2025-08-07 01:32:55,419] INFO - micro: [🔊] Starting streaming speech synthesis for text: It looks like your message is empty....
INFO:micro:[🔊] Starting streaming speech synthesis for text: It looks like your message is empty....
 INFO - micro: [🔊] Starting streaming speech synthesis for text: It looks like your message is empty....
INFO:micro:[🔊] Starting streaming speech synthesis for text: It looks like your message is empty....


 How can I assist you can I assist you today? today?Hi there, I am a assistant_speak callback!
Syntetixing: Hi there, I am a assistant_speak callback!
Syntetixing: True
True


[2025-08-07 01:32:55,480] INFO - micro INFO - micro: [🔊] Starting streaming speech synthesis for text: How can I assist you today?...
INFO:micro:[🔊] Starting streaming speech synthesis for text: How can I assist you today?...
: [🔊] Starting streaming speech synthesis for text: How can I assist you today?...
INFO:micro:[🔊] Starting streaming speech synthesis for text: How can I assist you today?...



🗣️ User ['Unknown'] (partial) in en-US: here's your complete
🗣️ User ['Unknown'] (partial) in en-US: here's your complete
🗣️ User ['Unknown'] (partial) in en-US: here's your complete fix
🗣️ User ['Unknown'] (partial) in en-US: here's your complete fix
🗣️ User ['Unknown'] (partial) in en-US: here's your complete fixed and
🗣️ User ['Unknown'] (partial) in en-US: here's your complete fixed and
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and production
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and production
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and production ready
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and production ready
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and production ready class
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and production ready class
🗣️ User ['Guest-1'] (partial) in en-US: here's your complete fixed and produc

KeyboardInterrupt: 

🗣️ User ['Guest-2'] (partial) in en-US: OK yes but i what i'm trying to say is that not everybody has the same thing so i just don't want to say that OK clear code

🗣️ User ['Unknown'] (partial) in en-US: clear code struct🗣️ User ['Unknown'] (partial) in en-US: clear code struct

🗣️ User ['Unknown'] (partial) in en-US: clear code structure🗣️ User ['Unknown'] (partial) in en-US: clear code structure

🧾 User ['Guest-1'] (final) in en-US: Here's your complete fixed and production ready class with correct Solero VAD streaming usage. No chunk error.🧾 User ['Guest-1'] (final) in en-US: Here's your complete fixed and production ready class with correct Solero VAD streaming usage. No chunk error.

🧾 User ['Guest-2'] (final) in en-US: OK, yes, but I what I'm trying to say is that not everybody has the same thing. So I.🧾 User ['Guest-2'] (final) in en-US: OK, yes, but I what I'm trying to say is that not everybody has the same thing. So I.

🧾 User ['Guest-2'] (final) in en-US: Just don't want to

In [None]:
az_speech_recognizer_stream_client.close_stream()
az_speach_synthesizer_client.stop_speaking()

[2025-08-07 01:33:24,074] INFO -  INFO - micro: [🛑] Stopping speech synthesis...
INFO:micro:[🛑] Stopping speech synthesis...
micro: [🛑] Stopping speech synthesis...
INFO:micro:[🛑] Stopping speech synthesis...


🗣️ User ['Unknown'] (partial) in en-US: optional next

🧾 User ['Guest-1'] (final) in en-US: Optional Next.🧾 User ['Guest-1'] (final) in en-US: Optional Next.



: Conversation canceled: ConversationTranscriptionCanceledEventArgs(session_id=13c14e2aa58d444b90b193d66e46efed, result=ConversationTranscriptionResult(result_id=6c6ed35fba55462eb4f7c67f068849dd, speaker_id=, text=, reason=ResultReason.Canceled))
[2025-08-07 01:33:24,736][2025-08-07 01:33:24,736]  INFOINFO -  - micromicro: Conversation session stopped.
: Conversation session stopped.
INFO:micro:Conversation session stopped.
INFO:micro:Conversation session stopped.
: Conversation canceled: ConversationTranscriptionCanceledEventArgs(session_id=13c14e2aa58d444b90b193d66e46efed, result=ConversationTranscriptionResult(result_id=b64c6e3ee0ed4886bc3ba626bcdb0b11, speaker_id=, text=, reason=ResultReason.Canceled))
