# Working with Audio

## Universal Code Used for the Entire Notebook

Let's set up our libraries and client

In [None]:
# Install necessary packages for handling sound files and sound devices
# Uncomment the following line if you need to install the packages
# !pip install pyaudio
# !pip install --upgrade openai

In [1]:
import os  # For interacting with the operating system
import requests  # For making HTTP requests
from io import BytesIO  # For handling byte streams
from pathlib import Path  # For filesystem path manipulations

import pyaudio  # For handling audio playback and recording

from IPython.display import Audio, display, clear_output, Markdown, HTML  # For displaying content in Jupyter Notebooks

from openai import OpenAI, AssistantEventHandler  # For OpenAI API and event handling
from typing_extensions import override  # For method overriding in subclasses

import time  # For time-related functions
import threading  # For handling threads
import queue  # For creating and managing queues
import re  # For regular expressions


In [2]:
# Initialize the OpenAI client that every later cell shares.
# No credentials are passed explicitly; presumably the SDK reads them from the
# environment (a later cell reads OPENAI_API_KEY for its raw HTTP call) — confirm locally.
client = OpenAI()  


### Generating an Audio File

Using the openai api library approach

In [3]:
# Destination path for the generated speech, relative to the working directory
speech_file_path = "./fight_on_the_beaches.mp3"

# Create the TTS (Text-to-Speech) request. This is the non-streaming call:
# the audio is read from `response.content` below once the call has returned.
response = client.audio.speech.create(
    model="tts-1-hd",  # TTS model to use
    voice="fable",  # Voice preset to use
    input="""
    Even though large tracts of Europe and many old and famous States have fallen or may fall into the grip of the Gestapo and all the odious apparatus of Nazi rule, we shall not flag or fail. We shall go on to the end, we shall fight in France, we shall fight on the seas and oceans, we shall fight with growing confidence and growing strength in the air, we shall defend our Island, whatever the cost may be, we shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and in the streets, we shall fight in the hills; we shall never surrender, and even if, which I do not for a moment believe, this Island or a large part of it were subjugated and starving, then our Empire beyond the seas, armed and guarded by the British Fleet, would carry on the struggle, until, in God’s good time, the New World, with all its power and might, steps forth to the rescue and the liberation of the old.
    """  # Text to speak; the literal's leading/trailing whitespace is sent as-is
)

# Write the returned audio bytes to disk in a single write
with open(speech_file_path, 'wb') as file:
    file.write(response.content)  # Entire audio payload held in memory

# Report where the audio was saved
print(f"Audio saved to {speech_file_path}")


Audio saved to ./fight_on_the_beaches.mp3


Using the API endpoint approach

In [4]:

# Define the speech file path
speech_file_path = "./old_soldiers_never_die.mp3"
api_key = os.getenv("OPENAI_API_KEY")

# API endpoint and headers
url = "https://api.openai.com/v1/audio/speech"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# Data payload for the request
data = {
    "model": "tts-1",
    "voice": "shimmer",
    "input": """
    I still remember the refrain of one of the most popular barracks ballads of that day which proclaimed most proudly that old soldiers never die; they just fade away. And like the old soldier of that ballad, I now close my military career and just fade away, an old soldier who tried to do his duty as God gave him the light to see that duty.
    """
}

# Make the synchronous request. requests applies no timeout by default, so an
# unresponsive server would block this cell forever: bound the connect phase
# (10 s) and the read phase (120 s), and report transport failures in the same
# printed-error style used for HTTP errors below.
try:
    response = requests.post(url, headers=headers, json=data, timeout=(10, 120))
except requests.RequestException as exc:
    print(f"Error: request failed - {exc}")
else:
    # Check if the request was successful
    if response.status_code == 200:
        with open(speech_file_path, 'wb') as file:
            file.write(response.content)
        print(f"Audio saved to {speech_file_path}")
    else:
        print(f"Error: {response.status_code} - {response.text}")


Audio saved to ./old_soldiers_never_die.mp3


### Using Chat Completion and Assistant Output

Chat Completion to Audio without End-to-End Streaming

In [5]:

class AudioPlayer:
    """Play raw PCM audio chunks on a background thread using PyAudio.

    Chunks are handed over through ``audio_queue`` by ``add_audio`` and written
    to a 16-bit, mono, 24000 Hz output stream by ``play_audio``, which runs on
    ``audio_thread`` (started by the constructor).
    """

    def __init__(self):
        self.audio_queue = queue.Queue()  # Chunks waiting to be played
        self.playback_complete = threading.Event()  # Set once playback has ended
        self.audio_added = threading.Event()  # Set once the producer is done

        # Initialize PyAudio
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=pyaudio.paInt16,
            channels=1,
            rate=24000,
            output=True)

        # Start audio playback thread
        self.audio_thread = threading.Thread(target=self.play_audio)
        self.audio_thread.start()

    def play_audio(self):
        """Drain the queue into the output stream, then release the audio device.

        The loop ends when ``playback_complete`` is set, or when the producer
        has finished (``audio_added``) and the queue is empty. The stream and
        the PyAudio instance are released even if writing a chunk fails.
        """
        try:
            while not self.playback_complete.is_set():
                try:
                    audio_chunk = self.audio_queue.get(timeout=0.1)
                except queue.Empty:
                    # Re-check emptiness: the producer may have queued its last
                    # chunk between the timeout and this test.
                    if self.audio_added.is_set() and self.audio_queue.empty():
                        break
                    continue
                self.stream.write(audio_chunk)
        finally:
            self.playback_complete.set()
            self.stream.stop_stream()
            self.stream.close()
            self.p.terminate()
        print("Audio playback completed.")

    def add_audio(self, audio_data):
        """Queue every chunk from the iterable ``audio_data`` for playback.

        ``audio_added`` is set even when the iterable raises part-way (for
        example a network error while streaming); otherwise the playback
        thread would never terminate and ``wait_for_completion`` would hang.
        The exception itself still propagates to the caller.
        """
        try:
            for chunk in audio_data:
                self.audio_queue.put(chunk)
        finally:
            self.audio_added.set()  # Signal that no more audio will arrive

    def wait_for_completion(self):
        """Block until the playback thread has finished."""
        self.audio_thread.join()

def get_chat_completion(prompt):
    """Send ``prompt`` to gpt-4o as a single user turn and return the reply text.

    The conversation consists of a fixed system message followed by the
    prompt; the content of the first returned choice is handed back.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(model="gpt-4o", messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

def text_to_speech(text):
    """Convert ``text`` to speech and return an iterator of PCM byte chunks.

    Uses the tts-1 model with the onyx voice and requests the "pcm" response
    format; the result is exposed in chunks of 1024 bytes.
    """
    tts_options = {
        "model": "tts-1",
        "voice": "onyx",
        "input": text,
        "response_format": "pcm",
    }
    speech = client.audio.speech.create(**tts_options)
    return speech.iter_bytes(chunk_size=1024)

# Main execution: text first, then speech, then playback (no overlap between
# the stages — each one finishes before the next begins).
prompt = "Give me the entire Gettysburg Address from Abraham Lincoln. JUST the address, not any additional text before or after. Do not add a reply just give me the text of the address."
print("Generating response...")
response_text = get_chat_completion(prompt)  # Blocks until the full reply is available

print("Response:")
print(response_text)

print("\nConverting to speech...")
audio_data = text_to_speech(response_text)  # Iterator of PCM chunks

print("Playing audio...")
player = AudioPlayer()  # Constructor already starts the playback thread
player.add_audio(audio_data)  # Queues every chunk, then marks the input as finished

# Wait for audio to finish playing
player.wait_for_completion()

print("Process completed.")

Generating response...
Response:
Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.

Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.

But, in a larger sense, we can not dedicate—we can not consecrate—we can not hallow—this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which the

Assistant to Audio without End-to-end Streaming

In [6]:

class AudioPlayer:
    """
    A class to handle audio playback using PyAudio.

    Chunks are handed over through ``audio_queue`` by ``add_audio`` and written
    to a 16-bit, mono, 24000 Hz output stream by ``play_audio``, which runs on
    ``audio_thread`` (started by the constructor).
    """
    def __init__(self):
        self.audio_queue = queue.Queue()  # Queue to store audio chunks
        self.playback_complete = threading.Event()  # Event to signal playback completion
        self.audio_added = threading.Event()  # Event to signal that all audio has been queued

        # Initialize PyAudio
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=24000,
            output=True
        )

        # Start audio playback thread
        self.audio_thread = threading.Thread(target=self.play_audio)
        self.audio_thread.start()

    def play_audio(self):
        """
        Method to play audio chunks from the queue.

        The loop ends when ``playback_complete`` is set, or when the producer
        has finished (``audio_added``) and the queue is empty. The stream and
        the PyAudio instance are released even if writing a chunk fails.
        """
        try:
            while not self.playback_complete.is_set():
                try:
                    audio_chunk = self.audio_queue.get(timeout=0.1)
                except queue.Empty:
                    # Re-check emptiness: the producer may have queued its last
                    # chunk between the timeout and this test.
                    if self.audio_added.is_set() and self.audio_queue.empty():
                        break
                    continue
                self.stream.write(audio_chunk)
        finally:
            self.playback_complete.set()
            self.stream.stop_stream()
            self.stream.close()
            self.p.terminate()
        print("Audio playback completed.")

    def add_audio(self, audio_data):
        """
        Method to add audio data (an iterable of chunks) to the queue.

        ``audio_added`` is set even when the iterable raises part-way (for
        example a network error while streaming); otherwise the playback
        thread would never terminate and ``wait_for_completion`` would hang.
        The exception itself still propagates to the caller.
        """
        try:
            for chunk in audio_data:
                self.audio_queue.put(chunk)
        finally:
            self.audio_added.set()  # Signal that no more audio will arrive

    def wait_for_completion(self):
        """
        Method to wait for the audio playback thread to complete.
        """
        self.audio_thread.join()

def get_assistant_response(prompt):
    """
    Function to get a response from the OpenAI assistant.

    Creates a new assistant and a new thread, posts ``prompt`` as the user
    message, starts a run and polls it once per second until it finishes.

    Args:
        prompt: text of the user message.

    Returns:
        The text value of the first content part of the first assistant
        message in the listing (presumably the newest one — the listing order
        is the API default; confirm against the API reference).

    Raises:
        RuntimeError: if the run stops in any state other than 'completed',
            or if the thread contains no assistant message afterwards.
    """
    # Create an assistant
    # NOTE(review): a new assistant is created on every call and never
    # deleted here; confirm whether that accumulation is intended.
    assistant = client.beta.assistants.create(
        model="gpt-4o",
        instructions="You are a helpful assistant.",
        name="My Speaking Assistant (No Streaming)"
    )

    # Create a thread
    thread = client.beta.threads.create()

    # Add a message to the thread
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=prompt
    )

    # Run the assistant
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id
    )

    # Wait for the run to complete. States from which the run will never reach
    # 'completed' on its own must end the loop too, otherwise this function
    # would poll forever.
    dead_end_statuses = {"failed", "cancelled", "expired", "incomplete", "requires_action"}
    while True:
        run_status = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
        if run_status.status == 'completed':
            break
        if run_status.status in dead_end_statuses:
            raise RuntimeError(
                f"Assistant run {run.id} ended with status '{run_status.status}'"
            )
        time.sleep(1)

    # Retrieve the messages
    messages = client.beta.threads.messages.list(thread_id=thread.id)

    # Return the first assistant message found in the listing
    for message in messages.data:
        if message.role == "assistant":
            return message.content[0].text.value

    # Fail loudly instead of silently returning None to the caller
    raise RuntimeError("The completed run produced no assistant message")

def text_to_speech(text):
    """
    Convert ``text`` to speech with the OpenAI TTS model.

    Requests raw PCM from tts-1 (voice: onyx) and returns an iterator that
    yields the audio in chunks of 1024 bytes.
    """
    tts_options = {
        "model": "tts-1",
        "voice": "onyx",
        "input": text,
        "response_format": "pcm",
    }
    speech = client.audio.speech.create(**tts_options)
    return speech.iter_bytes(chunk_size=1024)

# Main execution: assistant reply first, then speech, then playback (no
# overlap between the stages — each one finishes before the next begins).
prompt = (
    "Give me the entire text of the Gettysburg Address from Abraham Lincoln in Japanese. JUST the address, not any additional text before or after. Do not add a reply just give me the text of the address."
)
print("Generating response from assistant...")
response_text = get_assistant_response(prompt)  # Polls the run until it is completed

print("Response:")
print(response_text)

print("\nConverting to speech...")
audio_data = text_to_speech(response_text)  # Iterator of PCM chunks

print("Playing audio...")
player = AudioPlayer()  # Constructor already starts the playback thread
player.add_audio(audio_data)  # Queues every chunk, then marks the input as finished

# Wait for audio to finish playing
player.wait_for_completion()

print("Process completed.")


Generating response from assistant...
Response:
ゲティスバーグ演説（日本語訳）

87年前、われわれの父祖たちはこの大陸に自由を尊び、すべての人々が平等に創られているという信条をもとに、新しい国を誕生させた。今、われわれは内戦のさなかにあり、この国、あるいはその他の国々がそうであったように、長く続くことができるのかどうかの重大な試練の中にある。われわれはこの戦争の大いなる戦場に集まっている。われわれはこの戦場の一角を捧げ、これをここで戦った者たちにとって最後の安息の地とし、彼らの命を捧げて聖別しようとしている。われわれはむしろ、ここに残る生者としての大きな任務を心に刻もう。この戦争を務め上げた彼らの崇高な犠牲に負けぬよう、われわれ自身もまたその任務を全うするという誓いを新たにしよう。彼らが尊い犠牲を払って得た自由のもとに、われわれは新たな決意を持って臨むことを誓う。そして、彼らの死を草しないためにも、この国に神のもと新たなる自由が生まれること、そして人民の、人民による、人民のための政治が地上から消え去ることが決してないようにすることを、われわれはここに誓うのである。

Converting to speech...
Playing audio...
Audio playback completed.
Process completed.


### Generating an Audio File with Streaming

Using the openai api library approach

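(works, but deprecated: the file is saved, yet `stream_to_file` on a non-streaming response does not actually stream and emits a warning — see the corrected version below)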

In [7]:
# Define the speech file path
speech_file_path = "./wonderfulday.mp3"

# Create the TTS (Text-to-Speech) request with the plain (non-streaming) create() call
response = client.audio.speech.create(
    model="tts-1",  # Specify the TTS model to use
    voice="alloy",  # Specify the voice to use for the TTS
    input="Today is a wonderful day to build something people love!"  # Input text to be converted to speech
)

# Save the response audio to a file using stream_to_file method.
# NOTE(review): the recorded cell output shows a warning pointing at this line —
# presumably the SDK's deprecation notice for stream_to_file on a non-streaming
# response; the with_streaming_response variant is the intended replacement.
response.stream_to_file(speech_file_path)

# Print a message indicating where the audio was saved
print(f"Audio saved to {speech_file_path}")


Audio saved to ./wonderfulday.mp3


  response.stream_to_file(speech_file_path)


Using the openai api library approach

(corrected version)

In [9]:
# Where the streamed audio will be written
speech_file_path = "./wonderfulday_streaming.mp3"

# Open the streaming TTS response and the output file together, then copy the
# audio to disk chunk by chunk as it arrives.
with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="alloy",
    input="Today is a wonderful day to build something people love!"
) as response, open(speech_file_path, 'wb') as f:
    for chunk in response.iter_bytes():
        f.write(chunk)

# Report where the audio was saved
print(f"Audio saved to {speech_file_path}")



Audio saved to ./wonderfulday_streaming.mp3


In [10]:

# Initialize PyAudio, which provides bindings for PortAudio, a cross-platform audio library
p = pyaudio.PyAudio()

# Open an output stream for the raw PCM requested from the TTS endpoint in this cell.
# NOTE(review): these parameters are assumed to match the API's "pcm" output
# (16-bit, mono, 24 kHz) — confirm against the TTS documentation.
stream = p.open(format=pyaudio.paInt16,  # Format: 16-bit PCM (Pulse Code Modulation)
                channels=1,              # Channels: 1 (Mono)
                rate=24000,              # Sample rate: 24,000 Hz (samples per second)
                output=True)             # Stream opened for output (playback)

# Function to stream and play audio in real-time
def stream_audio():
    """Synthesize a fixed passage with tts-1/alloy and play it while it downloads.

    Requests raw PCM and writes each chunk of up to 1024 bytes to the
    module-level PyAudio ``stream`` as soon as it has been read.
    """
    passage = """
    The present German submarine warfare against commerce is a warfare against mankind.

    It is war against all nations.

    American ships have been sunk, American lives taken, in ways which it has stirred us very deeply to learn of, but the ships and people of other neutral and friendly nations have been sunk and overwhelmed in the waters in the same way. There has been no discrimination. The challenge is to all mankind.

    Each nation must decide for itself how it will meet it. The choice we make for ourselves must be made with a moderation of counsel and temperateness of judgment befitting our character and our motives as a nation. We must put excited feeling away. Our motive will not be revenge or the victorious assertion of the physical might of the nation, but only the vindication of right, of human right, of which we are only a single champion.
    """
    tts_request = client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input=passage,
        response_format="pcm",
    )
    with tts_request as response:
        # Forward the audio to the sound card chunk by chunk
        for pcm_chunk in response.iter_bytes(1024):
            stream.write(pcm_chunk)

# Start streaming and playing the audio. The cleanup runs in `finally` so the
# output stream and the PyAudio session are released even when the request or
# the playback fails part-way.
try:
    stream_audio()
finally:
    # Close the PyAudio stream properly
    stream.stop_stream()  # Stop the stream
    stream.close()        # Close the stream
    p.terminate()         # Terminate the PyAudio session


### Using Chat Completion and Assistant Output

Chat Completion to Audio with End-to-End Streaming

In [11]:
# Create the chat completion request. Because stream=True, the result is
# iterated chunk by chunk (see stream_response in this cell) rather than read
# as one finished message.
chat_completion = client.chat.completions.create(
    model="gpt-4o",  # Specify the model to use
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},  # System message to set the assistant's behavior
        {"role": "user", "content": "Give me a paragraph about the importance of space exploration."}  # User message to initiate the conversation
    ],
    stream=True  # Enable streaming responses
)

# Function to stream the response
def stream_response(chat_completion):
    """Render a streamed chat completion incrementally in the notebook.

    Args:
        chat_completion: iterable of streamed chunks; the text of each chunk
            is read from ``chunk.choices[0].delta.content``.

    The accumulated text is shown through a single display handle that is
    updated in place, so the output grows as chunks arrive. The text is
    passed to ``HTML`` unescaped, exactly as received.
    """
    full_response = ""
    display_id = display(HTML(full_response), display_id=True)
    for chunk in chat_completion:
        # A chunk may carry no choices at all (for example a usage-only
        # chunk); indexing it would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            full_response += delta_text
            display_id.update(HTML(full_response))

# Call the function to stream the response; it consumes the chunk iterator
# created at the top of this cell and updates the displayed text in place.
stream_response(chat_completion)


In [13]:


# Initialize PyAudio
p = pyaudio.PyAudio()

# Open an output stream: 16-bit samples, mono, 24000 Hz
stream = p.open(format=pyaudio.paInt16,
    channels=1,
    rate=24000,
    output=True)

# Queues linking the pipeline stages defined below:
# text deltas -> text_queue -> sentences -> sentence_queue -> PCM -> audio_queue -> playback
text_queue = queue.Queue()
sentence_queue = queue.Queue()
audio_queue = queue.Queue()

# Flags for process control: each stage sets its flag when it will produce no
# more items, and the next stage stops once that flag is set and its input
# queue is empty.
text_generation_complete = threading.Event()
sentence_processing_complete = threading.Event()
audio_generation_complete = threading.Event()

def generate_and_display_text():
    """Stream a chat completion, display it live and feed it to ``text_queue``.

    Every non-empty text delta is appended to the displayed output and put on
    ``text_queue`` for the sentence splitter.

    ``text_generation_complete`` is set in a ``finally`` block: if the request
    or the stream fails, the downstream stages still see the end-of-input
    signal instead of polling forever. The exception itself propagates.
    """
    try:
        chat_completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Give me a paragraph about the importance of space exploration."}
            ],
            stream=True
        )

        full_response = ""
        display_id = display(HTML(full_response), display_id=True)

        for chunk in chat_completion:
            # A chunk may carry no choices at all; indexing it would raise.
            if not chunk.choices:
                continue
            new_text = chunk.choices[0].delta.content
            if new_text:
                full_response += new_text
                display_id.update(HTML(full_response))
                text_queue.put(new_text)
    finally:
        text_generation_complete.set()

def process_sentences():
    """Assemble text deltas from ``text_queue`` into sentences on ``sentence_queue``.

    A sentence is a run of characters other than ``.``, ``!`` or ``?`` followed
    by one of those characters. Text after the last terminator stays in the
    buffer until more text arrives; whatever is left when generation has
    finished is queued as a final sentence, unless it is only whitespace.

    ``sentence_processing_complete`` is set in a ``finally`` block so the next
    stage is released even if this one fails.
    """
    sentence_buffer = ""
    try:
        while not (text_generation_complete.is_set() and text_queue.empty()):
            try:
                new_text = text_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            sentence_buffer += new_text
            for sentence in re.findall(r'[^.!?]+[.!?]', sentence_buffer):
                sentence_queue.put(sentence.strip())
            # Drop everything up to the last terminator. DOTALL is required:
            # findall above matches across newlines, and without the flag any
            # earlier line lacking a terminator would stay in the buffer and
            # be queued (and spoken) a second time.
            sentence_buffer = re.sub(r'.*[.!?]', '', sentence_buffer, flags=re.DOTALL)

        # Strip before testing so a whitespace-only remainder (e.g. a trailing
        # newline) is not queued as an empty sentence.
        leftover = sentence_buffer.strip()
        if leftover:
            sentence_queue.put(leftover)
    finally:
        sentence_processing_complete.set()

def generate_audio():
    """Turn sentences from ``sentence_queue`` into PCM chunks on ``audio_queue``.

    Each sentence is synthesized with tts-1/alloy in "pcm" format and its
    chunks are queued as they are read, followed by a short block of silence.
    Blank sentences are skipped rather than sent to the API.

    ``audio_generation_complete`` is set in a ``finally`` block: if a request
    fails, the playback stage still terminates instead of waiting forever for
    audio that will never arrive. The exception itself propagates.
    """
    try:
        while not (sentence_processing_complete.is_set() and sentence_queue.empty()):
            try:
                sentence = sentence_queue.get(timeout=0.5)
            except queue.Empty:
                continue
            if not sentence.strip():
                continue  # Nothing to speak

            with client.audio.speech.with_streaming_response.create(
                model="tts-1",
                voice="alloy",
                input=sentence,
                response_format="pcm"
            ) as response:
                for audio_chunk in response.iter_bytes(1024):
                    audio_queue.put(audio_chunk)

            # Add a short pause between sentences: 4800 zero bytes are 2400
            # 16-bit mono samples, i.e. 0.1 seconds at 24000 Hz.
            audio_queue.put(b'\x00' * 4800)
    finally:
        audio_generation_complete.set()

def play_audio():
    """Write queued PCM chunks to the output ``stream`` until the pipeline is drained.

    Runs until ``audio_generation_complete`` is set and ``audio_queue`` is
    empty. Only the queue wait is guarded by the ``try``; the write to the
    sound card happens outside it.
    """
    while not (audio_generation_complete.is_set() and audio_queue.empty()):
        try:
            audio_chunk = audio_queue.get(timeout=0.5)
        except queue.Empty:
            continue
        stream.write(audio_chunk)
    

# One thread per pipeline stage
text_thread = threading.Thread(target=generate_and_display_text)
sentence_thread = threading.Thread(target=process_sentences)
audio_gen_thread = threading.Thread(target=generate_audio)
audio_play_thread = threading.Thread(target=play_audio)

# Launch the three producing stages in pipeline order
for producer in (text_thread, sentence_thread, audio_gen_thread):
    producer.start()

# Wait a short moment before starting audio playback
time.sleep(1)
audio_play_thread.start()

# Wait for all threads to complete
for worker in (text_thread, sentence_thread, audio_gen_thread, audio_play_thread):
    worker.join()

# Close the PyAudio stream properly
stream.stop_stream()
stream.close()
p.terminate()


Assistant to Audio with End-to-end Streaming

In [15]:
# Modified event handler that will actually stream the response from the assistant
class EventHandler(AssistantEventHandler):
    """Custom event handler that prints assistant output as it streams.

    Every printed piece of text (message text, code-interpreter input and
    code-interpreter logs) is also appended to ``self.results`` as a string.
    """

    def __init__(self):
        super().__init__()
        self.results = []  # Text fragments collected so far, in arrival order

    @override
    def on_text_created(self, text) -> None:
        """Handle the event when text is first created."""
        # Print the prefix that introduces the assistant's text
        print("\nassistant text > ", end="", flush=True)
        # Record only the string content so `results` stays a list of strings
        # (appending the event object itself would break a later "".join).
        # NOTE(review): assumes the object exposes its text as `.value` — confirm.
        initial_text = getattr(text, "value", None)
        if initial_text:
            self.results.append(initial_text)

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        """Handle the event when there is a text delta (partial text)."""
        # A delta without text would otherwise be printed as "None"
        if not delta.value:
            return
        # Print the delta value (partial text) to the console
        print(delta.value, end="", flush=True)
        # Append the delta value to the results list
        self.results.append(delta.value)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Handle the event when a tool call is created."""
        # Print the type of the tool call to the console
        print(f"\nassistant tool > {tool_call.type}\n", flush=True)

    @override
    def on_tool_call_delta(self, delta, snapshot) -> None:
        """Handle the event when there is a delta (update) in a tool call.

        Only code-interpreter deltas are handled; other tool types are ignored.
        """
        if delta.type != 'code_interpreter':
            return
        interpreter = delta.code_interpreter
        if interpreter is None:
            return  # Nothing to report in this delta

        # Code being written by the interpreter
        if interpreter.input:
            print(interpreter.input, end="", flush=True)
            self.results.append(interpreter.input)

        # Output produced by the interpreter; only log outputs are handled
        if interpreter.outputs:
            print("\n\noutput >", flush=True)
            for output in interpreter.outputs:
                if output.type == "logs":
                    print(f"\n{output.logs}", flush=True)
                    self.results.append(output.logs)
                        


In [18]:
# Create a thread seeded with a single user message (no file is attached)
thread = client.beta.threads.create(
    messages=[
    {
    "role": "user",
    "content": "Give me a paragraph on penguins.",
    }
]
)

In [20]:
# Stream the output from our assistant; EventHandler prints the text as it arrives.
# NOTE(review): `assistant` is not created at notebook level in any cell shown
# above this one (the earlier helper keeps its assistant local), so it must come
# from a cell executed earlier in the kernel session — confirm before a clean run.
# NOTE(review): `stream` here rebinds the name the playback cells use for the
# PyAudio output stream.
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    event_handler=EventHandler(),
) as stream:
    stream.until_done()


assistant text > Penguins are fascinating, flightless birds known for their distinctive black-and-white plumage and waddling gait. Native primarily to the Southern Hemisphere, they are most commonly associated with the icy landscapes of Antarctica, though some species inhabit more temperate regions like the coasts of South Africa, New Zealand, and the Galápagos Islands. Penguins are superb swimmers, using their flippers to maneuver through water with remarkable speed and agility in search of fish, krill, and other marine organisms. Social by nature, they often live in large colonies called rookeries, which provide a dynamic social structure and collective protection against predators. These resilient birds are also known for their complex nesting behaviors and shared parental responsibilities, with many species taking turns incubating eggs and feeding their chicks. Penguins' unique adaptations, such as their dense feathers and layer of blubber, enable them to thrive in some of the pla

In [21]:
import time  # Import time module for sleep functions
import threading  # Import threading module for handling threads
import queue  # Import queue module for creating queues
import pyaudio  # Import PyAudio module for audio playback
import re  # Import re module for regular expressions
from openai import OpenAI  # Import OpenAI module for interacting with the OpenAI API
from IPython.display import clear_output, display, Markdown  # Import display functions for Jupyter Notebooks

# Initialize the OpenAI client (rebinds the notebook-level `client` so this
# cell can also run on its own)
client = OpenAI()

# Define a custom event handler class that extends AssistantEventHandler
class EventHandler(AssistantEventHandler):
    """Display streamed assistant text and speak it sentence by sentence.

    Pipeline: text deltas -> ``text_buffer`` -> ``sentence_queue`` ->
    TTS (``process_sentences`` thread) -> ``audio_queue`` -> PyAudio
    (``play_audio`` thread). Both threads are started by the constructor and
    joined by ``on_end``.
    """

    def __init__(self):
        super().__init__()  # Call the initializer of the parent class
        self.results = []  # List to store text results
        self.text_buffer = ""  # String buffer to collect text chunks
        self.sentence_queue = queue.Queue()  # Queue to store sentences for processing
        self.audio_queue = queue.Queue()  # Queue to store audio chunks for playback
        self.text_generation_complete = threading.Event()  # Set when no more text will arrive
        # Set when no more audio will be produced. Playback must wait for this
        # rather than for the end of the text: synthesis of the last sentences
        # is usually still in flight when the text stream ends.
        self.audio_generation_complete = threading.Event()

        # Initialize PyAudio for audio playback
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=pyaudio.paInt16,  # Audio format
            channels=1,  # Number of audio channels
            rate=24000,  # Sample rate
            output=True)  # Output mode

        # Start threads for audio processing and playback
        self.audio_processing_thread = threading.Thread(target=self.process_sentences)
        self.audio_processing_thread.start()
        self.audio_playback_thread = threading.Thread(target=self.play_audio)
        self.audio_playback_thread.start()

    def on_text_delta(self, delta, snapshot):
        """Handle an incoming text delta (partial response)."""
        text = delta.value  # Extract text from delta
        if not text:
            return  # A delta without text would break the "".join in update_output
        self.results.append(text)  # Add text to results
        self.text_buffer += text  # Add text to buffer
        self.process_text_buffer()  # Process the text buffer
        self.update_output()  # Update the displayed output

    def process_text_buffer(self):
        """Move every complete sentence from the text buffer to the sentence queue.

        A sentence is a run of characters other than ``.``, ``!`` or ``?``
        followed by one of those characters; text after the last terminator
        stays in the buffer.
        """
        sentences = re.findall(r'[^.!?]+[.!?]', self.text_buffer)  # Find sentences in the buffer
        for sentence in sentences:
            self.sentence_queue.put(sentence.strip())  # Add sentences to the queue
        # Drop everything up to the last terminator. DOTALL is required:
        # findall above matches across newlines, and without the flag any
        # earlier line lacking a terminator would stay in the buffer and be
        # queued (and spoken) a second time.
        self.text_buffer = re.sub(r'.*[.!?]', '', self.text_buffer, flags=re.DOTALL)

    def update_output(self):
        """Redraw the collected text as Markdown in the notebook output."""
        clear_output(wait=True)  # Clear previous output
        markdown_content = "".join(self.results)  # Combine results into a single string
        display(Markdown(markdown_content))  # Display the results as Markdown

    def process_sentences(self):
        """Convert queued sentences to PCM audio until the text is exhausted.

        ``audio_generation_complete`` is set in a ``finally`` block so the
        playback thread terminates even if a synthesis request fails.
        """
        try:
            while not self.text_generation_complete.is_set() or not self.sentence_queue.empty():
                try:
                    sentence = self.sentence_queue.get(timeout=0.1)  # Get a sentence from the queue
                except queue.Empty:
                    continue
                if not sentence.strip():
                    continue  # Nothing to speak
                with client.audio.speech.with_streaming_response.create(
                    model="tts-1",  # TTS model
                    voice="onyx",  # Voice
                    input=sentence,  # Sentence to convert to speech
                    response_format="pcm"  # Audio format
                ) as response:
                    for audio_chunk in response.iter_bytes(1024):  # Stream audio chunks
                        self.audio_queue.put(audio_chunk)  # Add audio chunks to the queue
                # Add a short pause between sentences: 2400 zero bytes are 1200
                # 16-bit mono samples, i.e. 0.05 seconds at 24000 Hz.
                self.audio_queue.put(b'\x00' * 2400)
        finally:
            self.audio_generation_complete.set()

    def play_audio(self):
        """Play queued audio until generation has finished and the queue is empty.

        The stream and the PyAudio instance are released even if a write fails.
        """
        try:
            while not self.audio_generation_complete.is_set() or not self.audio_queue.empty():
                try:
                    audio_chunk = self.audio_queue.get(timeout=0.1)  # Get an audio chunk from the queue
                except queue.Empty:
                    continue
                self.stream.write(audio_chunk)  # Play the audio chunk
        finally:
            self.stream.stop_stream()  # Stop the audio stream
            self.stream.close()  # Close the audio stream
            self.p.terminate()  # Terminate PyAudio

    def on_end(self):
        """Flush the remaining text, then wait for synthesis and playback to finish.

        Safe to call more than once: only the first call does any work.
        NOTE(review): the SDK base class appears to invoke ``on_end`` itself
        when the stream finishes, so an explicit call by the notebook would be
        a second invocation — confirm against the SDK.
        """
        if self.text_generation_complete.is_set():
            return

        self.process_text_buffer()  # Process any remaining text in the buffer
        leftover = self.text_buffer.strip()
        self.text_buffer = ""  # The remainder is consumed exactly once
        if leftover:
            self.sentence_queue.put(leftover)  # Add remaining text to the queue

        self.text_generation_complete.set()  # Signal that text generation is complete
        self.audio_processing_thread.join()  # Wait for the processing thread to finish
        self.audio_playback_thread.join()  # Wait for the playback thread to finish

# Create an assistant using the client library
# (this is the notebook-level `assistant` that cells referring to `assistant.id` rely on)
assistant = client.beta.assistants.create(
    model="gpt-4o",  # Assistant model
    instructions="You are a helpful assistant.",  # Instructions for the assistant
    temperature=1,  # Sampling temperature
    top_p=1,  # Top-p sampling
)

# Create a new assistant thread with an initial user message
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Give me one paragraph on penguins",  # Initial user message
                },
            ],
        },
    ]
)

# Create an instance of the custom event handler
# (its constructor opens the audio output and starts the worker threads)
event_handler = EventHandler()

# Stream the assistant's response
with client.beta.threads.runs.stream(
    thread_id=assistant_thread.id,  # ID of the assistant thread
    assistant_id=assistant.id,  # ID of the assistant
    event_handler=event_handler,  # Custom event handler
) as stream:
    stream.until_done()  # Stream responses until complete

# Flush the remaining text and wait for synthesis and playback to finish.
# NOTE(review): the SDK may already invoke on_end when the stream finishes, in
# which case this explicit call is a second invocation — confirm.
event_handler.on_end()  # Call the end method when streaming is complete


Penguins are a unique and captivating group of flightless birds primarily found in the Southern Hemisphere, with a particular abundance in Antarctica. They are exceptional swimmers, utilizing their flipper-like wings and streamlined bodies to navigate through icy waters with remarkable agility. Penguins are highly social animals, often forming large colonies that can number in the thousands. These colonies provide protection and support, especially during the breeding season, when parents take turns incubating eggs and foraging for food. Adapted to some of the planet's harshest environments, penguins have developed insulating layers of fat and dense feathers to keep warm. Despite their clumsy waddling on land, their charismatic and endearing behavior, paired with their stark black-and-white plumage, endears them to people worldwide.