In [1]:
# Updated 12.20.24
# Speech-to-text summarization with OpenAI
from openai import OpenAI
import sounddevice as sd
import numpy as np
import tempfile
import wave
from dotenv import load_dotenv
import os

# Load environment variables
# Called with no arguments, so python-dotenv's default .env lookup applies.
load_dotenv()

# Configure your OpenAI API key
# os.getenv returns None when OPENAI_API_KEY is unset; the value is handed
# to the client unchanged.
openai_api_key = os.getenv("OPENAI_API_KEY")
# Module-level client shared by the transcription and chat functions below.
client = OpenAI(api_key=openai_api_key)

# Function to record audio
def record_audio(duration=5, samplerate=44100):
    """Record a single-channel float32 clip through ``sounddevice``.

    The call blocks until the capture has finished.

    Args:
        duration: Length of the recording in seconds.
        samplerate: Sampling rate in Hz.

    Returns:
        A ``(samples, samplerate)`` tuple, where ``samples`` is the recording
        flattened to a one-dimensional array.
    """
    print(f"Recording for {duration} seconds...")
    frame_count = int(duration * samplerate)
    recording = sd.rec(
        frame_count,
        samplerate=samplerate,
        channels=1,
        dtype='float32',
    )
    sd.wait()  # Block until the capture is complete
    mono_samples = recording.flatten()
    return mono_samples, samplerate

# Save audio to WAV format
def save_audio_to_wav(audio_data, samplerate):
    """Write float audio samples to a temporary 16-bit mono WAV file.

    Args:
        audio_data: Array-like of float samples, nominally in [-1.0, 1.0].
            Values outside that range are clipped to it before conversion.
        samplerate: Sampling rate in Hz, stored in the WAV header.

    Returns:
        Path of the WAV file that was written. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    # Clip before scaling: casting an out-of-range float to int16 wraps
    # around (e.g. a sample of 2.0 would come out near zero) instead of
    # saturating. '<i2' pins the little-endian layout the WAV format
    # requires, regardless of the host byte order.
    clipped = np.clip(np.asarray(audio_data), -1.0, 1.0)
    pcm16 = (clipped * 32767).astype('<i2')

    # Reserve a unique path, then close the handle so the file is not open
    # twice while the wave module writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        wav_path = temp_file.name

    with wave.open(wav_path, 'wb') as wf:
        wf.setnchannels(1)  # Mono audio
        wf.setsampwidth(2)  # 16-bit audio
        wf.setframerate(samplerate)
        wf.writeframes(pcm16.tobytes())
    return wav_path

# Function to transcribe audio using Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file with the ``whisper-1`` model.

    Args:
        audio_file: Path of the audio file to upload.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    with open(audio_file, "rb") as wav_stream:
        result = client.audio.transcriptions.create(
            file=wav_stream,
            model="whisper-1",
        )
    # The response object exposes the transcript as its 'text' attribute
    return result.text

# Function to analyze text using OpenAI LLM
def analyze_text_with_llm(text, prompt="Analyze the following content:"):
    """Send ``text`` to the ``gpt-4o`` chat model together with an instruction.

    Args:
        text: Content to be analyzed.
        prompt: Instruction placed before the content, separated from it by
            a blank line.

    Returns:
        The message content of the first choice in the chat response.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"{prompt}\n\n{text}"},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
    )
    # Only the first choice's reply is used
    first_choice = completion.choices[0]
    return first_choice.message.content

# Generalized function
def listen_and_analyze(duration=5, user_prompt="Analyze this content:"):
    """Record audio, transcribe it, and analyze the transcript.

    Prints the transcript and the analysis as they become available.

    Args:
        duration: Recording length in seconds.
        user_prompt: Instruction sent along with the transcript.

    Returns:
        The analysis returned by ``analyze_text_with_llm``.
    """
    # Record audio
    audio_data, samplerate = record_audio(duration)

    # Save audio to a WAV file
    audio_file = save_audio_to_wav(audio_data, samplerate)

    try:
        # Transcribe audio to text
        transcribed_text = transcribe_audio(audio_file)
    finally:
        # The WAV file is created with delete=False, so nothing else removes
        # it; without this every call would leave a file behind in the temp
        # directory. Cleanup is best-effort and must not mask a
        # transcription error.
        try:
            os.remove(audio_file)
        except OSError:
            pass
    print(f"Transcribed Text: {transcribed_text}")

    # Analyze the transcribed text
    analysis = analyze_text_with_llm(transcribed_text, user_prompt)
    print(f"Analysis: {analysis}")
    return analysis

# Example usage: record five seconds of audio and print a summary of it
if __name__ == "__main__":
    analysis_result = listen_and_analyze(
        user_prompt="Summarize this content.",
        duration=5,
    )
    print(f"Final Result: {analysis_result}")


Recording for 5 seconds...
Transcribed Text: I'm testing to see if this works. Please let me know if this works.
Analysis: The content consists of a person testing a system and asking for confirmation on whether it works.
Final Result: The content consists of a person testing a system and asking for confirmation on whether it works.
