# In [None]:
import openai
import pyttsx3
import speech_recognition as sr
import time
import boto3
import io
from pydub import AudioSegment
from pydub.playback import play

# Initialize the text-to-speech engine for pyttsx3
pyttsx3_engine = pyttsx3.init()

# Initialize the text-to-speech engine for AWS Polly.
# Credentials are deliberately NOT passed as literals: boto3 resolves them from
# its default provider chain (the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables, the shared ~/.aws/credentials file, an attached IAM
# role, ...), which keeps secrets out of the source file and version control.
polly_engine = boto3.client(
    'polly',
    region_name='us-west-2',  # Adjust the region as needed
)

def transcribe_audio_to_text(filename):
    """Transcribe a recorded audio file to text via ``recognize_google``.

    Args:
        filename: Path of the audio file to load with ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when recognition fails (for example
        because the speech was unintelligible or the service was unreachable).
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(filename) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except Exception as error:
        # Recognition stays best-effort, but catching ``Exception`` instead of
        # using a bare ``except`` lets Ctrl+C (KeyboardInterrupt) and
        # SystemExit propagate, and the cause is reported instead of hidden.
        print(f'Skipping unknown error: {error!r}')
        return None

def generate_response(user_question):
    """Ask the OpenAI completion endpoint to answer ``user_question``.

    Args:
        user_question: The user's question as plain text.

    Returns:
        The ``text`` field of the first completion choice.
    """
    prompt = "You are a helpful assistant"
    # Keep the instruction and the question apart; plain concatenation fused
    # them into a single run-on string ("...helpful assistantWhat is ...").
    full_prompt = f"{prompt}\n\n{user_question}"
    response = openai.Completion.create(
        engine="gpt-3.5-turbo-instruct",
        prompt=full_prompt,
        max_tokens=400,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response["choices"][0]["text"]

def convert_text_to_audio_aws_polly(text):
    """Synthesize ``text`` with AWS Polly and play the resulting audio.

    Args:
        text: The text to speak. Empty or whitespace-only text is skipped
            without contacting Polly.

    Returns:
        None.
    """
    if not text or not text.strip():
        # Nothing to say, so do not spend a Polly request on it.
        return

    response = polly_engine.synthesize_speech(
        Text=text,
        OutputFormat='mp3',  # You can specify the output format (mp3, ogg_vorbis, pcm)
        VoiceId='Joanna'  # You can choose from various voices available in AWS Polly
    )

    # AudioStream is a streaming response body: read it fully into memory and
    # always close it so the underlying connection is released.
    polly_stream = response['AudioStream']
    try:
        audio_stream = io.BytesIO(polly_stream.read())
    finally:
        polly_stream.close()

    # Decode the in-memory MP3 with pydub and play it.
    audio_segment = AudioSegment.from_mp3(audio_stream)
    play(audio_segment)

def speak_text(text):
    """Speak ``text`` aloud with the module-level pyttsx3 engine."""
    engine = pyttsx3_engine
    # Queue the utterance, then let the engine process its queue.
    engine.say(text)
    engine.runAndWait()

def main():
    """Run the voice-assistant loop until the process is interrupted.

    Each iteration listens for the wake phrase "okay assistant", records the
    spoken question to ``input.wav``, transcribes it, asks
    ``generate_response`` for an answer and plays that answer with
    ``convert_text_to_audio_aws_polly``. Errors raised while handling an
    iteration are printed and the loop starts over.
    """
    filename = "input.wav"
    wake_recognizer = sr.Recognizer()
    question_recognizer = sr.Recognizer()
    # pause_threshold is read from the Recognizer; assigning it on the
    # Microphone object has no effect on when a phrase is considered finished.
    question_recognizer.pause_threshold = 1

    while True:
        # Wait for user to say "okay assistant"
        print("Say 'okay assistant' to start engaging with the assistant...")
        # A single microphone session serves both the wake phrase and the
        # question, instead of opening the device a second time while the
        # first session is still active.
        with sr.Microphone() as source:
            audio = wake_recognizer.listen(source)
            try:
                transcription = wake_recognizer.recognize_google(audio)
                if transcription.lower() != "okay assistant":
                    continue
                # Record Audio
                print("What is your question...")
                audio = question_recognizer.listen(
                    source, phrase_time_limit=None, timeout=None
                )
            except Exception as e:
                print("An error occurred: {}".format(e))
                continue

        # The microphone is released here, before the slower transcription,
        # completion and playback steps run.
        try:
            with open(filename, "wb") as f:
                f.write(audio.get_wav_data())

            # Transcribe audio to text
            text = transcribe_audio_to_text(filename)
            if not text:
                continue
            print(f"You said: {text}")

            response = generate_response(text)
            print(f"Assistant's Response: {response}")

            # Read response using text-to-speech
            convert_text_to_audio_aws_polly(response)
        except Exception as e:
            print("An error occurred: {}".format(e))
                
# Start the assistant loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
