In [1]:
import warnings
import os
import pygame
from gtts import gTTS
from time import sleep
import speech_recognition as sr
import tempfile
import datetime
import torch
from transformers import GPTNeoForCausalLM, AutoTokenizer
# Suppress FutureWarning for tokenization spaces
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers.tokenization_utils_base")


pygame 2.6.0 (SDL 2.28.4, Python 3.8.8)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Load GPT-Neo 1.3B model and tokenizer.
# `tokenizer` and `model` are module-level objects; generate_response() reads
# them directly, so this cell must run before that function is called.
# NOTE(review): from_pretrained presumably downloads the checkpoint on first
# use and reads a local cache afterwards -- confirm for your environment.
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GPTNeoForCausalLM.from_pretrained(model_name)

# Set the pad_token to eos_token to avoid padding issues
# (generate_response() tokenizes with padding=True, which needs a pad token).
tokenizer.pad_token = tokenizer.eos_token


In [3]:
def speak_response(text):
    """Convert ``text`` to speech with gTTS and play it through pygame.

    The audio is written to a temporary ``.mp3`` file which is always removed
    afterwards, even when synthesis or playback fails.

    Args:
        text: The sentence to speak. Empty, whitespace-only or ``None`` input
            is ignored because there is nothing to synthesize.

    Returns:
        None. Synthesis/playback problems are reported with ``print`` rather
        than raised, so one failed utterance does not stop the caller's loop.
    """
    # Nothing to say: skip synthesis entirely (gTTS refuses empty text).
    if not text or not text.strip():
        return None

    temp_file_path = None
    mixer_started = False
    try:
        # Only reserve a unique path here and close the handle right away, so
        # gTTS and pygame can reopen the file by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_file_path = temp_audio_file.name

        try:
            gTTS(text=text, lang='en').save(temp_file_path)
        except Exception as e:
            print(f"Error generating speech: {e}")
            return None

        pygame.mixer.init()
        mixer_started = True
        pygame.mixer.music.load(temp_file_path)
        pygame.mixer.music.play()

        while pygame.mixer.music.get_busy():
            pygame.time.wait(50)  # Check every 50ms for faster termination
    except Exception as e:
        print(f"Error playing audio: {e}")
    finally:
        # Shut the mixer down before deleting so the file is no longer in use.
        if mixer_started:
            pygame.mixer.quit()
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError as e:
                print(f"Could not remove temporary audio file {temp_file_path}: {e}")
    return None


In [4]:
def capture_speech(mic_index):
    """Record one spoken query from the microphone and transcribe it.

    Args:
        mic_index: Device index passed to ``sr.Microphone``.

    Returns:
        The transcription returned by ``recognize_google``, or ``None`` when
        listening timed out, the audio was not understood, or the recognition
        request failed (each case prints a short message).
    """
    listener = sr.Recognizer()
    listener.energy_threshold = 300  # Lower energy threshold for faster detection

    with sr.Microphone(device_index=mic_index) as source:
        print("Listening for your query...")
        listener.adjust_for_ambient_noise(source, duration=0.5)

        try:
            recording = listener.listen(source, timeout=5, phrase_time_limit=6)
        except sr.WaitTimeoutError:
            print("Listening timed out, please speak again.")
            return None

    # Transcription happens after the microphone has been released.
    transcript = None
    try:
        transcript = listener.recognize_google(recording)
    except sr.UnknownValueError:
        print("Sorry, I could not understand the audio.")
    except sr.RequestError as e:
        print(f"Error with the Google Speech Recognition service; {e}")
    else:
        print(f"User said: {transcript}")
    return transcript


In [5]:
def handle_time_date_queries(user_input):
    """Answer simple "what time / what date is it" questions locally.

    Matching is case-insensitive and done on whole words, so "What TIME is it?"
    is recognised while words that merely contain a keyword ("sometimes",
    "update") are not.

    Args:
        user_input: The transcribed user utterance. ``None`` or an empty
            string is treated as "no match".

    Returns:
        A sentence with the current local time (if the word "time" appears),
        a sentence with today's date (if "date" or "today" appears), or
        ``None`` when the utterance is not a time/date query.
    """
    if not user_input:
        return None

    # Lower-case and replace punctuation with spaces so that "Time?" and
    # "today's" still yield the bare keywords as separate words.
    cleaned = "".join(ch if ch.isalnum() else " " for ch in user_input.lower())
    words = set(cleaned.split())

    now = datetime.datetime.now()
    if "time" in words:
        current_time = now.strftime("%I:%M %p")
        return f"The current time is {current_time}."
    if "date" in words or "today" in words:
        current_date = now.strftime("%A, %B %d, %Y")
        return f"Today is {current_date}."
    return None


In [6]:
def select_microphone(mic_index=None):
    """Return the microphone device index the assistant should use.

    Args:
        mic_index: Optional explicit device index. When omitted (the way the
            notebook calls it), the built-in default of 2 is used.

    Returns:
        ``mic_index`` if one was given, otherwise 2.
    """
    # NOTE(review): index 2 is presumably specific to the author's machine;
    # pass mic_index explicitly when running on different hardware.
    default_index = 2  # Hardcoded microphone index to avoid unnecessary complexity
    return default_index if mic_index is None else mic_index


In [7]:
def generate_response(user_input):
    """Produce Amigo's reply to a transcribed user utterance.

    Canned answers are tried first (time/date, the assistant's name, and a
    polite refusal for factual / current-event questions); everything else is
    sent to the GPT-Neo model loaded at module level.

    Args:
        user_input: The user's utterance as text.

    Returns:
        The reply text. For model-generated replies only the newly generated
        continuation is returned, not the user's own words.
    """
    # Normalise once so every keyword check below is case-insensitive.
    normalized_input = user_input.lower()

    # Handle time and date queries
    time_date_response = handle_time_date_queries(normalized_input)
    if time_date_response:
        return time_date_response

    # Check if the user asked for the AI's name
    if "what is your name" in normalized_input:
        return "I am Amigo, your all-weather conversational companion, just like the bike that you are riding on."

    # Check for factual or current event queries
    out_of_scope_keywords = ("what is", "who is", "when is", "where is", "current", "today", "news")
    if any(keyword in normalized_input for keyword in out_of_scope_keywords):
        return "Thank you for your query, but that's out of my scope of training."

    # Tokenize user input
    inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=100, padding=True)
    prompt_length = inputs['input_ids'].shape[-1]

    # Generate response using GPT-Neo with tuned parameters for context and logic
    # NOTE(review): max_length counts the prompt tokens too, so a prompt that
    # is truncated to 100 tokens leaves no room for a reply -- consider
    # max_new_tokens if long prompts ever become possible.
    reply_ids = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=100,
        num_beams=5,  # Beam search for improved response quality
        early_stopping=True,
        pad_token_id=tokenizer.eos_token_id
    )

    # A causal LM returns prompt + continuation; drop the prompt tokens so the
    # assistant does not repeat the user's sentence back before answering.
    generated_ids = reply_ids[0][prompt_length:]
    bot_response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # An empty continuation would give the text-to-speech step nothing to say.
    if not bot_response:
        return "Sorry, I don't have a response for that."
    return bot_response


In [8]:
def listen_for_wake_word(mic_index):
    """Block until the phrase "hey amigo" is heard on the microphone.

    Repeatedly records a phrase, transcribes it with ``recognize_google`` and
    checks (case-insensitively) whether it contains the wake phrase.

    Args:
        mic_index: Device index passed to ``sr.Microphone``.

    Returns:
        ``True`` once the wake phrase has been detected. The function does
        not return otherwise; silence, unintelligible audio and recognition
        service errors all lead to another listening attempt.
    """
    recognizer = sr.Recognizer()
    mic = sr.Microphone(device_index=mic_index)

    print("Waiting for wake word 'Hey Amigo'...")
    while True:
        with mic as source:
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            try:
                audio = recognizer.listen(source, timeout=5)
            except sr.WaitTimeoutError:
                # Nobody spoke within the timeout: keep waiting instead of
                # letting the exception end the wake-word loop.
                continue

        try:
            user_input = recognizer.recognize_google(audio)
            if "hey amigo" in user_input.lower():
                return True  # Wake word detected, start conversation
        except sr.UnknownValueError:
            continue
        except sr.RequestError as e:
            print(f"Error with the Google Speech Recognition service: {e}")
            continue


In [9]:
def amigo_conversational_companion():
    """Run one full Amigo session: wake word, greeting, then a dialogue loop.

    The loop keeps listening until the user says something containing "exit"
    or "goodbye"; unrecognised or missing input simply triggers another
    listening attempt.
    """
    mic_index = select_microphone()

    # Step 1: nothing happens until the wake word "Hey Amigo" is heard.
    if not listen_for_wake_word(mic_index):
        return

    # Step 2: greet the user, on screen and aloud.
    greeting = "Hi Hello Namaskara, I am Amigo. How can I help you today?"
    print(f"Amigo: {greeting}")
    speak_response(greeting)

    # Step 3: conversation loop.
    exit_words = ("exit", "goodbye")
    while True:
        user_input = capture_speech(mic_index)
        if user_input is None:
            continue

        heard = user_input.lower()
        wants_to_leave = any(word in heard for word in exit_words)

        if wants_to_leave:
            reply = "Goodbye, have a nice day!"
        else:
            reply = generate_response(user_input)

        print(f"Amigo: {reply}")
        speak_response(reply)

        if wants_to_leave:
            break


In [13]:
# Entry point: start the assistant when this code runs as the main module
# (the recorded cell output shows the guard passing inside the notebook).
# The call blocks until the user says "exit"/"goodbye" or the run is interrupted.
if __name__ == "__main__":
    amigo_conversational_companion()


Waiting for wake word 'Hey Amigo'...
Amigo: Hi Hello Namaskara, I am Amigo. How can I help you today?
Listening for your query...
User said: tell me a joke


KeyboardInterrupt: 