In [1]:
import datetime                   # to get date and time
import random                     # to choose random words from list
import string                     # punctuation table used to clean transcripts
import webbrowser                 # to open and perform web tasks

import pyttsx3                    # offline Text to Speech
import pywhatkit                  # for more web automation
import serial                     # for serial communication
import speech_recognition as sr   # voice recognition library (sr.Recognizer / sr.Microphone are used below)
import whisper                    # for speech-to-text

In [None]:
# Connect to the Arduino Nano board over serial communication.
# After this cell `port` is always bound: a serial.Serial handle when the
# connection succeeds, otherwise None (or a handle left open by an earlier run
# of this cell).
try:
    port = serial.Serial("COM7", 9600)   # device "COM7" at 9600 baud
    print("Phycial body, connected.")
except serial.SerialException as exc:
    # Catch only serial-port failures so that unrelated errors (and
    # KeyboardInterrupt) are not silently swallowed.
    print("Unable to connect to my physical body")
    print(f"  reason: {exc}")
    # Bind `port` to None only if it does not exist yet, so re-running this
    # cell while the port is already held open keeps the working handle.
    globals().setdefault("port", None)

In [3]:
# --- Plain configuration (cannot fail) --------------------------------------

# Wake-up word: a command is only acted on when it starts with this word.
robot_name = 'robo'

# Phrase lists used for small talk.
hi_words = [
    'hi',
    'hello',
    'yo baby',
    'salam',
]
bye_words = [
    'bye',
    'tata',
    'hasta la vista',
]
r_u_there = [
    'are you there',
    'you there',
]

# --- Heavy / device-backed objects -------------------------------------------

# Whisper speech-to-text model. Pick a smaller size such as 'tiny', 'base' or
# 'medium' instead of 'large' if the machine does not have enough RAM.
model = whisper.load_model("large")

# Offline text-to-speech engine and the voices it reports.
engine = pyttsx3.init()
voices = engine.getProperty('voices')

# Recognizer object used to capture audio from the microphone.
listener = sr.Recognizer()

In [4]:
def _normalize_command(text):
    """Return `text` lower-cased, with punctuation trimmed from each word.

    Words are separated on any whitespace, leading/trailing punctuation is
    stripped from every word (inner punctuation such as the dot in
    ``google.com`` is kept), empty leftovers are dropped, and the words are
    re-joined with single spaces.
    """
    trimmed = (raw.strip(string.punctuation) for raw in text.lower().split())
    return ' '.join(word for word in trimmed if word)


def listen():
    """Record one utterance, transcribe it, and act on it if it starts with the wake-up word.

    Audio is captured through ``sr.Microphone()`` with ``listener``, written to
    ``audio.wav`` in the current working directory, and transcribed with the
    Whisper ``model``.  The transcript is normalised with
    ``_normalize_command`` before the wake-up word check, because transcripts
    typically arrive with a leading space and punctuation (e.g.
    ``" Robo, play music."``), which would otherwise never equal ``robot_name``.
    When the first word matches, the cleaned command is passed to ``process``.

    Returns:
        None.  Failures while recording, transcribing or processing are
        reported on stdout and the utterance is skipped, so the calling loop
        keeps running.  ``KeyboardInterrupt`` is deliberately not caught, so
        the loop can still be stopped.
    """
    try:
        # Keep the microphone open only for the recording itself.
        with sr.Microphone() as source:
            print("Talk>>")
            voice = listener.listen(source)

        # Whisper is given a file path, so persist the capture as WAV first.
        with open("audio.wav", "wb") as wav_file:
            wav_file.write(voice.get_wav_data())

        result = model.transcribe("audio.wav")
        command = _normalize_command(result["text"])
        print(command)

        # The wake-up word must be the first word of the command.
        if command.split(' ')[0] == robot_name:
            print("[wake-up word found]")
            process(command)
    except Exception as exc:
        # Best effort: report and skip this utterance instead of crashing the loop.
        print(f"[listen] skipped this utterance: {exc!r}")

In [8]:
def _send_to_body(code):
    """Write `code` (a bytes command) to the serial port if one is available.

    Returns True when the bytes were written, False when the module-level
    ``port`` is missing or None, so commands still work without the robot body.
    """
    body = globals().get("port")
    if body is None:
        return False
    body.write(code)
    return True


def process(words):
    """Act on a spoken command whose first word is the wake-up word.

    Args:
        words: the full command text, wake-up word included, words separated
            by whitespace.

    Supported commands (first word after the wake-up word):
        * nothing, or the wake-up word again: ask "How Can I help you?"
        * ``play <query>``: play the query on YouTube via pywhatkit
        * ``search <query>``: search the query via pywhatkit
        * ``get info <topic>``: fetch a summary via pywhatkit and speak it
        * ``open <address>``: open ``http://<address>`` in the web browser
        * ``uppercut`` / ``match`` / ``5``: send a one-byte code to the body
        * any word found in ``hi_words`` / ``bye_words``: reply with a random
          phrase from the same list

    The byte codes are forwarded to the serial port as-is; what each one does
    is decided by the firmware on the board, which is not visible here.

    Returns:
        None.
    """
    print(words)  # echo what was received

    # Drop the wake-up word; split on any whitespace so repeated spaces are harmless.
    word_list = words.split()[1:]

    # Wake-up word alone (or repeated): prompt the user instead of indexing an empty list.
    if not word_list or word_list == [robot_name]:
        talk("How Can I help you?")
        _send_to_body(b'l')
        return

    action, args = word_list[0], word_list[1:]

    if action == 'play':
        # play from YouTube, searching without the command word
        talk("Okay boss, playing")
        _send_to_body(b'u')
        pywhatkit.playonyt(' '.join(args))
        _send_to_body(b'l')
        return

    if action == 'search':
        # web search
        _send_to_body(b'u')
        talk("Okay boss, searching")
        _send_to_body(b'l')
        pywhatkit.search(' '.join(args))
        return

    # Slice comparison is safe when only "get" was said.
    if word_list[:2] == ['get', 'info']:
        _send_to_body(b'u')
        talk("Okay, I am right on it")
        # NOTE(review): 'u' is sent a second time here, whereas play/search
        # send 'u' then 'l'. Kept as in the original; confirm the intent.
        _send_to_body(b'u')
        topic = ' '.join(word_list[2:])   # search without the two command words
        # return_value=True asks pywhatkit to hand the summary back rather
        # than only printing it.
        inf = pywhatkit.info(topic, return_value=True)
        if inf:
            talk(inf)
        return

    if action == 'open':
        _send_to_body(b'l')
        talk("Opening, sir")
        # The remaining words are glued together without spaces to form the address.
        url = f"http://{''.join(args)}"
        webbrowser.open(url)
        return

    # One-byte commands; these do not return, so the greeting check below still runs.
    gesture = {'uppercut': b'U', 'match': b's', '5': b'p'}.get(action)
    if gesture is not None:
        _send_to_body(gesture)

    # Greetings / goodbyes are matched one word at a time.
    # NOTE(review): multi-word entries in hi_words / bye_words can never equal
    # a single word, so they are only ever used as replies.
    for word in word_list:
        if word in hi_words:
            _send_to_body(b'h')               # send command to wave hand
            talk(random.choice(hi_words))
        elif word in bye_words:
            talk(random.choice(bye_words))

In [9]:
def talk(sentence):
    """Say `sentence` out loud through the text-to-speech engine."""
    speaker = engine
    speaker.say(sentence)      # queue the sentence
    speaker.runAndWait()       # run the engine so the queued sentence is spoken

In [None]:
# Main loop: keep listening for commands until the kernel is interrupted.
try:
    while True:
        listen()
except KeyboardInterrupt:
    # Interrupting the kernel (or Ctrl+C in a script) ends the assistant with
    # a short message instead of a traceback in the cell output.
    print("Stopped listening.")