In [None]:
# Install required libraries
!pip install speechrecognition gtts pydub
!apt-get install ffmpeg


In [None]:
#Library for recognizing speech input
import speech_recognition as sr

#Library for text-to-speech conversion
from gtts import gTTS

#Regular expression module to process text and extract numbers
import re

#Module to perform basic mathematical operations
import operator

# For playing audio in Colab
import IPython.display as ipd

from pydub import AudioSegment
from google.colab import files

# Function to convert any audio file to WAV format
def convert_audio_to_wav(audio_filename):
    """
    Converts an audio file to a 16 kHz, mono, 16-bit WAV file.

    Parameters:
    audio_filename (str): Path of the audio file to convert. The input
    format is auto-detected by pydub.

    Returns:
    str: Path of the WAV file, i.e. the input path with its extension
    replaced by ".wav". If the input already ends in ".wav", that file is
    rewritten in place.
    """
    import os  # Local import so the function does not rely on a file-level import

    # splitext only treats a dot in the final path component as the extension
    # separator, so "my.dir/clip" becomes "my.dir/clip.wav" rather than "my.wav".
    wav_filename = os.path.splitext(audio_filename)[0] + ".wav"

    audio = AudioSegment.from_file(audio_filename)  # Auto-detect the input format
    # 16000 Hz frame rate, 1 channel, 2 bytes (16 bits) per sample
    audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
    audio.export(wav_filename, format="wav")
    return wav_filename

# Lookup table from an operator keyword or symbol, as it appears in the
#   recognized text, to the function that performs that operation.
# Key order is kept as-is because the dictionary is iterated when searching
#   the text for an operator.
operations = {
    # Addition
    "plus": operator.add,
    "+": operator.add,
    # Subtraction
    "minus": operator.sub,
    "-": operator.sub,
    # Multiplication
    "times": operator.mul,
    "*": operator.mul,
    "multiplied by": operator.mul,
    # Division (true division, so the result is always a float)
    "divided by": operator.truediv,
    "/": operator.truediv,
}


# Spoken number words mapped to their integer values. The words are listed in
# counting order starting at zero, so each word's position is also its value.
words_to_numbers = {
    word: value
    for value, word in enumerate(
        (
            "zero", "one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten",
        )
    )
}

def speak(text):
    """
    Synthesizes the given text as English speech with Google
    Text-to-Speech (gTTS), saves it to "response.mp3", and displays
    an audio player for that file in the notebook.
    """
    output_path = "response.mp3"

    # Generate the speech and write it to disk
    gTTS(text=text, lang="en").save(output_path)

    # Show a player widget for the saved file
    player = ipd.Audio(output_path)
    ipd.display(player)


def recognize_speech(audio_file):
    """
    This function transcribes the speech in an audio file using
    Google's speech recognition API and returns the recognized sentence.

    Parameters:
    audio_file (str): Path of the audio file to transcribe.

    Returns:
    str: The recognized text, converted to lowercase.
    None: If the file cannot be read, the speech is unclear or not
    recognized, or the recognition service request fails.
    """

    recognizer = sr.Recognizer()    # Create an instance of Recognizer (to process the audio)

    # Load the whole audio file into memory
    try:
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
    except ValueError:
        # sr.AudioFile raises ValueError when the file is not readable as
        # WAV, AIFF or FLAC; report it instead of crashing the caller.
        print("Sorry, the audio file could not be read. Please provide a WAV, AIFF or FLAC file.")
        return None

    # Convert the speech into text using Google's Speech Recognition API
    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Sorry, I didn't catch that. Can you please repeat?")  # Speech was not recognized
        return None
    except sr.RequestError:
        print("Sorry, there was an issue with the speech recognition service. Please try again later.")  # The API request failed
        return None

    print(f"User said: {text}")  # Print the recognized text in the console

    return text.lower()  # Lowercase for uniform processing




def extract_numbers_and_operator(expression):
    """
    This function extracts the first two numbers and the operator from a
    spoken mathematical expression.

    Example:
    "one plus two" -> (1, "plus", 2)
    "2.5 + 1" -> (2.5, "+", 1)

    Parameters:
    expression (str): The recognized speech converted to text.

    Returns:
    tuple: (first number, operator as a string, second number) if extraction
    is successful. Numbers are ints, or floats when written with a decimal
    point. The operator string is a key of `operations`.
    (None, None, None) if extraction fails.
    """
    # Convert word-based numbers (like "one") into digits (like "1").
    # Word boundaries keep number words inside other words untouched,
    # e.g. "someone" must not become "some1" and "often" must not become "of10".
    for word, num in words_to_numbers.items():
        expression = re.sub(rf"\b{re.escape(word)}\b", str(num), expression)

    # Find numbers, keeping a decimal part together with its integer part
    # so that "2.5" is read as one number rather than as 2 and 5.
    matches = list(re.finditer(r"\d+(?:\.\d+)?", expression))

    if len(matches) < 2:  # If we don't find at least two numbers, the operation is invalid
        print("Could not detect two numbers for calculations.")
        return None, None, None  # Return None values to indicate failure

    first, second = matches[0], matches[1]
    num1, num2 = (
        float(token) if "." in token else int(token)
        for token in (first.group(), second.group())
    )

    # Look for the operator between the two operands first, since that is
    # where it sits in "A op B". Fall back to the whole sentence so that
    # inputs with the operator elsewhere are still accepted.
    between_operands = expression[first.end():second.start()]
    for search_text in (between_operands, expression):
        for word in operations:  # Known operation keywords (e.g. "plus", "minus")
            if word in search_text:
                return num1, word, num2

    print("Could not detect a valid mathematical operation.")  # No operator was detected
    return None, None, None  # Return None values to indicate failure




def perform_calculation(audio_file):
    """
    This function manages the full process of:
    1. Transcribing the spoken expression in the audio file.
    2. Extracting numbers and the operator.
    3. Performing the requested mathematical calculation.
    4. Printing and speaking out the result.

    Parameters:
    audio_file (str): Path of the audio file containing the spoken expression.

    Returns:
    None. Unrecognized input and division by zero are reported to the user
    through speech.
    """
    user_input = recognize_speech(audio_file)   # Transcribe the spoken input

    if not user_input:    # Speech recognition failed or the input was unclear
        speak("Sorry, I couldn't understand. Please try again.")
        return

    # Extract numbers and the operator from the user's spoken sentence
    num1, operator_word, num2 = extract_numbers_and_operator(user_input)

    # If any of the extracted values are None, the input was not valid
    if num1 is None or operator_word is None or num2 is None:
        speak("sorry, I couldn't recognize the numbers or operators. Try again.")
        return

    # Perform the mathematical operation (e.g. num1 + num2 for addition)
    try:
        result = operations[operator_word](num1, num2)
    except ZeroDivisionError:
        # Division maps to operator.truediv, which raises on a zero divisor
        speak("Sorry, I cannot divide by zero.")
        return

    # Construct a response sentence with the final answer
    response = f"The result of {num1} {operator_word} {num2} is {result}"

    print(response)   # Display the final answer in the console
    speak(response)   # Speak the result out loud


# Upload an audio file in Colab
from google.colab import files
uploaded = files.upload()

# Get the first uploaded file; there is none if nothing was uploaded
audio_filename = next(iter(uploaded), None)

if audio_filename is None:
    print("No audio file was uploaded.")
else:
    # sr.AudioFile only reads WAV, AIFF and FLAC, so convert any other
    # format to WAV before running speech recognition on it
    if not audio_filename.lower().endswith((".wav", ".aiff", ".aif", ".flac")):
        audio_filename = convert_audio_to_wav(audio_filename)

    # Perform calculation using the uploaded file
    perform_calculation(audio_filename)




Saving Record (online-voice-recorder.com).wav to Record (online-voice-recorder.com) (1).wav
User said: 10 - 8
The result of 10 - 8 is 2
