In [16]:
import os
import re
import tempfile

import speech_recognition as sr
from pydub import AudioSegment

# Keyword table: maps each canonical action name to the phrases that trigger it.
# Keywords are matched as whole words/phrases inside a lower-cased instruction.
# Diagonal phrases contain the plain direction words ('forward', 'up', ...),
# so a single instruction fragment can match more than one entry.
instructions_dict = {
    'move forward': ['forward', 'move ahead', 'proceed'],
    # 'backward' is listed on its own (like 'forward', 'up', 'down', ...) so that
    # phrases such as "move backward" are recognised; 'move back' alone does not
    # match them because of the whole-word boundary.
    'move backward': ['backward', 'go backward', 'move back'],
    'move up': ['up', 'ascend', 'fly up'],
    'move down': ['down', 'descend'],
    'turn left': ['left', 'rotate left'],
    'turn right': ['right', 'rotate right'],
    'diagonal movement': [
        'diagonally forward',
        'diagonally backward',
        'diagonally up',
        'diagonally down',
    ],
}

def classify_instruction(instruction, keyword_map=None):
    """Classify one instruction fragment and extract its numeric value.

    Args:
        instruction: Text of a single instruction, e.g. ``"go down 5 metres"``.
        keyword_map: Optional mapping of action name -> list of trigger
            phrases. Defaults to the module-level ``instructions_dict``.

    Returns:
        A tuple ``(action, value)``. ``action`` is the name of the matched
        action, or ``'unclassified'`` when no keyword matches. ``value`` is
        the first number found in the text (``int`` for whole numbers,
        ``float`` when a decimal part is present), or ``None`` if the text
        contains no number.
    """
    if keyword_map is None:
        keyword_map = instructions_dict

    action = 'unclassified'
    best_length = 0

    # Pick the longest matching keyword so that a specific phrase such as
    # 'diagonally forward' wins over the shorter 'forward' it contains,
    # regardless of the order of the entries in the mapping.
    for act, keywords in keyword_map.items():
        for keyword in keywords:
            if len(keyword) <= best_length:
                continue  # cannot beat the current best match
            pattern = r'\b' + re.escape(keyword) + r'\b'
            if re.search(pattern, instruction, re.IGNORECASE):
                action = act
                best_length = len(keyword)

    # First number in the text, with an optional decimal part and an optional
    # distance unit directly after it (the unit itself is not captured).
    value = None
    value_match = re.search(
        r'\b(\d+(?:\.\d+)?)\s*(?:meters?|metres?|m)?\b',
        instruction,
        re.IGNORECASE,
    )
    if value_match:
        number = value_match.group(1)
        value = float(number) if '.' in number else int(number)

    return action, value

def classify_instructions(sentence):
    """Split a sentence into instruction fragments and classify each one.

    The sentence is lower-cased and split on ``;``, ``,`` and the word
    ``and`` surrounded by single spaces. Fragments that are empty after
    stripping whitespace (e.g. from ``"move up, and go down"`` or a trailing
    comma) are skipped instead of being reported as unclassified.

    Args:
        sentence: Free-form text containing one or more instructions.

    Returns:
        A list of ``(action, value)`` tuples, one per non-empty fragment, in
        the order the fragments appear. An empty sentence yields ``[]``.
    """
    fragments = re.split(r'[;,]| and ', sentence.lower())
    stripped_fragments = (fragment.strip() for fragment in fragments)
    return [
        classify_instruction(fragment)
        for fragment in stripped_fragments
        if fragment
    ]

def audio_to_text(audio_path):
    """Transcribe an audio file to text.

    The file is loaded with pydub, re-exported as WAV into a private
    temporary directory, read back with ``sr.AudioFile`` and transcribed via
    ``Recognizer.recognize_google``. The temporary WAV is removed before the
    function returns, so nothing is left behind in the working directory and
    concurrent calls do not overwrite each other's file.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The text returned by ``recognize_google``.

    Errors raised by pydub or speech_recognition propagate unchanged.
    """
    recognizer = sr.Recognizer()

    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "temp.wav")

        # export() hands back the open output file; close it right away so the
        # data is flushed and the directory can be deleted afterwards.
        audio.export(wav_path, format="wav").close()

        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

    # The recording is held in memory, so the WAV file is no longer needed here.
    return recognizer.recognize_google(audio_data)

def process_audio_and_classify(audio_path):
    """Transcribe an audio file, classify its instructions and print them.

    Prints the extracted text first, then one line per classified
    instruction. The value is shown whenever one was parsed, including 0.

    Args:
        audio_path: Path of the audio file to process.
    """
    # Convert audio to text
    instruction_sentence = audio_to_text(audio_path)
    print(f"Extracted Text: '{instruction_sentence}'")

    # Classify the instructions in the sentence and display them
    for action, value in classify_instructions(instruction_sentence):
        # Compare against None explicitly: a parsed value of 0 is falsy but
        # is still a value that should be reported.
        if value is not None:
            print(f"Instruction: {action}, Value: {value} meters")
        else:
            print(f"Instruction: {action}")

# Example usage: transcribe and classify a sample recording.
# The path is relative, so the file is looked up in the current working directory.
audio_file_path = "audio5.wav"

# Runs the full pipeline as soon as this cell/module executes and prints the result.
process_audio_and_classify(audio_file_path)


Extracted Text: 'move up and then go down 5 metres and then move diagonally backward by 10 m'
Instruction: move up
Instruction: move down, Value: 5 meters
Instruction: diagonal movement, Value: 10 meters
