In [4]:
import sounddevice as sd
import numpy as np
import speech_recognition as sr
import scipy.io.wavfile as wavfile
import os


def record_audio(duration=5, filename="output.wav", sample_rate=44100):
    """Record single-channel audio with sounddevice and save it as a WAV file.

    Args:
        duration (int | float): Length of the recording in seconds.
        filename (str): Path of the WAV file to write.
        sample_rate (int): Sampling rate in Hz used both for capture and for
            the WAV header. Defaults to 44100.

    Returns:
        str: The ``filename`` that was written.
    """
    print("Recording...")
    frame_count = int(duration * sample_rate)
    # One channel of int16 samples, so the file is written as 16-bit PCM.
    recording = sd.rec(
        frame_count, samplerate=sample_rate, channels=1, dtype="int16"
    )
    sd.wait()  # Block until the recording buffer has been filled
    wavfile.write(filename, sample_rate, recording)
    print(f"Recording saved as {filename}")
    return filename


def _compare_words(reference_phrase, recognized_text):
    """Compare two phrases word by word, ignoring case and extra whitespace.

    Args:
        reference_phrase (str): The phrase the speaker was expected to say.
        recognized_text (str): The text returned by the recognizer.

    Returns:
        tuple[bool, list[str]]: Whether the word sequences are identical, and
        one feedback line per aligned word pair plus an optional line listing
        extra or missing trailing words.
    """
    reference_words = reference_phrase.lower().split()
    recognized_words = recognized_text.lower().split()

    # zip stops at the shorter sequence; the length difference is reported below.
    feedback = [
        f"Correct: '{recog_word}'"
        if ref_word == recog_word
        else f"Incorrect: You said '{recog_word}', should be '{ref_word}'"
        for ref_word, recog_word in zip(reference_words, recognized_words)
    ]

    if len(reference_words) < len(recognized_words):
        extra_words = recognized_words[len(reference_words):]
        feedback.append(f"Extra words: {' '.join(extra_words)}")
    elif len(reference_words) > len(recognized_words):
        missing_words = reference_words[len(recognized_words):]
        feedback.append(f"Missing words: {' '.join(missing_words)}")

    # Derive the match from the same word lists used for the feedback, so the
    # two can never disagree (e.g. over doubled spaces inside a phrase).
    return reference_words == recognized_words, feedback


def recognize_speech_from_file(audio_file, reference_phrase, language="fr-FR"):
    """Recognize speech from an audio file and compare it to a reference phrase.

    Args:
        audio_file (str): Path to the audio file to process.
        reference_phrase (str): The phrase to compare against.
        language (str): Language code passed to the recognizer
            (default is French, "fr-FR").

    Returns:
        dict: A dictionary with three keys:
            "match" (bool): True when the recognized words equal the
                reference words, ignoring case and whitespace.
            "recognized_text" (str | None): The recognized text, or None when
                recognition failed.
            "feedback" (list[str]): Word-by-word feedback, or a single error
                message when recognition failed.
    """
    recognizer = sr.Recognizer()

    # Keep the try body limited to the calls that can raise the handled errors.
    try:
        with sr.AudioFile(audio_file) as source:
            print("Processing audio...")
            audio = recognizer.record(source)  # Load the whole file into memory
        # The audio is fully loaded, so the file can be closed before the request.
        recognized_text = recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        print("Could not understand the audio.")
        return {"match": False, "recognized_text": None, "feedback": ["Speech not recognized."]}
    except sr.RequestError as e:
        print(f"Could not request results from the speech recognition service; {e}")
        return {"match": False, "recognized_text": None, "feedback": ["Speech recognition service error."]}

    print("You said:", recognized_text)
    match_status, feedback = _compare_words(reference_phrase, recognized_text)

    return {
        "match": match_status,
        "recognized_text": recognized_text,
        "feedback": feedback,
    }


if __name__ == "__main__":
    # Reference phrase the speaker is expected to say.
    phrase = "Le ciel est bleu"

    # Record five seconds of audio from the user into a temporary WAV file.
    audio_file = record_audio(duration=5)

    try:
        # Recognize the speech and compare it against the reference phrase.
        result = recognize_speech_from_file(audio_file, phrase, language="fr-FR")

        # Word-level feedback is only meaningful when something was recognized.
        if result["recognized_text"]:
            print("Recognized Speech:", result["recognized_text"])
            print("Feedback:")
            for feedback in result["feedback"]:
                print(feedback)
        if result["match"]:
            print("Match: The speech matches the given phrase!")
        else:
            print("Mismatch: The speech does not match the given phrase.")
    finally:
        # Cleanup: delete the temporary audio file even if recognition or
        # reporting raised, so a failed run does not leave the recording behind.
        try:
            os.remove(audio_file)
        except FileNotFoundError:
            pass


Recording...
Recording saved as output.wav
Processing audio...
You said: le sel est blue
Recognized Speech: le sel est blue
Feedback:
Correct: 'le'
Incorrect: You said 'sel', should be 'ciel'
Correct: 'est'
Incorrect: You said 'blue', should be 'bleu'
Mismatch: The speech does not match the given phrase.
