<a href="https://colab.research.google.com/github/MK316/Spring2024/blob/main/DLTESOL/PronunciationFeedbackApp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Simple Language Application [3]: Pronunciation Feedback (Accuracy)**

Using the Levenshtein similarity ratio between the target sentence and the speech-recognition transcript

In [None]:
%%capture
!pip install gradio speechrecognition python-Levenshtein soundfile

In [None]:
import gradio as gr
import speech_recognition as sr
from Levenshtein import ratio
import tempfile
import numpy as np
import soundfile as sf
import pandas as pd

# Practice sentences shown in the dropdown, stored as a one-column table.
data = dict(
    Sentences=[
        "The quick brown fox jumps over the lazy dog.",
        "An apple a day keeps the doctor away.",
        "To be or not to be, that is the question.",
        "All human beings are born free and equal in dignity and rights.",
        "She sells sea shells by the sea shore.",
        "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
        "A stitch in time saves nine.",
        "Good things come to those who wait.",
        "Time flies like an arrow; fruit flies like a banana.",
        "You can't judge a book by its cover.",
    ],
)
# Each dict key becomes a column; the list supplies one row per sentence.
df = pd.DataFrame.from_dict(data)

def transcribe_audio(file_info):
    """Transcribe recorded audio to text with Google's speech recognition.

    Args:
        file_info: A ``(sample_rate, samples)`` pair, as delivered by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when no audio was
            supplied.

    Returns:
        The recognized text. Failures are reported as text rather than
        raised: ``"Could not understand audio"`` when there is no audio or
        the speech is unintelligible, and ``"Could not request results; ..."``
        when the recognition service cannot be reached.
    """
    import os

    # No recording supplied: report it the same way as unintelligible audio
    # instead of failing on the indexing below.
    if file_info is None:
        return "Could not understand audio"

    # Use the recording's own sample rate; writing the samples at a fixed
    # 44100 Hz would change the speed/pitch of audio captured at another rate.
    sample_rate = int(file_info[0])
    samples = file_info[1]

    r = sr.Recognizer()
    # Write into a private temporary directory so the WAV file is created,
    # read back by name and cleaned up without reopening a file that is
    # still held open (which is not portable across platforms).
    with tempfile.TemporaryDirectory() as tmpdir:
        wav_path = os.path.join(tmpdir, "recording.wav")
        sf.write(file=wav_path, data=samples, samplerate=sample_rate, format='WAV')
        with sr.AudioFile(wav_path) as source:
            audio_data = r.record(source)

    try:
        return r.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

def pronunciation_correction(selected_sentence, file_info):
    """Rate how closely a spoken recording matches the selected sentence.

    The recording is transcribed with ``transcribe_audio`` and the transcript
    is compared, case-insensitively, with the selected sentence using the
    Levenshtein similarity ``ratio``.

    Args:
        selected_sentence: The sentence the learner was asked to read, or
            ``None``/empty when nothing was selected.
        file_info: The recorded audio, passed unchanged to
            ``transcribe_audio``; ``None`` when no audio was supplied.

    Returns:
        A ``(feedback, description)`` tuple: a feedback message and the
        similarity score formatted as a string with two decimals.
    """
    # Nothing to score without both a target sentence and a recording.
    if not selected_sentence or file_info is None:
        return "Please select a sentence and record or upload audio.", "0.00"

    expected_text = selected_sentence
    user_spoken_text = transcribe_audio(file_info)

    # transcribe_audio reports failures as plain text. Surface that message
    # instead of scoring it as if the learner had spoken it.
    if (user_spoken_text == "Could not understand audio"
            or user_spoken_text.startswith("Could not request results;")):
        return user_spoken_text, "0.00"

    similarity = ratio(expected_text.lower(), user_spoken_text.lower())
    description = f"{similarity:.2f}"  # Formats the float to 2 decimal places

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    return feedback, description


# Gradio UI: the learner picks a sentence and supplies a recording; the app
# shows a feedback message and a similarity score.
iface = gr.Interface(
    fn=pronunciation_correction,
    inputs=[
        gr.Dropdown(choices=df['Sentences'].tolist(), label="Select a Sentence"),
        # type="numpy" passes the audio to fn as a (sample_rate, data) tuple.
        gr.Audio(label="Upload Audio File", type="numpy")
    ],
    outputs=[
        gr.Textbox(label="Pronunciation Feedback"),  # Custom label for the text output
        gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)")  # Custom label for the numerical output
    ],
    # The emoji was stored as mis-decoded UTF-8 bytes; restored to the
    # intended character.
    title="🌀 Pronunciation Feedback Tool"
)


# debug=True is passed through to Gradio's launch().
iface.launch(debug=True)
