<a href="https://colab.research.google.com/github/5eunji/Final-project-G3/blob/main/TTS_App(1)_%EB%B3%B8%EB%AC%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **While-Listening Activity: Learning New Words**
## 2. Gradio TTS App

In [None]:
!pip install gradio
!pip install gtts
!pip install SpeechRecognition

import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import os

# Passage for the listening activity, split into the chunks offered in the UI.
# Entries are addressed by 1-based position: the dropdown labels are built as
# "<n>. <text>" and the handlers parse <n> back out to index into this list,
# so reordering or removing entries changes which number maps to which text.
sents = [
    "Many years ago, in the city of Verona, Italy, there were two families, the Montagues and the Capulets. These two families were always battling and did not like each other.",
    "One day, Romeo Montague secretly attended a Capulet party. There, he saw Juliet Capulet and instantly fell in love. However, their love was in danger because of their families’ feud.",
    "After the party, Romeo went to Juliet’s window. Juliet stood at her window, whispering, 'Oh Romeo, Romeo! Where are you Romeo? Give up your family name, or if you won't, just promise to love me, and I'll give up being a Capulet.'",
    "Below Juliet's window, Romeo stood in the shadows of a wall. He looked up at her, his heart beating fast. Romeo and Juliet promised to marry in secret and expressed their love for each other.",
    "Despite their love, the feud between their families grew worse. Their story is one of love, tragedy, and heartbreak."
]

def text_to_speech(selected_sentence, language):
    """Synthesize the selected passage sentence to an MP3 file with gTTS.

    Args:
        selected_sentence: Dropdown label of the form "<n>. <text>", where
            <n> is the 1-based position of the sentence in ``sents``.
        language: Accent label. "British English" selects the ``co.uk``
            gTTS domain; any other value (including ``None``) uses ``com``.

    Returns:
        Path of the generated MP3 file.

    Raises:
        gr.Error: If no sentence has been selected yet.
    """
    if not selected_sentence:
        # An untouched dropdown hands the callback None; surface a readable
        # message instead of an AttributeError from None.split().
        raise gr.Error("Please select a sentence first.")

    tld = 'co.uk' if language == "British English" else 'com'

    # The label starts with the 1-based sentence number followed by a dot.
    sn = int(selected_sentence.split(".", 1)[0])
    mytext = sents[sn - 1]

    tts = gTTS(text=mytext, lang='en', tld=tld, slow=False)

    # Use a unique temp file instead of a shared 'output.mp3' so overlapping
    # requests cannot overwrite each other's audio. delete=False because the
    # file must still exist after this function returns its path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        filename = tmp.name
    # Save after the handle is closed: some platforms refuse to reopen a
    # temp file that is still held open.
    tts.save(filename)
    return filename

def recognize_speech_from_microphone(audio_path):
    """Transcribe a recorded audio file with Google Speech Recognition.

    Args:
        audio_path: Path to the recording; ``None`` or an empty string when
            nothing was recorded.

    Returns:
        The recognized text on success. On failure, a human-readable message
        that starts with either "Could not" or "Error", so callers can tell
        a failure apart from a transcript.
    """
    if not audio_path:
        return "Error: no audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        # The audio is fully read into memory above, so the file is closed
        # before the (potentially slow) network request is made.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # Deliberately broad: this is the UI boundary. Prefix the message so
        # it is not mistaken for recognized speech and scored.
        return f"Error: {e}"

def calculate_similarity(original_text, recognized_text):
    """Return the case-insensitive similarity of two strings as a percentage.

    Args:
        original_text: The reference text.
        recognized_text: The text to compare against the reference.

    Returns:
        A float between 0 and 100, where 100 means the lowercased strings
        are identical.
    """
    # autojunk must be off: with the default heuristic, once the second
    # string reaches 200 characters every frequently occurring character is
    # dropped as a match anchor, which understates similarity for long text.
    matcher = SequenceMatcher(
        None,
        original_text.lower(),
        recognized_text.lower(),
        autojunk=False,
    )
    return matcher.ratio() * 100

def process_audio(selected_sentence, audio_path):
    """Transcribe a recording and score it against the selected sentence.

    Args:
        selected_sentence: Dropdown label of the form "<n>. <text>", where
            <n> is the 1-based position of the sentence in ``sents``.
        audio_path: Path to the learner's recording.

    Returns:
        A ``(text, score)`` tuple. On success ``text`` is the recognized
        speech and ``score`` its similarity to the sentence in percent. On
        failure ``text`` is a message and ``score`` is ``0.0``.
    """
    # Guard missing inputs up front; otherwise None.split() raises, or the
    # recognizer's exception text ends up being scored as a transcript.
    if not selected_sentence:
        return "Error: please select a sentence first", 0.0
    if not audio_path:
        return "Error: please record your pronunciation first", 0.0

    sn = int(selected_sentence.split(".", 1)[0])  # Leading 1-based sentence number
    original_text = sents[sn - 1]
    recognized_text = recognize_speech_from_microphone(audio_path)

    # The recognizer reports failures as message strings rather than raising;
    # these markers are how a failure message is told apart from a transcript.
    if "Error" in recognized_text or "Could not" in recognized_text:
        return recognized_text, 0.0

    similarity = calculate_similarity(original_text, recognized_text)
    return recognized_text, similarity

def display_sentence(selected_sentence):
    """Return the passage text for a dropdown label.

    Args:
        selected_sentence: Dropdown label of the form "<n>. <text>", where
            <n> is the 1-based position of the sentence in ``sents``.

    Returns:
        The matching sentence, or an empty string when nothing is selected.
    """
    if not selected_sentence:
        # A cleared or unset dropdown yields None; show an empty box rather
        # than failing on None.split().
        return ""
    sn = int(selected_sentence.split(".", 1)[0])
    return sents[sn - 1]

# Two stacked panels: a text-to-speech player and a pronunciation scorer.
# Both panels read the sentence chosen in the same dropdown.
with gr.Blocks() as demo:
    # Panel 1: pick a sentence and an accent, then listen to it.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Text-to-Speech Converter")
            dropdown_sentences = gr.Dropdown(
                # Labels carry the 1-based number the handlers parse back out.
                choices=[
                    f"{number}. {text}"
                    for number, text in enumerate(sents, start=1)
                ],
                label="Select Sentence",
            )
            radio_language = gr.Radio(
                choices=['American English', 'British English'],
                label="Language",
            )
            generate_tts_button = gr.Button("Generate Speech")
            tts_audio_output = gr.Audio(type="filepath", label="Output Audio")
            selected_sentence_display = gr.Textbox(
                label="Selected Sentence",
                interactive=False,
            )

            # Event wiring for panel 1.
            generate_tts_button.click(
                text_to_speech,
                inputs=[dropdown_sentences, radio_language],
                outputs=tts_audio_output,
            )
            dropdown_sentences.change(
                display_sentence,
                inputs=dropdown_sentences,
                outputs=selected_sentence_display,
            )

    # Panel 2: record the same sentence and get a similarity score.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Pronunciation Evaluator")
            mic_input = gr.Audio(label="Your Pronunciation", type="filepath")
            result_button = gr.Button("Evaluate Pronunciation")
            recognized_text = gr.Textbox(label="Recognized Text")
            similarity_score = gr.Number(label="Similarity (%)")

            result_button.click(
                process_audio,
                inputs=[dropdown_sentences, mic_input],
                outputs=[recognized_text, similarity_score],
            )

demo.launch()
