<a href="https://colab.research.google.com/github/5eunji/Final-project-G3/blob/main/Wordclould_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Pre-Listening Activity: Learning New Words**
## 1. Gradio Wordcloud App: Create a word cloud from the text to highlight the most frequent words.

In [None]:
!pip install matplotlib wordcloud nltk gradio pandas

import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from collections import Counter
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
import gradio as gr
import pandas as pd

# Download necessary NLTK data.
# Recent NLTK releases look up the tokenizer and tagger models under the names
# 'punkt_tab' and 'averaged_perceptron_tagger_eng', while older releases use
# 'punkt' and 'averaged_perceptron_tagger'. Both sets are fetched so that
# word_tokenize and pos_tag work with whichever NLTK version pip installed.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('stopwords')
nltk.download('wordnet')

# English stop words are removed before counting word frequencies.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Define example sentences, synonyms, and Korean meanings for the word list.
# Each key is a lower-case vocabulary word; each value is a 4-tuple of
# (example sentence, comma-separated synonyms, Korean meaning, second Korean meaning).
# Only words that appear as keys here are listed in the vocabulary table.
word_data_examples = {
    "feud": ("The feud between the Montagues and Capulets caused much suffering.", "conflict, quarrel", "불화", "싸움"),
    "family": ("The Montague family was Romeo’s family.", "household, kin", "가족", "가문"),
    "party": ("Romeo secretly attended a Capulet party.", "gathering, celebration", "파티", "모임"),
    "love": ("Their love was pure and strong.", "affection, passion", "사랑", "애정"),
    "hate": ("The hate between the families was unending.", "anger, hostility", "증오", "미움"),
    "window": ("Romeo stood below Juliet’s window.", "pane, opening", "창문", "유리창"),
    "promise": ("Romeo promised to love Juliet forever.", "vow, pledge", "약속", "맹세"),
    "secret": ("Their love remained a secret.", "hidden, private", "비밀", "숨겨진"),
    "marry": ("They decided to marry despite their families’ feud.", "wed, unite", "결혼하다", "혼인하다"),
    "tragedy": ("Romeo and Juliet is a story of tragedy and love.", "disaster, misfortune", "비극", "참사")
}

# Lower-case words that must never appear in the word cloud or the word list
# (character names, family names, the setting, and the interjection "oh").
exclude_words = {
    "romeo",
    "juliet",
    "montague",
    "capulet",
    "oh",
    "verona",
}

def get_wordnet_pos(treebank_tag):
    """Map a POS tag to the corresponding WordNet part-of-speech constant.

    Tags starting with 'J', 'V', 'N' or 'R' map to the WordNet adjective,
    verb, noun and adverb constants respectively. Any other tag (including
    an empty string) yields None.
    """
    # Attribute names are resolved lazily so that `wordnet` is only touched
    # when a prefix actually matches.
    attr_by_prefix = {'J': 'ADJ', 'V': 'VERB', 'N': 'NOUN', 'R': 'ADV'}
    for prefix, attr_name in attr_by_prefix.items():
        if treebank_tag.startswith(prefix):
            return getattr(wordnet, attr_name)
    return None

def process_text(text):
    """Tokenize *text* and return word frequencies plus POS tags.

    Args:
        text: Raw English text to analyse.

    Returns:
        A ``(word_freq, pos_tags)`` pair. ``word_freq`` is a Counter of the
        lower-cased alphanumeric tokens that are neither stop words nor in
        ``exclude_words``. ``pos_tags`` is a list of ``(word, tag)`` tuples
        for those same tokens, in text order, with the word lower-cased.
    """
    tokens = nltk.word_tokenize(text)

    # Tag the complete token sequence BEFORE filtering. The tagger uses the
    # neighbouring words and the original casing as context, so tagging a
    # lower-cased list with the stop words already removed gives less
    # reliable tags.
    tagged_tokens = nltk.pos_tag(tokens)

    pos_tags = []
    for token, tag in tagged_tokens:
        lowered = token.lower()
        if token.isalnum() and lowered not in stop_words and lowered not in exclude_words:
            pos_tags.append((lowered, tag))

    word_freq = Counter(word for word, _ in pos_tags)
    return word_freq, pos_tags

def generate_wordcloud(word_freq):
    """Render a word cloud image for *word_freq* and return its file path.

    Args:
        word_freq: Mapping of word -> frequency.

    Returns:
        ``'wordcloud.png'`` (written to the current working directory and
        overwritten on every call), or ``None`` when *word_freq* is empty
        and there is nothing to draw.
    """
    # WordCloud cannot build an image from zero words; skip rendering
    # instead of letting the request fail.
    if not word_freq:
        return None

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_freq)
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.savefig('wordcloud.png')
    finally:
        # Each call creates a new figure; close it so figures do not pile up
        # in memory across requests.
        plt.close(fig)
    return 'wordcloud.png'

def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary rows for words that have a glossary entry.

    Args:
        word_freq: Mapping of word -> frequency.
        pos_tags: Iterable of ``(word, tag)`` pairs for the same words.

    Returns:
        A list of ``(word, pos, translation, example_sentence, synonyms)``
        tuples, ordered by descending frequency. A word is listed only if it
        is a key of ``word_data_examples``, is not in ``exclude_words`` and
        has at least one tag that ``pos_map`` knows.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.', 'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동태)', 'VBP': 'v.', 'VBZ': 'v.', 'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.', 'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.'
    }

    # Collect the known labels per word in a single pass instead of
    # rescanning every tag for every word.
    labels_by_word = {}
    for tagged in pos_tags:
        label = pos_map.get(tagged[1])
        if label is not None:
            labels_by_word.setdefault(tagged[0], set()).add(label)

    word_data = []
    for word in word_freq:
        if word not in word_data_examples or word in exclude_words:
            continue

        labels = labels_by_word.get(word)
        if not labels:
            # No recognised part of speech for this word: leave it out.
            continue
        # Sort the labels so the rendered text is the same on every run
        # (set iteration order is not stable across interpreter runs).
        pos_str = ", ".join(sorted(labels))

        entry = word_data_examples[word]
        example_sentence, synonyms = entry[:2]
        translation = f"{entry[2]}, {entry[3]}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)

    return word_data

def main(text):
    """Run the full pipeline for the Gradio interface.

    Returns a pair: the value produced by ``generate_wordcloud`` for the
    text's word frequencies, and an HTML table of the vocabulary rows.
    """
    frequencies, tagged_words = process_text(text)
    image_path = generate_wordcloud(frequencies)
    rows = translate_and_get_pos(frequencies, tagged_words)

    headers = [
        "어휘 (Word)",
        "범주 (Category)",
        "뜻 (Meaning)",
        "예문 (Example)",
        "동의어 (Synonyms)",
    ]
    table = pd.DataFrame(rows, columns=headers)
    table_html = table.to_html(index=False, justify='center')

    return image_path, table_html

# Custom style sheet for the vocabulary app, kept as a complete <style> element:
# lavender page background, purple buttons, and full-width, centred tables with
# light borders and a grey header row.
css = """
<style>
body {
    background-color: lavender !important;
}
.gr-button {
    background-color: purple !important;
    border-color: purple !important;
}
table {
    width: 100%;
    border-collapse: collapse;
    text-align: center;
}
th, td {
    padding: 8px;
    border: 1px solid #ddd;
}
th {
    background-color: #f2f2f2;
}
</style>
"""

# Lesson content: Romeo and Juliet text.
# Offered as the ready-made example input of the Gradio interface.
lesson_text = """
Many years ago, in the city of Verona, Italy, there were two families, the Montagues and the Capulets. These two families were always battling and did not like each other.

One day, Romeo Montague secretly attended a Capulet party. There, he saw Juliet Capulet and instantly fell in love. However, their love was in danger because of their families’ feud. After the party, Romeo went to Juliet’s window, and they promised to love each other forever.

Despite their love, the feud between their families grew worse. Their story is one of love, tragedy, and heartbreak.
"""

# Gradio interface
# The style sheet has to be handed to the interface itself to take effect; a
# gr.HTML component created after launch() is not part of the running app.
# The `css` argument expects plain CSS rules, so the surrounding <style>
# element is stripped first.
interface_css = css.replace("<style>", "").replace("</style>", "")

interface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=["image", "html"],
    title="Romeo and Juliet Vocabulary Learning App",
    description="Analyze the text from 'Romeo and Juliet' to generate a word cloud and a vocabulary list with meanings, parts of speech, and examples.",
    examples=[[lesson_text]],
    css=interface_css,
)

interface.launch()


# **While-Listening Activity: Learning New Words**
## 2. Gradio TTS App

In [None]:
!pip install gradio
!pip install gtts
!pip install SpeechRecognition

import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import os

# Define your sentences here.
# The dropdown shows each entry prefixed with its 1-based position ("1. ...");
# the handlers read that number back and use it to index into this list.
sents = [
    "Many years ago, in the city of Verona, Italy, there were two families, the Montagues and the Capulets. These two families were always battling and did not like each other.",
    "One day, Romeo Montague secretly attended a Capulet party. There, he saw Juliet Capulet and instantly fell in love. However, their love was in danger because of their families’ feud.",
    "After the party, Romeo went to Juliet’s window. Juliet stood at her window, whispering, 'Oh Romeo, Romeo! Where are you Romeo? Give up your family name, or if you won't, just promise to love me, and I'll give up being a Capulet.'",
    "Below Juliet's window, Romeo stood in the shadows of a wall. He looked up at her, his heart beating fast. Romeo and Juliet promised to marry in secret and expressed their love for each other.",
    "Despite their love, the feud between their families grew worse. Their story is one of love, tragedy, and heartbreak."
]

def text_to_speech(selected_sentence, language):
    """Synthesize the selected sentence and return the path of the MP3 file.

    Args:
        selected_sentence: Dropdown value of the form ``"<number>. <text>"``.
        language: ``"British English"`` selects the ``co.uk`` voice; any
            other value uses ``com``.

    Returns:
        Path to a newly written MP3 file, or ``None`` when no sentence has
        been selected yet.
    """
    # Nothing selected in the dropdown: there is nothing to synthesize.
    if not selected_sentence:
        return None

    tld = 'co.uk' if language == "British English" else 'com'

    sn = int(selected_sentence.split(".")[0])  # Extract the sentence number
    mytext = sents[sn - 1]  # Get the selected sentence

    tts = gTTS(text=mytext, lang='en', tld=tld, slow=False)

    # Write to a unique temporary file so that simultaneous requests do not
    # overwrite each other's audio. The handle is closed before gTTS writes
    # to the path, and the file is deliberately kept (delete=False) because
    # the caller still has to read it.
    with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp_file:
        filename = tmp_file.name
    tts.save(filename)
    return filename

def recognize_speech_from_microphone(audio_path):
    """Transcribe the audio file at *audio_path* with Google Speech Recognition.

    Args:
        audio_path: Path to the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        The recognized text on success. On failure, a message that starts
        with ``"Could not"`` or ``"Error"`` is returned instead of raising.
    """
    if not audio_path:
        return "Error: no audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        # The audio is fully read at this point, so the file can be released
        # before the network request is made.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # Prefix unexpected failures so callers can tell them apart from a
        # real transcript (a bare exception message has no such marker).
        return f"Error: {e}"

def calculate_similarity(original_text, recognized_text):
    """Return the case-insensitive similarity of two strings as a percentage.

    The value is ``difflib.SequenceMatcher``'s ratio of the lower-cased
    inputs, scaled to the range 0-100.
    """
    reference = original_text.lower()
    candidate = recognized_text.lower()
    matcher = SequenceMatcher(None, reference, candidate)
    return matcher.ratio() * 100

def process_audio(selected_sentence, audio_path):
    """Transcribe a recording and score it against the selected sentence.

    Args:
        selected_sentence: Dropdown value of the form ``"<number>. <text>"``.
        audio_path: Path to the learner's recording.

    Returns:
        ``(recognized_text, similarity)``. When the sentence or the recording
        is missing, or recognition fails, the first element is the error
        message and the similarity is ``0.0``.
    """
    # Both inputs are empty until the learner has picked a sentence and
    # recorded something; report that instead of failing.
    if not selected_sentence:
        return "Error: no sentence selected", 0.0
    if not audio_path:
        return "Error: no audio recorded", 0.0

    sn = int(selected_sentence.split(".")[0])  # Extract the sentence number
    original_text = sents[sn - 1]  # Get the selected sentence
    recognized_text = recognize_speech_from_microphone(audio_path)
    # The recognizer reports failures as text; do not score those messages.
    if "Error" in recognized_text or "Could not" in recognized_text:
        return recognized_text, 0.0
    similarity = calculate_similarity(original_text, recognized_text)
    return recognized_text, similarity

def display_sentence(selected_sentence):
    """Return the full text of the sentence chosen in the dropdown.

    Args:
        selected_sentence: Dropdown value of the form ``"<number>. <text>"``.

    Returns:
        The matching entry of ``sents``, or an empty string when nothing is
        selected.
    """
    # The dropdown has no value until the learner picks a sentence.
    if not selected_sentence:
        return ""
    sn = int(selected_sentence.split(".")[0])
    return sents[sn - 1]

# Two-part UI: a text-to-speech panel on top and a pronunciation evaluator below.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Text-to-Speech Converter")
            # Choices are "<1-based index>. <sentence>"; the handlers parse the leading number.
            dropdown_sentences = gr.Dropdown(choices=[f"{i}. {sents[i-1]}" for i in range(1, len(sents) + 1)], label="Select Sentence")
            radio_language = gr.Radio(choices=['American English', 'British English'], label="Language")
            generate_tts_button = gr.Button("Generate Speech")
            tts_audio_output = gr.Audio(type="filepath", label="Output Audio")
            # Clicking the button synthesizes the selected sentence in the chosen accent.
            generate_tts_button.click(text_to_speech, inputs=[dropdown_sentences, radio_language], outputs=tts_audio_output)
            selected_sentence_display = gr.Textbox(label="Selected Sentence", interactive=False)
            # Show the full sentence text whenever the dropdown selection changes.
            dropdown_sentences.change(display_sentence, inputs=dropdown_sentences, outputs=selected_sentence_display)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Pronunciation Evaluator")
            mic_input = gr.Audio(label="Your Pronunciation", type="filepath")
            result_button = gr.Button("Evaluate Pronunciation")
            recognized_text = gr.Textbox(label="Recognized Text")
            similarity_score = gr.Number(label="Similarity (%)")

            # The recording is scored against the sentence selected in the dropdown above.
            result_button.click(process_audio, inputs=[dropdown_sentences, mic_input], outputs=[recognized_text, similarity_score])

demo.launch()
