<a href="https://colab.research.google.com/github/MK316/Myapps/blob/main/mrkim21apps/Oxford3Kaudio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Oxford 5K vocabulary learning with audio

+ [APP link](https://mrkim21.github.io/appfolder/oxford5k-audio.html)
+ [Oxford 5K wordlist](https://github.com/MK316/Myapps/blob/main/mrkim21apps/data/readme.md)

In [None]:
%%capture
!pip install gradio gtts
!pip install pydub
!apt-get install ffmpeg

# [1] Basic codes

+ 📌 The [Oxford3K.csv](https://github.com/MK316/Myapps/blob/main/mrkim21apps/data/Oxford3K.csv) file should be uploaded via the left panel (folder icon) before running the next cell.

In [None]:
import gradio as gr
from gtts import gTTS
import pandas as pd
from pydub import AudioSegment
import io
import numpy as np

# Location of the uploaded word list inside the Colab runtime.
# Point this at your own file if it lives somewhere else.
csv_file = "/content/Oxford3K.csv"

# Parse the CSV once, when the cell runs; generate_speech() below slices `df`.
data = pd.read_csv(csv_file)
df = pd.DataFrame(data)


def _indefinite_article(word):
    """Return "an" if ``word`` starts with a vowel letter, otherwise "a"."""
    text = str(word).strip().lower()
    return "an" if text.startswith(("a", "e", "i", "o", "u")) else "a"


def generate_speech(x, y):
    """Build one MP3 that reads rows ``x``..``y`` (1-based, inclusive) of ``df``.

    Args:
        x: Position of the first row to read; converted with ``int``.
        y: Position of the last row to read; converted with ``int``.

    Returns:
        bytes: MP3 data. The track opens with 1 s of silence; each sentence
        "Number <SID>. <WORD> is a/an <POS>." is followed by 2 s of silence.

    Raises:
        gr.Error: If either bound is missing (an empty Number box sends None).
    """
    if x is None or y is None:
        raise gr.Error("Please enter both a start and an end number.")
    x, y = int(x), int(y)

    # Clamp to valid 1-based positions. Without this, x <= 0 or y < 0 would
    # turn into negative iloc bounds, which count from the END of the table.
    start = max(x, 1) - 1
    stop = max(y, 0)

    # Start with 1 second of silence for padding
    combined_audio = AudioSegment.silent(duration=1000)

    for _, row in df.iloc[start:stop].iterrows():
        # Pick "a"/"an" from the POS so e.g. "adjective" is not read as "a adjective".
        article = _indefinite_article(row['POS'])
        sentence = f"Number {row['SID']}. {row['WORD']} is {article} {row['POS']}."
        tts = gTTS(text=sentence, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        mp3_fp.seek(0)  # rewind so the decoder reads from the beginning
        sentence_audio = AudioSegment.from_file(mp3_fp, format='mp3')
        # Add the sentence audio and 2 seconds of silence
        combined_audio += sentence_audio + AudioSegment.silent(duration=2000)

    # Encode the whole track in memory and hand the raw bytes to Gradio
    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    return mp3_io.getvalue()

# Define the Gradio interface.
# The Number boxes carry default values (`value=`, the replacement for the old
# `default=` argument) so that submitting without typing does not send None to
# generate_speech. precision=0 asks Gradio to round the input to an integer.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Number(label="Start Sentence Number (x)", value=1, precision=0),
        gr.Number(label="End Sentence Number (y)", value=10, precision=0)
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Speech Generator",
    description="Generate speech from the CSV data. Specify the start and end sentence numbers."
)


# Launch the Gradio app with sharing enabled

iface.launch(share=True, debug=True)

# [2] Add a CEFR level filter (LEVEL column) to the application

In [None]:
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Word list used by generate_speech() below, which reads its
# LEVEL, SID, WORD and POS columns. Change the path if the file lives elsewhere.
csv_file = "/content/Oxford3K.csv"
data = pd.read_csv(csv_file)

def generate_speech(level, x, y):
    """Speak every word of one level whose SID lies between ``x`` and ``y``.

    Rows of the module-level ``data`` table are narrowed first to ``level``
    (the ``LEVEL`` column) and then to ``x <= SID <= y``. Each match is read
    as "Number <SID>. <WORD> is <POS>." followed by 2 s of silence; the track
    opens with 1 s of silence.

    Returns:
        bytes: The MP3-encoded track.
    """
    level_rows = data.loc[data['LEVEL'] == level]

    # Gradio's Number component may deliver floats; work with whole numbers.
    low, high = int(x), int(y)
    in_range = (level_rows['SID'] >= low) & (level_rows['SID'] <= high)
    selection = level_rows.loc[in_range]

    track = AudioSegment.silent(duration=1000)  # lead-in padding

    for _, entry in selection.iterrows():
        text = f"Number {entry['SID']}. {entry['WORD']} is {entry['POS']}."
        buffer = io.BytesIO()
        gTTS(text=text, lang='en').write_to_fp(buffer)
        buffer.seek(0)  # rewind before decoding
        clip = AudioSegment.from_file(buffer, format="mp3")
        track += clip + AudioSegment.silent(duration=2000)

    encoded = io.BytesIO()
    track.export(encoded, format='mp3')
    return encoded.getvalue()

# Build the UI: a level picker plus the two SID bounds, wired to generate_speech.
iface = gr.Interface(
    title="Speech Generator",
    description="Select a level and specify the start and end sentence numbers within that level.",
    fn=generate_speech,
    inputs=[
        # Extend this list if the CSV contains other LEVEL values.
        gr.Dropdown(choices=['B1', 'B2'], label="Select Level"),
        gr.Number(label="Start Sentence Number (x)"),
        gr.Number(label="End Sentence Number (y)"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the app with a shareable link and debug output enabled.
iface.launch(debug=True, share=True)


# [2] Add CEFR level option

+ For this implementation, we split the data files by LEVEL.
+ 📌 Upload the 6 level files via the left panel (folder icon) before running the cell; they are [downloadable from here](https://github.com/MK316/Myapps/blob/main/mrkim21apps/data/readme.md).

In [None]:
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# One CSV per dropdown choice: the five CEFR levels plus the "5K" list.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}


def load_data(level):
    """Read the word list for ``level`` from the Colab ``/content`` folder.

    ``level`` must be a key of ``csv_files``; an unknown key raises KeyError.
    Returns the parsed CSV as a DataFrame.
    """
    file_name = csv_files[level]
    return pd.read_csv(f"/content/{file_name}")

def generate_speech(level, x, y):
    """Read out the words of ``level`` whose SID is between ``x`` and ``y``.

    The level's table comes from ``load_data``. Every selected row is spoken
    as "Number <SID>. <WORD>! <WORD> is <POS>." and followed by 1.5 s of
    silence; the track opens with 1 s of silence.

    Returns:
        bytes: The MP3-encoded track.
    """
    words = load_data(level)

    # Gradio's Number component may deliver floats; compare as whole numbers.
    first_sid, last_sid = int(x), int(y)
    keep = (words['SID'] >= first_sid) & (words['SID'] <= last_sid)
    chosen = words.loc[keep]

    track = AudioSegment.silent(duration=1000)  # lead-in padding

    for _, entry in chosen.iterrows():
        text = f"Number {entry['SID']}. {entry['WORD']}! {entry['WORD']} is {entry['POS']}."
        buffer = io.BytesIO()
        gTTS(text=text, lang='en').write_to_fp(buffer)
        buffer.seek(0)  # rewind before decoding
        clip = AudioSegment.from_file(buffer, format="mp3")
        track += clip + AudioSegment.silent(duration=1500)

    encoded = io.BytesIO()
    track.export(encoded, format='mp3')
    return encoded.getvalue()

# UI for the per-level files: every key of csv_files is offered in the dropdown.
iface = gr.Interface(
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level and define the starting and ending numbers for that level. The system will create a single audio file formatted as 'Number 1. Word is a noun.' After submission, you have the option to download the audio file. Additionally, you can download the numbered lists for each level from the 'My Apps' section at https://mrkim21.github.io (web address)",
    fn=generate_speech,
    inputs=[
        gr.Dropdown(
            choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'],
            label="Select Level (3K: A1, A2, B1, B2, C1; 5K: additional B2 and C1)",
        ),
        gr.Number(label="Start Number (x)"),
        gr.Number(label="End Number (y)"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the app with a shareable link and debug output enabled.
iface.launch(debug=True, share=True)


# [3] Add option for speech text (with or without numbering)

In [None]:
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Dropdown label -> CSV file name (files are expected under /content).
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}


def load_data(level):
    """Return the word list for ``level`` as a DataFrame.

    The file name is looked up in ``csv_files`` (KeyError for an unknown
    level) and read from ``/content``.
    """
    selected_file = csv_files[level]
    table = pd.read_csv(f"/content/{selected_file}")
    return table

def generate_speech(level, x, y, audio_option):
    """Create one MP3 reading the words of ``level`` whose SID is in ``[x, y]``.

    Args:
        level: Key understood by ``load_data`` (e.g. "A1").
        x: Lowest SID to include; converted with ``int``.
        y: Highest SID to include; converted with ``int``.
        audio_option: "Audio with number" speaks "Number <SID>. <WORD> is <POS>.";
            any other value speaks just "<WORD>!".

    Returns:
        bytes: MP3 data. The track opens with 1 s of silence and each sentence
        is followed by 1.5 s of silence.
    """
    data = load_data(level)

    x, y = int(x), int(y)
    filtered_df = data[(data['SID'] >= x) & (data['SID'] <= y)]

    combined_audio = AudioSegment.silent(duration=1000)  # lead-in padding

    for _, row in filtered_df.iterrows():
        if audio_option == "Audio with number":
            # Say the WORD after its number. The SID used to be inserted here a
            # second time, so the word itself was never read out.
            sentence = f"Number {row['SID']}. {row['WORD']} is {row['POS']}."
        else:  # "Audio without number"
            sentence = f"{row['WORD']}!"

        tts = gTTS(text=sentence, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        mp3_fp.seek(0)  # rewind so the decoder reads from the beginning
        sentence_audio = AudioSegment.from_file(mp3_fp, format="mp3")
        combined_audio += sentence_audio + AudioSegment.silent(duration=1500)

    # Encode the whole track in memory and hand the raw bytes to Gradio.
    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    return mp3_io.getvalue()

# UI: level, SID range, and a radio button choosing between the numbered
# sentence and the bare word.
iface = gr.Interface(
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, and select the audio option. The system will create a single audio file. After submission, you have the option to download the audio file.",
    fn=generate_speech,
    inputs=[
        gr.Dropdown(
            choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'],
            label="Select Level (3K: A1, A2, B1, B2, C1; 5K: additional B2 and C1)",
        ),
        gr.Number(label="Start Number (x)"),
        gr.Number(label="End Number (y)"),
        gr.Radio(
            choices=["Audio with number", "Audio without number"],
            label="Audio Option",
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the app with a shareable link and debug output enabled.
iface.launch(debug=True, share=True)


# [4] Add POS options

The user can select a part of speech (POS); only words with that POS in the given range (Start, End) are read out.

In [None]:
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Level name -> CSV file holding that level's numbered word list.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}


def load_data(level):
    """Load the CSV registered for ``level`` in ``csv_files`` from ``/content``.

    Raises KeyError when ``level`` is not a key of ``csv_files``.
    """
    return pd.read_csv(f"/content/{csv_files[level]}")

def _indefinite_article(word):
    """Return "an" if ``word`` starts with a vowel letter, otherwise "a"."""
    text = str(word).strip().lower()
    return "an" if text.startswith(("a", "e", "i", "o", "u")) else "a"


def generate_speech(level, x, y, audio_option, pos_filter):
    """Create one MP3 for the words of ``level`` in an SID range, optionally by POS.

    Args:
        level: Key understood by ``load_data`` (e.g. "A1").
        x: Lowest SID to include; converted with ``int``.
        y: Highest SID to include; converted with ``int``.
        audio_option: "Audio with number" speaks
            "Number <SID>. <WORD>! <WORD> is a/an <POS>."; any other value
            speaks just "<WORD>!".
        pos_filter: "Any" (or None, i.e. nothing selected) keeps every word;
            otherwise only rows whose POS equals it, ignoring case, are kept.

    Returns:
        bytes: MP3 data. The track opens with 1 s of silence and each sentence
        is followed by 1.5 s of silence.
    """
    data = load_data(level)

    x, y = int(x), int(y)
    # The SID range applies to every request, so build that mask only once.
    selected = (data['SID'] >= x) & (data['SID'] <= y)
    if pos_filter is not None and pos_filter != "Any":
        selected &= data['POS'].str.lower() == pos_filter.lower()
    filtered_df = data[selected]

    combined_audio = AudioSegment.silent(duration=1000)  # lead-in padding

    for _, row in filtered_df.iterrows():
        if audio_option == "Audio with number":
            # Pick "a"/"an" from the POS so "adjective"/"adverb" are not read
            # as "a adjective"/"a adverb".
            article = _indefinite_article(row['POS'])
            sentence = f"Number {row['SID']}. {row['WORD']}! {row['WORD']} is {article} {row['POS']}."
        else:  # "Audio without number"
            sentence = f"{row['WORD']}!"

        tts = gTTS(text=sentence, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        mp3_fp.seek(0)  # rewind so the decoder reads from the beginning
        sentence_audio = AudioSegment.from_file(mp3_fp, format="mp3")
        combined_audio += sentence_audio + AudioSegment.silent(duration=1500)

    # Encode the whole track in memory and hand the raw bytes to Gradio.
    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    return mp3_io.getvalue()


# UI: level, SID range, audio style, and a part-of-speech filter
# ("Any" disables the filter in generate_speech).
iface = gr.Interface(
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file.",
    fn=generate_speech,
    inputs=[
        gr.Dropdown(choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'], label="Select Level"),
        gr.Number(label="Range: Start Number (x)"),
        gr.Number(label="Range: End Number (y)"),
        gr.Radio(
            choices=["Audio with number", "Audio without number"],
            label="Audio Option",
        ),
        gr.Dropdown(
            choices=["Any", "Noun", "Verb", "Adjective", "Adverb"],
            label="Select Part of Speech",
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the app with a shareable link and debug output enabled.
iface.launch(debug=True, share=True)


# 🌀 [5] Revise the POS options (ALL, Noun, Verb, ...): Final version

+ If no word matches the selected POS, the speech should say something like, "There is no {POS} matching your selection in the given range."

In [None]:
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Maps each level offered in the UI to its CSV file under /content.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}


def load_data(level):
    """Read and return the DataFrame for ``level``.

    ``level`` selects a file name from ``csv_files`` (KeyError if absent);
    the file is read from the ``/content`` folder.
    """
    level_file = csv_files[level]
    frame = pd.read_csv(f"/content/{level_file}")
    return frame

def _indefinite_article(word):
    """Return "an" if ``word`` starts with a vowel letter, otherwise "a"."""
    text = str(word).strip().lower()
    return "an" if text.startswith(("a", "e", "i", "o", "u")) else "a"


def _synthesize(sentence):
    """Turn one English sentence into a pydub AudioSegment via gTTS."""
    mp3_fp = io.BytesIO()
    gTTS(text=sentence, lang='en').write_to_fp(mp3_fp)
    mp3_fp.seek(0)  # rewind so the decoder reads from the beginning
    return AudioSegment.from_file(mp3_fp, format="mp3")


def generate_speech(level, x, y, audio_option, pos_filter):
    """Create one MP3 for the words of ``level`` in an SID range, optionally by POS.

    Args:
        level: Key understood by ``load_data`` (e.g. "A1").
        x: Lowest SID to include; converted with ``int``.
        y: Highest SID to include; converted with ``int``.
        audio_option: "Audio with number" speaks
            "Number <SID>. <WORD> is a/an <POS>."; any other value speaks
            just "<WORD>!".
        pos_filter: "ALL" (or None, i.e. nothing selected) keeps every word;
            otherwise only rows whose POS equals it, ignoring case, are kept.

    Returns:
        bytes: MP3 data. With matches: 1 s of silence, then each sentence
        followed by 1.5 s of silence. Without matches: a single spoken notice.
    """
    data = load_data(level)

    x, y = int(x), int(y)
    filter_by_pos = pos_filter is not None and pos_filter != "ALL"
    # The SID range applies to every request, so build that mask only once.
    selected = (data['SID'] >= x) & (data['SID'] <= y)
    if filter_by_pos:
        selected &= data['POS'].str.lower() == pos_filter.lower()
    filtered_df = data[selected]

    if filtered_df.empty:
        if filter_by_pos:
            sentence = f"There is no {pos_filter} in the selected range."
        else:
            # Without a POS filter the notice must not say "There is no ALL ...".
            sentence = "There are no words in the selected range."
        combined_audio = _synthesize(sentence)
    else:
        combined_audio = AudioSegment.silent(duration=1000)  # lead-in padding
        for _, row in filtered_df.iterrows():
            if audio_option == "Audio with number":
                # Pick "a"/"an" from the POS so "adjective"/"adverb" are not
                # read as "a adjective"/"a adverb".
                article = _indefinite_article(row['POS'])
                sentence = f"Number {row['SID']}. {row['WORD']} is {article} {row['POS']}."
            else:  # "Audio without number"
                sentence = f"{row['WORD']}!"

            combined_audio += _synthesize(sentence) + AudioSegment.silent(duration=1500)

    # Encode the whole track in memory and hand the raw bytes to Gradio.
    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    return mp3_io.getvalue()


# Final UI. Every input has a default `value` so that pressing Submit on an
# untouched form sends usable arguments instead of None (None would fail in
# load_data / int() inside generate_speech). precision=0 asks Gradio to round
# the number boxes to integers.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Dropdown(label="Select Level", choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'], value='A1'),
        gr.Number(label="Start Number (x)", value=1, precision=0),
        gr.Number(label="End Number (y)", value=10, precision=0),
        gr.Radio(label="Audio Option", choices=["Audio with number", "Audio without number"], value="Audio with number"),
        # "ALL" disables the part-of-speech filter in generate_speech.
        gr.Dropdown(label="Select Part of Speech", choices=["ALL", "Noun", "Verb", "Adjective", "Adverb", "Preposition"], value="ALL")
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file."
)

# Start the app with a shareable link and debug output enabled.
iface.launch(share=True, debug=True)
