<a href="https://colab.research.google.com/github/MK316/Myapps/blob/main/TCEapps/stress_intonation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 📗**Chapter 7 Stress and intonation (F23)**

**Goal:** Learn stress placement with sound - 160 words listed in Chapter 7
+ Wordlist (csv file) [Wordlist 160](https://raw.githubusercontent.com/MK316/Myapps/main/data/stress160.csv)

# **Part I: Play sound to learn stress**

Setting: install packages

In [None]:
#@markdown 📌 Run this code before you start
%%capture
!pip install gradio pandas gtts requests librosa matplotlib pydub

**Instruction:** Run the code below and you'll get a live link (or you can practice with the app here).

+ ID: 1~160
+ Words: listed words

Note: You can type either the ID number or the word to play the sound

In [None]:
#@markdown 🌀 Gradio link
import gradio as gr
import pandas as pd
import requests
from gtts import gTTS
from io import BytesIO

# URL of the raw CSV file on GitHub (the functions below read its 'ID' and 'Words' columns)
csv_url = 'https://raw.githubusercontent.com/MK316/Myapps/main/data/stress160.csv'

# Use requests to get the CSV file content from GitHub.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports HTTP errors even when Python runs with assertions
# disabled (an `assert` check is skipped under `python -O`).
response = requests.get(csv_url, timeout=30)
response.raise_for_status()

# Load the CSV content into a DataFrame
df = pd.read_csv(BytesIO(response.content))

def generate_audio_by_id(word_id):
    """Synthesize audio for the word whose 'ID' equals `word_id`.

    Looks the ID up in the module-level DataFrame `df` and hands the first
    matching entry of the 'Words' column to `text_to_speech`.

    Returns:
        Whatever `text_to_speech` returns for that word.

    Raises:
        IndexError: if no row of `df` has the requested ID.
    """
    id_matches = df['ID'] == word_id
    matching_words = df.loc[id_matches, 'Words'].values
    # Indexing an empty result raises IndexError, which signals an unknown ID.
    return text_to_speech(matching_words[0])

def generate_audio_by_word(word):
    """Synthesize audio for `word` if it appears in the word list.

    The lookup ignores surrounding whitespace and letter case, so typing
    " Apple " still finds the listed word "apple". The listed spelling is the
    one passed to `text_to_speech`.

    Args:
        word: text typed by the user; None or blank text counts as "not found".

    Returns:
        A `(audio_path, status)` pair: the value returned by `text_to_speech`
        and an empty status on success, or `(None, "Word not found in the list.")`.
    """
    query = str(word or "").strip().lower()
    # Drop missing cells so a NaN entry can never match the typed text "nan".
    listed = df['Words'].dropna().astype(str)
    matches = listed[listed.str.strip().str.lower() == query]
    if query and not matches.empty:
        return text_to_speech(matches.iloc[0].strip()), ""  # Path plus empty status message
    return None, "Word not found in the list."  # No audio plus a status message


import os
import tempfile

def text_to_speech(text):
    """Convert `text` to English speech with gTTS and save it as an MP3 file.

    Args:
        text: the word or sentence to speak.

    Returns:
        The path of the saved temporary .mp3 file, or None when `text` is
        None or blank (there is nothing to speak).

    Raises:
        Any exception raised by gTTS while synthesizing or saving; the
        temporary file is removed before the exception propagates.
    """
    if text is None or not str(text).strip():
        # Nothing to synthesize: report "no audio" instead of failing inside gTTS.
        return None

    # Reserve a unique file name, then close the handle BEFORE gTTS writes to
    # it: a named temporary file cannot be reopened by name on every platform
    # while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as f:
        audio_path = f.name

    try:
        gTTS(text=text, lang='en').save(audio_path)
    except Exception:
        # delete=False means nobody else cleans up; do not leave an empty file behind.
        os.remove(audio_path)
        raise
    return audio_path  # Return the path to the saved file


def search_and_generate_audio(search_by, query):
    """Dispatch a lookup by ID or by word and return `(audio_path, status)`.

    Args:
        search_by: 'ID' or 'Words'; any other value (including None) is rejected.
        query: the text typed by the user.

    Returns:
        `(audio_path, "")` on success, or `(None, message)` describing why no
        audio was produced.
    """
    if search_by == 'ID':
        # Only the conversion is guarded, so an unrelated ValueError raised
        # further down is not misreported as a malformed ID.
        try:
            word_id = int(query)  # Convert ID to integer
        except (TypeError, ValueError):
            return None, "ID must be an integer."
        try:
            return generate_audio_by_id(word_id), ""
        except IndexError:
            # generate_audio_by_id indexes the first match, which fails when
            # no row carries this ID.
            return None, "ID not found in the list."
    if search_by == 'Words':
        return generate_audio_by_word(query)
    return None, "Please select a valid search option."


# Define the Gradio interface
# The callback takes two arguments (search_by, query) and returns a pair
# (audio path or None, status text); the two inputs and two outputs below are
# listed in that same order.
iface = gr.Interface(
    fn=search_and_generate_audio,
    inputs=[
        # The choices are exactly the values search_and_generate_audio compares against.
        gr.Radio(['ID', 'Words'], label="Search by:"),
        gr.Textbox(label="Enter ID or Word:")
    ],
    outputs=[
        gr.Audio(label="Audio of the word"),
        gr.Textbox(label="Status")
    ],
    title="Word Audio Generator"
)

# Launch the interface
iface.launch(debug=True)


## Generate speech file to download

In [None]:
#@markdown 🌀 Generate speech from x to y (x, y = ID numbers)
import pandas as pd
from gtts import gTTS
from pydub import AudioSegment
from IPython.display import Audio
import os

url = "https://raw.githubusercontent.com/MK316/Myapps/main/data/stress160.csv"

# Function to generate speech with pauses
def generate_speech_with_pause(github_url, from_id, to_id, pause_ms=1000, output_path="output.mp3"):
    """Speak every listed word with an ID in [from_id, to_id] into one MP3 file.

    Args:
        github_url: location of the CSV word list (anything `pd.read_csv`
            accepts); it must have 'ID' and 'Words' columns.
        from_id: first ID to include (inclusive).
        to_id: last ID to include (inclusive).
        pause_ms: silence appended after each word, in milliseconds
            (default 1000, i.e. 1 second).
        output_path: where the combined MP3 is written (default "output.mp3").

    Returns:
        An `IPython.display.Audio` player for the exported file.

    Raises:
        ValueError: if no word has an ID inside the requested range.
    """
    from io import BytesIO  # local import: this cell does not import io itself

    # Read the CSV file and keep the words whose ID is inside the range
    word_table = pd.read_csv(github_url)
    in_range = (word_table['ID'] >= from_id) & (word_table['ID'] <= to_id)
    words = word_table.loc[in_range, 'Words']
    if words.empty:
        # Fail clearly instead of exporting an empty recording.
        raise ValueError(f"No words found with ID between {from_id} and {to_id}.")

    # Silence inserted after every word
    pause = AudioSegment.silent(duration=pause_ms)

    # Start from an empty recording and append "word + pause" for each word
    combined = AudioSegment.empty()
    for word in words:
        # Keep the MP3 data in memory, so no temp.mp3 is left in the working
        # directory if synthesis or decoding fails part-way through.
        mp3_buffer = BytesIO()
        gTTS(word).write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)  # rewind so the decoder reads from the start
        combined += AudioSegment.from_file(mp3_buffer, format="mp3") + pause

    # Save the final combined audio
    combined.export(output_path, format="mp3")

    # Play the audio
    return Audio(output_path)

# Example usage
github_url = url  # Replace with your GitHub URL
# int() raises ValueError if the typed value is not a whole number.
from_id = int(input('Start ID: '))  # User input for start ID
to_id = int(input('End ID: '))      # User input for end ID
audio = generate_speech_with_pause(github_url, from_id, to_id)
# Leaving `audio` as the last expression makes the notebook display the player.
audio


# **Part II: Reading a sentence to learn basic intonation**

In [None]:
#@markdown 🌀 Sentence reading
import gradio as gr
from gtts import gTTS
import tempfile
import os

# Define the function to convert text to speech and save it to a temporary file
# (same name as the helper defined in Part I; running this cell rebinds it)
def text_to_speech(text):
    """Convert `text` to English speech with gTTS and save it as an MP3 file.

    Args:
        text: the sentence typed into the Gradio textbox.

    Returns:
        The path of the saved temporary .mp3 file, or None when `text` is
        None or blank (there is nothing to speak).

    Raises:
        Any exception raised by gTTS while synthesizing or saving; the
        temporary file is removed before the exception propagates.
    """
    if text is None or not str(text).strip():
        # Nothing to synthesize: report "no audio" instead of failing inside gTTS.
        return None

    # Reserve a unique file name, then close the handle BEFORE gTTS writes to
    # it: a named temporary file cannot be reopened by name on every platform
    # while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as f:
        audio_path = f.name

    try:
        gTTS(text=text, lang='en').save(audio_path)
    except Exception:
        # delete=False means nobody else cleans up; do not leave an empty file behind.
        os.remove(audio_path)
        raise
    # Return the path to the saved audio file
    return audio_path

# Create the Gradio interface
# text_to_speech takes the textbox text and returns the path of the saved
# audio file, which is handed to the Audio output.
# Note: this rebinds the name `iface` that the Part I cell also assigns.
iface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(placeholder="Type a sentence here..."),
    outputs=gr.Audio(),
    title="Text to Speech",
    description="Enter a sentence to convert it to speech."
)

# Launch the interface and create a shareable link
iface.launch(share=True)


# **Part III. Visible pitch contour (intonation)**


In [None]:
#@markdown [1] Generate speech audio file: you can type a word or sentence
from gtts import gTTS
from pydub import AudioSegment
from io import BytesIO
from IPython.display import Audio  # Import the Audio class for playing audio

# Synthesize speech for a word or sentence and store it as a WAV file
def generate_and_save_wav(word, filename='output.wav'):
    """Speak `word` in English, write the result to `filename` as WAV, and return a player.

    Args:
        word: the word or sentence to synthesize.
        filename: destination of the WAV file (default 'output.wav').

    Returns:
        An `IPython.display.Audio` object built from `filename`.
    """
    # The synthesized speech arrives as MP3 data; hold it in memory rather than on disk.
    mp3_buffer = BytesIO()
    gTTS(text=word, lang='en').write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)  # rewind so the decoder starts at the first byte

    # Decode the MP3 data and re-encode it as WAV at the requested location.
    AudioSegment.from_file(mp3_buffer, format="mp3").export(filename, format="wav")

    # Hand back a player for the freshly written file.
    return Audio(filename)

# Example usage
# `mytext` stays in the notebook namespace; the pitch-contour cell below prints it.
mytext = input('Type a word or sentence: ')
# With no filename argument the audio is written to the default 'output.wav'.
audio = generate_and_save_wav(mytext)
# Leaving `audio` as the last expression makes the notebook display the player.
audio


In [None]:
#@markdown Intonation contour (pitch)
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np


# Function to extract and plot the pitch contour
def plot_pitch_contour(audio_file_path):
    """Plot the waveform of an audio file with its pitch contour drawn as red dots.

    Args:
        audio_file_path: path of the audio file to analyse.

    Returns:
        None; the figure is shown with `plt.show()`.
    """
    # Load the audio file (sr=None asks librosa to keep the file's own sampling rate)
    y, sr = librosa.load(audio_file_path, sr=None)

    # Define the range for expected pitch (fundamental frequency)
    fmin = librosa.note_to_hz('C2')  # Example minimum pitch
    fmax = librosa.note_to_hz('C6')  # Example maximum pitch

    # Extract the pitch contour with pYIN (librosa.pyin); the voicing flags and
    # probabilities it also returns are not needed here.
    pitch, _, _ = librosa.pyin(y, fmin=fmin, fmax=fmax, sr=sr)

    # Treat non-finite estimates (unvoiced segments) as zero, then keep only
    # the frames that carry a positive pitch value.
    pitch = np.where(np.isfinite(pitch), pitch, 0.0)
    voiced = pitch > 0
    times = librosa.times_like(pitch, sr=sr)

    # Plot the waveform and the pitch contour on the same axes
    plt.figure(figsize=(14, 5))
    librosa.display.waveshow(y, sr=sr)

    # One vectorised call draws every voiced frame as a red dot ('ro' has no
    # connecting line), instead of creating one plot artist per frame.
    plt.plot(times[voiced], pitch[voiced], 'ro')

    plt.title('Pitch Contour')
    plt.xlabel('Time (s)')
    plt.ylabel('Pitch (Hz)')
    plt.ylim(0,350)  # points above 350 Hz fall outside the visible range
    plt.show()

# Example usage
# `mytext` is set by the "Generate speech audio file" cell above, so run that cell first.
print(f"This is one possible intonation of: {mytext}")
# Use the same relative file name that generate_and_save_wav() writes by
# default, so the cell also works when the working directory is not /content.
plot_pitch_contour('output.wav')
