<a href="https://colab.research.google.com/github/MK316/SpeechProcessing/blob/main/SR_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Speech Rate

In [None]:
# Python packages used below: SpeechRecognition (transcription), nltk (CMU pronouncing dictionary), mutagen (audio duration)
!pip install SpeechRecognition nltk mutagen
# System package: ffmpeg, used in the pre-setting section to convert mp3 files to wav
!apt install -y ffmpeg

## Pre-setting: Run the following code if you have your audio file in the mp3 format

[1] Upload the mp3 file to be converted to wav

In [None]:
from google.colab import files

# Open Colab's upload dialog; the returned dict is keyed by uploaded filename.
uploaded = files.upload()

# Cancelling the dialog yields an empty dict, which would otherwise surface as
# an unhelpful "list index out of range" on the lookup below.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose an mp3 file.")

# Only the first uploaded file is used by the conversion step that follows.
mp3_filename = next(iter(uploaded))

[2] Converting mp3 to wav

In [None]:
import os
import subprocess

# Define the WAV filename (same base name as the uploaded mp3)
wav_filename = os.path.splitext(mp3_filename)[0] + ".wav"

# Convert MP3 to WAV
# The arguments are passed as a list (no shell involved), so filenames that
# contain spaces or parentheses -- e.g. "talk (1).mp3" -- are handled correctly.
# -y overwrites an existing output so the cell can be re-run without ffmpeg
# stopping at its overwrite prompt; check=True raises if the conversion fails,
# because "-loglevel panic" would otherwise hide the failure completely.
subprocess.run(
    ["ffmpeg", "-y", "-loglevel", "panic", "-i", mp3_filename, wav_filename],
    check=True,
)
print(f"Converted {mp3_filename} -> {wav_filename}")

[3] Play the audio to check the conversion

In [None]:
from IPython.display import Audio, display

def play_audio(filename):
    """Show an inline audio player for the audio file at ``filename``.

    The path is passed through the ``filename`` keyword so the string is always
    interpreted as a file path; passed positionally, a path that does not exist
    would be treated as raw audio data and fail with a misleading error.
    """
    display(Audio(filename=filename))

# Call the function
# Plays the file produced by the conversion cell above.
play_audio(wav_filename)  # You can replace 'wav_filename' with your audio file's name


# 🔎 **Part I: Speech rate calculation (syll/sec) with audio information**

The following code collects the information below for each audio file:

+ Filename
+ Text (from speech recognition)
+ Duration (audio duration)
+ Number of words
+ Number of syllables
+ Average number of syllables per word
+ WPM (words per minute): number of words per minute
+ SR (Speech rate): number of syllables per second

**Note: Run [1a] or [1b] to read audio files:**

[1a] Audio zip file from Google Drive  
[1b] Audio zip file from your computer  

**Then, move to [2] to get the audio information**  

[2] Calculate the SR and record the information

## [1a] Audio files (one zip file) from your Google Drive

In [None]:
import os
import shutil
from google.colab import drive

In [None]:
import zipfile

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Specify the path to the zip file in Google Drive (replace with your actual path)
zip_path = "/content/drive/MyDrive/SNSR/test.zip"

# 3. Create a temporary directory for unzipping
unzip_dir = "/content/temp_unzip_folder/"
os.makedirs(unzip_dir, exist_ok=True)

# Unzip the file
# zipfile is used instead of a shell `unzip` call so that paths containing
# spaces (common in Drive folder names) do not need shell quoting.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

# 4. Move the extracted files to the "myaudio" folder
myaudio_path = "/content/myaudio/"
os.makedirs(myaudio_path, exist_ok=True)

for item in os.listdir(unzip_dir):
    shutil.move(os.path.join(unzip_dir, item), os.path.join(myaudio_path, item))

# Clean up by removing the temporary directory
shutil.rmtree(unzip_dir)


## [1b] Audio files (one zip file) from your computer

In [None]:
import os
import zipfile
from google.colab import files
import shutil

# 1. Upload a zip file
uploaded = files.upload()

# Cancelling the dialog yields an empty dict; fail with a clear message
# instead of an IndexError on the lookup below.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a zip file.")
zip_filename = next(iter(uploaded))

# 2. Create a folder named 'myaudio' (no error if it already exists)
os.makedirs('myaudio', exist_ok=True)

# 3. Unzip the uploaded files
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall('temp_unzip_folder')

# 4. Move the unzipped files to the 'myaudio' folder
# The destination is spelled out per item: moving into the directory itself
# raises shutil.Error when a file of the same name is already there, which
# made this cell fail when re-run with the same archive.
for item in os.listdir('temp_unzip_folder'):
    shutil.move(os.path.join('temp_unzip_folder', item), os.path.join('myaudio', item))

# Cleanup: remove the temporary unzip folder
shutil.rmtree('temp_unzip_folder')

# 5. Remove the zip file
os.remove(zip_filename)


# [2] Calculating the SR and recording the information in a DataFrame

In [None]:
import os
import pandas as pd
import speech_recognition as sr
from nltk.corpus import cmudict
import nltk
import mutagen

# Tokenizer data: recent NLTK releases (3.9 and later) load 'punkt_tab' rather
# than the older 'punkt' models, so both are fetched to keep NLTK's tokenizers
# working regardless of which version pip installed.
nltk.download('punkt')
nltk.download('punkt_tab')
# Pronouncing dictionary used for syllable counting.
nltk.download('cmudict')

# Initialize CMU dictionary (maps a lowercase word to its list of pronunciations)
d = cmudict.dict()

# Functions for syllable and text metrics
import re  # standard library; used by the tokenizer and the fallback estimator


def _estimate_syllables(word):
    """Estimate the syllables of a word that is missing from the dictionary.

    Counts groups of consecutive vowel letters and discounts a silent final
    "e". Tokens without vowel letters (digits, stray symbols) yield 0.
    """
    lowered = word.lower()
    count = len(re.findall(r"[aeiouy]+", lowered))
    # A final "e" is usually silent ("skype"), but not in "-le" / "-ee" endings.
    if count > 1 and lowered.endswith("e") and not lowered.endswith(("le", "ee")):
        count -= 1
    return count


def nsyl(word, pron_dict=None):
    """Return the number of syllables in ``word``.

    Args:
        word: A single word; the lookup is case-insensitive.
        pron_dict: Optional mapping of lowercase word -> list of pronunciations
            (each a list of phoneme strings whose vowels end in a stress
            digit). Defaults to the module-level CMU dictionary ``d``.

    Returns:
        The largest syllable count among the word's dictionary pronunciations,
        or a spelling-based estimate when the word is not in the dictionary
        (previously such words silently counted as 0 syllables).
    """
    lookup = d if pron_dict is None else pron_dict
    key = word.lower()
    if key in lookup:
        # Vowel phonemes carry a stress digit, so counting them counts syllables.
        return max(
            sum(1 for phoneme in pron if phoneme[-1].isdigit())
            for pron in lookup[key]
        )
    return _estimate_syllables(key)


def text_metrics(text, pron_dict=None):
    """Compute word and syllable statistics for a transcript.

    Words are runs of letters/digits, optionally joined by apostrophes, so
    punctuation is not counted as a word and contractions such as "didn't"
    stay whole and are looked up as a single dictionary entry. Numbers written
    as digits still contribute 0 syllables.

    Args:
        text: The transcript to analyse; may be empty.
        pron_dict: Optional pronunciation mapping forwarded to ``nsyl``.

    Returns:
        A tuple ``(total_words, total_syllables, avg_syllables)``; the average
        is 0 when the text contains no words.
    """
    # Normalise typographic apostrophes so contractions match dictionary keys.
    normalized = text.replace("\u2019", "'")
    words = re.findall(r"[^\W_]+(?:'[^\W_]+)*", normalized)
    total_words = len(words)
    total_syllables = sum(nsyl(word, pron_dict) for word in words)
    avg_syllables = total_syllables / total_words if total_words != 0 else 0
    return total_words, total_syllables, avg_syllables

# Initialize speech recognizer
r = sr.Recognizer()

# List .wav files from myaudio folder
# os.listdir returns names in arbitrary order, so the list is sorted to keep
# the row order of the results reproducible; the extension is matched
# case-insensitively so files such as "REC01.WAV" are not skipped.
wav_files = sorted(f for f in os.listdir('myaudio') if f.lower().endswith('.wav'))

# One dict of metrics per processed file is appended here.
results = []

# Process each .wav file: transcribe it, measure it, and store one record per file
for wav_file in wav_files:
    wav_path = os.path.join('myaudio', wav_file)

    # 2. Transcribe audio
    # The transcript stays empty when recognition fails, so the file still
    # gets a row (with zero words and syllables) in the final table.
    with sr.AudioFile(wav_path) as source:
        audio_data = r.record(source)
        text = ""
        try:
            text = r.recognize_google(audio_data)
        except sr.UnknownValueError:
            print(f"Could not understand audio {wav_file}")
        except sr.RequestError:
            print(f"API unavailable for {wav_file}")

    # 3. Calculate metrics
    total_words, total_syllables, avg_syllables = text_metrics(text)

    # Audio length as reported by mutagen, used as the denominator of both rates.
    audio = mutagen.File(wav_path)
    duration = audio.info.length
    if duration != 0:
        speech_rate = total_syllables / duration          # syllables per second
        speech_rate_wpm = (total_words / duration) * 60   # words per minute
    else:
        # Guard against division by zero for zero-length audio.
        speech_rate = 0
        speech_rate_wpm = 0

    # Append results
    record = {
        'Filename': wav_file,
        'Text': text,
        'Duration': duration,
        'Nwords': total_words,
        'Nsyll': total_syllables,
        'Avg_syll': avg_syllables,
        'WPM': speech_rate_wpm,
        'SR': speech_rate,
    }
    results.append(record)

# 4. Create DataFrame (one row per audio file) and display it
df = pd.DataFrame(results)
df


# 🔎 **Part II**