In [5]:
pip install librosa pandas torchaudio scikit-learn matplotlib


Note: you may need to restart the kernel to use updated packages.


In [6]:
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

In [1]:
# Convert the Audio to transcription 
!pip install openai-whisper
!pip install ffmpeg-python




In [2]:
pip install pydub


Note: you may need to restart the kernel to use updated packages.


In [None]:
from pydub import AudioSegment
import os
import subprocess

# Convert every supported audio file found directly inside input_folder into a
# WAV file of the same base name inside output_folder.
input_folder = "../Audios"
output_folder = "../WAVs"
os.makedirs(output_folder, exist_ok=True)

# Lower-cased file extension -> format name passed to pydub.
pydub_formats = {
    ".flac": "flac",
    ".mp3": "mp3",
    ".m4a": "m4a",
    ".wav": "wav"
}

converted = 0
skipped = 0
failed = 0

# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for file in sorted(os.listdir(input_folder)):
    file_path = os.path.join(input_folder, file)
    if not os.path.isfile(file_path):
        # Sub-directories are not audio files; ignore them silently.
        continue

    base_name, ext = os.path.splitext(file)
    ext = ext.lower()
    wav_path = os.path.join(output_folder, base_name + ".wav")

    if ext not in pydub_formats and ext != ".sph":
        print(f"Unsupported format: {file}")
        skipped += 1
        continue

    # Makes the cell resumable after an interruption. Delete the WAV file to
    # force a re-conversion of a given input.
    if os.path.exists(wav_path):
        print(f"Already converted, skipping: {file}")
        skipped += 1
        continue

    # Write to a temporary name first so that an interrupted conversion never
    # leaves a truncated file under the final .wav name.
    tmp_path = wav_path + ".part"
    try:
        if ext in pydub_formats:
            sound = AudioSegment.from_file(file_path, format=pydub_formats[ext])
            # export() returns the open output file; close it explicitly.
            sound.export(tmp_path, format="wav").close()
            os.replace(tmp_path, wav_path)
            print(f"Converted {ext.upper()} -> WAV: {file} -> {wav_path}")
        else:
            # "-t wav" fixes the output type, so the ".part" suffix is harmless.
            subprocess.run(["sox", file_path, "-t", "wav", tmp_path], check=True)
            os.replace(tmp_path, wav_path)
            print(f"Converted SPH -> WAV: {file} -> {wav_path}")
        converted += 1
    except Exception as e:
        # Also covers a missing sox/ffmpeg executable (FileNotFoundError), so a
        # single bad file or missing tool does not abort the whole batch.
        failed += 1
        if ext in pydub_formats:
            print(f"Failed to convert {file} with pydub: {e}")
        else:
            print(f"Error converting {file} with SoX: {e}")
    finally:
        # Runs on KeyboardInterrupt too: never leave a partial file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

print(f"Conversion finished: {converted} converted, {skipped} skipped, {failed} failed.")


Converted .MP3 -> WAV: short-stories-0193-194-modern-engineering-wonders.mp3 -> ../WAVs/short-stories-0193-194-modern-engineering-wonders.wav
Converted .MP3 -> WAV: short-stories-0227-229-newspapers.mp3 -> ../WAVs/short-stories-0227-229-newspapers.wav
Converted .M4A -> WAV: yasintha_audio.m4a -> ../WAVs/yasintha_audio.wav
Converted .MP3 -> WAV: short-stories-0156-157-radio-stations.mp3 -> ../WAVs/short-stories-0156-157-radio-stations.wav
Converted .MP3 -> WAV: short-stories-0238-240-the-grand-canyon.mp3 -> ../WAVs/short-stories-0238-240-the-grand-canyon.wav
Converted .MP3 -> WAV: ielts-listening-0306-ielts-recent-actual-test-29-part-2.mp3 -> ../WAVs/ielts-listening-0306-ielts-recent-actual-test-29-part-2.wav
Converted .MP3 -> WAV: toeic-0298-short-talk-148.mp3 -> ../WAVs/toeic-0298-short-talk-148.wav
Converted .MP3 -> WAV: short-stories-0134-135-first-date.mp3 -> ../WAVs/short-stories-0134-135-first-date.wav
Converted .MP3 -> WAV: toeic-0500-short-talk-250.mp3 -> ../WAVs/toeic-0500-sho

KeyboardInterrupt: 

In [3]:
pip install openai-whisper

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Transcribe Using Whisper
import whisper
import os
import pandas as pd

# Transcribes every WAV file in `folder` and stores one row per file
# (columns: filename, transcription) in `output_csv`. The CSV is rewritten
# after every file, so interrupting this long-running cell keeps the finished
# transcriptions and a re-run only processes the files that are still missing.
# Delete the CSV to transcribe everything from scratch.
model = whisper.load_model("medium")

folder = "../WAVs"
output_csv = "data.csv"
csv_columns = ["filename", "transcription"]


def _save_transcriptions(records):
    """Write `records` to `output_csv` via a temp file and return the DataFrame."""
    frame = pd.DataFrame(records, columns=csv_columns)
    tmp_csv = output_csv + ".tmp"
    frame.to_csv(tmp_csv, index=False)
    # Swap the file in one step so an interrupt cannot leave a half-written CSV.
    os.replace(tmp_csv, output_csv)
    return frame


# Load results of an earlier run, if any. keep_default_na=False keeps empty
# transcriptions as "" instead of NaN.
try:
    previous = pd.read_csv(output_csv, dtype=str, keep_default_na=False)
    transcriptions = previous[csv_columns].to_dict("records")
except (FileNotFoundError, pd.errors.EmptyDataError, KeyError):
    transcriptions = []

already_done = {entry["filename"] for entry in transcriptions}

# Case-insensitive extension match; sorted for a reproducible order.
pending = sorted(
    name for name in os.listdir(folder)
    if name.lower().endswith(".wav") and name not in already_done
)

for file in pending:
    path = os.path.join(folder, file)
    try:
        print(f"Transcribing: {file}...")
        result = model.transcribe(path)
        text = result["text"]
    except Exception as e:
        print(f"Error transcribing {file}: {e}")
        continue

    transcriptions.append({
        "filename": file,
        "transcription": text.strip()
    })
    # Checkpoint after every file.
    _save_transcriptions(transcriptions)

# Final write also covers the "nothing to do" case and guarantees a header row.
df = _save_transcriptions(transcriptions)
print("Transcriptions saved to data.csv")

Transcribing: ielts-listening-0092-cam14-test-3-part-4.wav...




Transcribing: short-stories-0207-209-sport-canada.wav...




Transcribing: short-stories-0264-267-etiquette.wav...




KeyboardInterrupt: 

In [13]:
import librosa
import os
import pandas as pd

# Label every transcribed recording by speaking rate in words per minute (WPM):
#   wpm < SLOW_BELOW_WPM                     -> "Slow"
#   SLOW_BELOW_WPM <= wpm <= FAST_ABOVE_WPM  -> "Ideal"
#   otherwise                                -> "Fast"
SLOW_BELOW_WPM = 100
FAST_ABOVE_WPM = 150


def label_for_wpm(wpm):
    """Return the speaking-rate label ("Slow", "Ideal" or "Fast") for `wpm`."""
    if wpm < SLOW_BELOW_WPM:
        return "Slow"
    if wpm <= FAST_ABOVE_WPM:
        return "Ideal"
    return "Fast"


# Load the transcription data.
# dtype=str / keep_default_na=False: an empty transcription is written to the
# CSV as an empty field, which pandas would otherwise read back as NaN (a
# float), making .split() fail and the recording be dropped as an "error".
df = pd.read_csv(
    "data.csv",
    dtype={"filename": str, "transcription": str},
    keep_default_na=False,
)
folder = "../WAVs"


durations = []
wpms = []
labels = []
valid_indices = []  # index labels of the rows that were processed successfully

for i, row in df.iterrows():
    file_path = os.path.join(folder, row["filename"])
    try:
        print(f"Processing: {row['filename']}...")
        # Ask for the duration by path instead of decoding and resampling the
        # whole recording only to measure its length.
        duration_sec = librosa.get_duration(path=file_path)
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        continue

    if duration_sec <= 0:
        # WPM is undefined for an empty recording; skip it with a clear message
        # instead of relying on a ZeroDivisionError.
        print(f"Error processing {file_path}: audio has zero duration")
        continue

    # Calculate WPM
    word_count = len(row["transcription"].split())
    wpm = word_count / (duration_sec / 60)

    durations.append(duration_sec)
    wpms.append(wpm)
    labels.append(label_for_wpm(wpm))
    valid_indices.append(i)


# Keep only the rows that were processed, aligned with the collected lists.
df = df.loc[valid_indices].reset_index(drop=True)
df["duration_sec"] = durations
df["wpm"] = wpms
df["label"] = labels


df.to_csv("labeled_data.csv", index=False)
print("✅ Saved labeled dataset to labeled_data.csv")


Processing: ielts-listening-0092-cam14-test-3-part-4.wav...
Processing: short-stories-0207-209-sport-canada.wav...
Processing: short-stories-0264-267-etiquette.wav...
Processing: short-stories-0212-214-nike.wav...
Processing: toeic-0253-short-talk-123.wav...
Processing: short-stories-0250-253-julie-andrews.wav...
Processing: short-stories-0159-160-a-small-town.wav...
Processing: ielts-listening-0220-cam6-test-3-part-4.wav...
Processing: toeic-0559-short-talk-279.wav...
Processing: toeic-0373-short-talk-183.wav...
Processing: short-stories-0185-186-the-earth-revolves-around-the-sun.wav...
Processing: toeic-0118-short-talk-58.wav...
Processing: toeic-0231-short-talk-111.wav...
Processing: toeic-0280-short-talk-140.wav...
Processing: short-stories-0085-85-a-baby.wav...
Processing: ielts-listening-0024-cam18-test-2-part-4.wav...
Processing: ielts-listening-0144-cam11-test-4-part-4.wav...
Processing: toeic-0497-short-talk-247.wav...
Processing: short-stories-0047-47-bugs.wav...
Processing: 