In [None]:
!pip install pydub
!pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading speechrecognition-3.14.2-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.2-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.14.2


In [None]:
import os
import shutil
import tempfile
import pydub
from scipy.io import wavfile

# Input and output folders for the conversion step: the source MP3 recordings
# and the folder that receives their WAV counterparts.
MP3_AUDIO_DIR = "/content/drive/MyDrive/metrics_dataset/dataset-stammer/dataset-stammer/"
WAV_AUDIO_DIR = "/content/drive/MyDrive/metrics_dataset/dataset-stammer/dataset-stammer_wav/"

# Create the WAV folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(WAV_AUDIO_DIR, exist_ok=True)

def read_mp3(file_path):
    """Decode an MP3 file and write it to a fresh temporary WAV file.

    Args:
        file_path: Path to the input audio; its extension must be exactly
            ``.mp3``.

    Returns:
        Path of the temporary ``.wav`` file holding the decoded audio. The
        caller owns that file and is responsible for moving or deleting it.

    Raises:
        AssertionError: If ``file_path`` does not end in ``.mp3``.
    """
    _, ext = os.path.splitext(file_path)
    assert ext == '.mp3', "File is not an MP3!"

    mp3 = pydub.AudioSegment.from_mp3(file_path)

    # mkstemp returns an *open* OS-level descriptor together with the path.
    # Only the path is needed here, so close the descriptor immediately;
    # otherwise one descriptor is leaked for every converted file.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        mp3.export(temp_wav_path, format="wav")
    except Exception:
        # Do not leave a partially written temp file behind when export fails.
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
        raise
    return temp_wav_path

# Convert every MP3 that does not yet have a WAV counterpart.
# The check is done per file rather than "is the WAV folder empty?", so an
# interrupted run can be resumed and a stray entry in the WAV folder no longer
# disables the whole conversion.
converted_count = 0
for file in sorted(os.listdir(MP3_AUDIO_DIR)):
    if not file.endswith(".mp3"):
        continue
    # splitext swaps only the trailing extension; str.replace would also
    # rewrite a ".mp3" that happens to occur earlier in the name.
    target_path = os.path.join(WAV_AUDIO_DIR, os.path.splitext(file)[0] + ".wav")
    if os.path.exists(target_path):
        continue
    wav_path = read_mp3(os.path.join(MP3_AUDIO_DIR, file))
    shutil.move(wav_path, target_path)
    converted_count += 1

if converted_count:
    print("MP3 to WAV conversion completed!")
else:
    print("WAV files already exist, skipping conversion.")

MP3 to WAV conversion completed!


In [None]:
import os
import speech_recognition as sr

# Folders used by the transcription step: WAV recordings are read from
# WAV_AUDIO_DIR and one text file per recording is written to TRANSCRIPT_DIR.
WAV_AUDIO_DIR = "/content/drive/MyDrive/metrics_dataset/dataset-stammer/dataset-stammer_wav/"
TRANSCRIPT_DIR = "/content/drive/MyDrive/metrics_dataset/transcriptions/"

# Create the transcript folder if needed; exist_ok=True makes re-runs harmless.
os.makedirs(TRANSCRIPT_DIR, exist_ok=True)

def transcribe_audio(file_path):
    """Transcribe one audio file using ``Recognizer.recognize_google``.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The recognized text, or a bracketed placeholder string when
        recognition raises ``UnknownValueError`` ("[Unrecognized Speech]")
        or ``RequestError`` ("[API Error]").
    """
    engine = sr.Recognizer()

    # Read the complete file into an audio object before recognition starts.
    with sr.AudioFile(file_path) as audio_source:
        captured = engine.record(audio_source)

    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        transcript = "[Unrecognized Speech]"
    except sr.RequestError:
        transcript = "[API Error]"
    return transcript

# Transcribe every WAV file and store the text as <recording name>.txt
for file in os.listdir(WAV_AUDIO_DIR):
    if not file.endswith(".wav"):
        continue

    wav_path = os.path.join(WAV_AUDIO_DIR, file)
    transcript = transcribe_audio(wav_path)

    # splitext swaps only the trailing extension; str.replace would also
    # rewrite a ".wav" occurring earlier in the file name.
    transcript_file = os.path.join(TRANSCRIPT_DIR, os.path.splitext(file)[0] + ".txt")
    # Explicit encoding so the saved text does not depend on the platform default.
    with open(transcript_file, "w", encoding="utf-8") as f:
        f.write(transcript)
    print(f"Transcribed: {file}")

print("Transcription process completed!")


Transcribed: M_0065_18y1m_1.wav
Transcribed: M_0394_10y2m_1.wav
Transcribed: M_0991_07y6m_1.wav
Transcribed: F_0101_10y4m_1.wav
Transcribed: M_0815_10y11m_1.wav
Transcribed: M_0104_12y9m_1.wav
Transcribed: M_0078_16y5m_1.wav
Transcribed: M_0090_10y1m_1.wav
Transcribed: M_1105_21y0m_1.wav
Transcribed: M_0061_14y8m_1.wav
Transcribed: M_0096_10y7m_1.wav
Transcribed: M_0065_14y5m_1.wav
Transcribed: M_0394_08y10m_1.wav
Transcribed: M_0121_11y1m_1.wav
Transcribed: M_1098_25y0m_1.wav
Transcribed: M_0017_09y0m_1.wav
Transcribed: M_0052_16y4m_1.wav
Transcribed: M_0814_14y5m_1.wav
Transcribed: M_0017_09y4m_1.wav
Transcribed: M_0081_09y3m_1.wav
Transcribed: M_0052_13y1m_1.wav
Transcribed: F_0050_10y9m_1.wav
Transcribed: F_0811_10y6m_1.wav
Transcribed: M_0107_08y5m_1.wav
Transcribed: M_0030_17y9m_1.wav
Transcribed: F_0811_10y4m_1.wav
Transcribed: M_0556_07y8m_1.wav
Transcribed: M_0132_12y11m_1.wav
Transcribed: M_0553_10y0m_1.wav
Transcribed: M_0553_11y0m_1.wav
Transcribed: M_0815_10y9m_1.wav
Trans

In [None]:
import os
import pandas as pd

# Define paths
TRANSCRIPT_DIR = "/content/drive/MyDrive/metrics_dataset/transcriptions/"
PROCESSED_DATA_PATH = "/content/drive/MyDrive/metrics_dataset/processed_data.csv"

# Collect one record per transcript file.
# sorted(): os.listdir returns entries in arbitrary order, so sorting keeps the
# row order of the CSV identical from run to run.
data = []
for file in sorted(os.listdir(TRANSCRIPT_DIR)):
    if file.endswith(".txt"):
        file_path = os.path.join(TRANSCRIPT_DIR, file)
        # Explicit encoding so decoding does not depend on the platform default.
        with open(file_path, "r", encoding="utf-8") as f:
            transcript = f.read().strip()
        data.append({"filename": file, "transcription": transcript})

# Naming the columns keeps the CSV header intact even when no transcript was
# found; a header-less empty CSV cannot be loaded back with pd.read_csv.
df = pd.DataFrame(data, columns=["filename", "transcription"])

# Save processed data
df.to_csv(PROCESSED_DATA_PATH, index=False)
print(f"Processed data saved to {PROCESSED_DATA_PATH}")


Processed data saved to /content/drive/MyDrive/metrics_dataset/processed_data.csv


In [None]:
import re

# Define paths
PROCESSED_DATA_PATH = "/content/drive/MyDrive/metrics_dataset/processed_data.csv"
STUTTER_ANALYSIS_PATH = "/content/drive/MyDrive/metrics_dataset/stutter_analysis.csv"

# Load processed data.
# keep_default_na=False: by default read_csv turns empty fields (and strings
# such as "NA" or "null") into float NaN, which would make the text-processing
# step fail on those rows. Transcriptions must stay plain strings.
df = pd.read_csv(PROCESSED_DATA_PATH, keep_default_na=False)

# Heuristic stutter detection based on repeated words and filler expressions
def detect_stutter(text):
    """Count simple disfluency markers in a transcript.

    Two kinds of events are counted and added together:

    * Immediate word repetitions: every word that is directly followed by the
      same word (compared case-insensitively, separated by whitespace) counts
      once, so "I I I" contributes 2.
    * Filler expressions: each occurrence of "um", "uh", "like" or
      "you know" (case-insensitive).

    Args:
        text: Transcript text. Non-string values (for example the NaN that
            pandas produces for an empty CSV field) are treated as containing
            no speech.

    Returns:
        int: Number of repetitions plus number of fillers.
    """
    if not isinstance(text, str):
        return 0

    # The lookahead does not consume the repeated word, so each member of a run
    # can itself start the next match; a consuming pattern would count
    # "I I I" only once.
    repeated_words = re.findall(r'\b(\w+)(?=\s+\1\b)', text, re.IGNORECASE)
    fillers = re.findall(r'\b(um|uh|like|you know)\b', text, re.IGNORECASE)

    return len(repeated_words) + len(fillers)

# Score every transcription with the stutter detector
df["stutter_count"] = df["transcription"].map(detect_stutter)

# Write the augmented table to disk
df.to_csv(STUTTER_ANALYSIS_PATH, index=False)
print(f"Stutter analysis saved to {STUTTER_ANALYSIS_PATH}")


Stutter analysis saved to /content/drive/MyDrive/metrics_dataset/stutter_analysis.csv


In [None]:
from textblob import TextBlob

# Define paths
STUTTER_ANALYSIS_PATH = "/content/drive/MyDrive/metrics_dataset/stutter_analysis.csv"
SPEECH_ANALYSIS_PATH = "/content/drive/MyDrive/metrics_dataset/speech_analysis.csv"

# Load stutter analysis data.
# keep_default_na=False: by default read_csv turns empty fields (and strings
# such as "NA" or "null") into float NaN, which the sentiment step cannot
# process as text. Transcriptions must stay plain strings.
df = pd.read_csv(STUTTER_ANALYSIS_PATH, keep_default_na=False)

# Perform sentiment analysis on transcriptions
def analyze_sentiment(text):
    """Return the TextBlob sentiment polarity of a transcript.

    Args:
        text: Transcript text. Non-string values (for example the NaN that
            pandas produces for an empty CSV field) cannot be analysed and
            are scored as neutral.

    Returns:
        float: ``TextBlob(text).sentiment.polarity`` for string input,
        ``0.0`` for non-string input.
    """
    # TextBlob only accepts strings; guard here so a single missing
    # transcription does not abort the whole column computation.
    if not isinstance(text, str):
        return 0.0

    blob = TextBlob(text)
    return blob.sentiment.polarity

# Attach a polarity score to every transcription
df["sentiment_score"] = df["transcription"].map(analyze_sentiment)

# Write the augmented table to disk
df.to_csv(SPEECH_ANALYSIS_PATH, index=False)
print(f"Speech analysis saved to {SPEECH_ANALYSIS_PATH}")

Speech analysis saved to /content/drive/MyDrive/metrics_dataset/speech_analysis.csv


In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import numpy as np
import pandas as pd
import os

# Folder with the WAV recordings to classify and the CSV that receives the result.
WAV_AUDIO_DIR = "/content/drive/MyDrive/metrics_dataset/dataset-stammer/dataset-stammer_wav/"
EMOTION_ANALYSIS_PATH = "/content/drive/MyDrive/metrics_dataset/emotion_analysis.csv"

# Load the YAMNet model from TensorFlow Hub (performed once, at cell execution).
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet = hub.load(yamnet_model_handle)

# Label names that predict_emotion can return.
# NOTE(review): this was described as a mapping from AudioSet class labels, but
# no such mapping is defined in this notebook; predict_emotion selects an entry
# by wrapping a class index modulo the length of this list. Confirm that this
# is intended before interpreting the labels as emotions.
emotion_labels = ["neutral", "calm", "happy", "sad", "angry", "fearful", "disgust", "surprised"]

# Run YAMNet on one recording and map its top class index onto emotion_labels
def predict_emotion(audio_path):
    """Return a label from ``emotion_labels`` for one audio file.

    The file is loaded at 16 kHz, truncated to its first 10 seconds and passed
    to the module-level ``yamnet`` model. The per-frame class scores are
    averaged over time and the index of the strongest class is folded onto
    ``emotion_labels`` with a modulo.

    NOTE(review): the modulo fold is an arbitrary index wrap, not a learned or
    documented class-to-emotion mapping; it is kept only so the returned label
    set stays unchanged. Treat the labels as placeholders until a real mapping
    or emotion model is in place.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        str: One entry of ``emotion_labels``; ``"unknown"`` when the file
        contains no samples; ``"error"`` when loading or inference raises
        (the exception is printed, not re-raised).
    """
    try:
        # Named sample_rate (not sr) so the speech_recognition alias used
        # elsewhere in the notebook is not shadowed inside this function.
        waveform, sample_rate = librosa.load(audio_path, sr=16000)
        waveform = waveform[:sample_rate * 10]  # Limit to 10 seconds

        if waveform.shape[0] == 0:
            return "unknown"  # If no valid audio, return "unknown"

        # Ensure waveform is 1D (YAMNet expects shape (None,))
        waveform = np.squeeze(waveform)

        # Run YAMNet model
        scores, embeddings, spectrogram = yamnet(waveform)

        # Per the YAMNet documentation, scores holds one row per analysis frame
        # and one column per class. Average over frames before taking the
        # argmax; an argmax over the flattened matrix would return an index
        # that also encodes the frame position instead of a class index.
        clip_scores = np.asarray(scores.numpy())
        if clip_scores.ndim > 1:
            clip_scores = clip_scores.mean(axis=0)

        predicted_class = int(np.argmax(clip_scores))
        return emotion_labels[predicted_class % len(emotion_labels)]

    except Exception as e:
        # Deliberate best-effort: one unreadable file must not stop the batch.
        print(f"Error processing {audio_path}: {e}")
        return "error"

# Classify each WAV recording and collect one record per file
data = []
for file in os.listdir(WAV_AUDIO_DIR):
    if not file.endswith(".wav"):
        continue
    wav_path = os.path.join(WAV_AUDIO_DIR, file)
    emotion = predict_emotion(wav_path)
    data.append(dict(filename=file, emotion=emotion))

# Write the per-file predictions to CSV
df = pd.DataFrame(data)
df.to_csv(EMOTION_ANALYSIS_PATH, index=False)
print(f"Emotion analysis saved to {EMOTION_ANALYSIS_PATH}")


Emotion analysis saved to /content/drive/MyDrive/metrics_dataset/emotion_analysis.csv
