In [7]:
##UASpeech

import os
import numpy as np
import librosa
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# ===================== CONFIGURATION =====================

# Root of the preprocessed recordings; it is searched recursively for .wav files further down.
INPUT_FOLDER = '/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/preprocessed/UASpeech_preprocessed/'
# Directory that receives the extracted MFCC feature table.
MFCC_OUTPUT_FOLDER = '/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/features/MFCCs/MFCC_UAS/'

# Make sure the output directory (and any missing parents) exists before anything is written.
Path(MFCC_OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True)

# Full path of the master CSV that collects one feature row per audio file.
master_csv_file = str(Path(MFCC_OUTPUT_FOLDER) / "1D_MFCC_features.csv")

# ===================== MFCC FEATURE EXTRACTION FUNCTION =====================
def extract_mfcc_features(audio, sr=16000, n_mfcc=13):
    """
    Summarise an audio signal as per-coefficient MFCC statistics.

    Parameters
    ----------
    audio : 1D sequence of audio samples; handed to librosa as ``y``.
    sr : sampling rate of ``audio``, forwarded to librosa (default 16000).
    n_mfcc : number of MFCC coefficients to compute (default 13).

    Returns
    -------
    1D numpy array of length ``2 * n_mfcc``: the mean of every coefficient
    over time, followed by the standard deviation of every coefficient over time.
    Empty input yields an all-zero vector of that same length.
    """
    # Nothing to analyse: return a zero placeholder so callers still get a fixed-length vector.
    if not len(audio):
        return np.zeros(2 * n_mfcc)

    # MFCC matrix: one row per coefficient, one column per frame.
    coeffs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    # Collapse the frame axis twice (mean, then std) and lay the two summaries end to end.
    per_coeff_stats = (coeffs.mean(axis=1), coeffs.std(axis=1))
    return np.concatenate(per_coeff_stats, axis=0)

# ===================== DATA PROCESSING =====================
# Number of MFCC coefficients per file. It drives both the extraction call and the
# CSV column names, so the two can never drift apart.
N_MFCC = 13

# Filename label code -> classification label written to the CSV.
LABEL_BY_CODE = {'a': 'afflicted', 'c': 'control'}

# One dict per successfully processed audio file (metadata + MFCC statistics).
rows = []

# Retrieve all .wav files from the input folder (recursively), in sorted order.
audio_files = sorted(Path(INPUT_FOLDER).rglob("*.wav"))

# Iterate over files using a progress bar.
for file_path in tqdm(audio_files, desc="Extracting 1D MFCC features"):
    # The stem is split on '_' into five metadata fields:
    # database, gender, speaker, recording, label code (e.g., NDDS_m_01_001_a).
    parts = file_path.stem.split('_')
    if len(parts) != 5:
        print(f"Skipping {file_path.name}: unexpected filename format.")
        continue

    database, gender, speaker, recording, label_code = parts

    # Map the label code to the classification label; unknown codes are skipped.
    label = LABEL_BY_CODE.get(label_code)
    if label is None:
        print(f"Skipping {file_path.name}: unknown label code '{label_code}'.")
        continue

    # Load at the file's native sampling rate and extract the feature vector.
    # A single unreadable or malformed file must not abort the whole (multi-hour) run
    # and discard every row collected so far, so failures are reported and skipped,
    # in the same way as malformed filenames above.
    try:
        y, sr = librosa.load(file_path, sr=None)
        features_1d = extract_mfcc_features(y, sr=sr, n_mfcc=N_MFCC)
    except Exception as exc:
        print(f"Skipping {file_path.name}: failed to load audio or compute MFCCs ({exc}).")
        continue

    # extract_mfcc_features returns an all-zero placeholder for empty audio; make that visible.
    if len(y) == 0:
        print(f"Warning: {file_path.name} contains no samples; storing all-zero MFCC features.")

    # Metadata columns come first, in a fixed order.
    row = {
        "filename": file_path.name,
        "database": database,
        "gender": gender,
        "speaker_id": speaker,
        "recording_id": recording,
        "label": label
    }

    # features_1d holds N_MFCC means followed by N_MFCC standard deviations.
    row.update({f"mfcc_mean_{i + 1}": features_1d[i] for i in range(N_MFCC)})
    row.update({f"mfcc_std_{i + 1}": features_1d[N_MFCC + i] for i in range(N_MFCC)})

    rows.append(row)

# ===================== SAVE THE MASTER CSV =====================
if not rows:
    print("Warning: no feature rows were produced; the master CSV will be empty.")

# Create a DataFrame from the collected rows and save it as a CSV.
df = pd.DataFrame(rows)
df.to_csv(master_csv_file, index=False)
print(f"Master CSV with 1D MFCC features saved to: {master_csv_file}")


Extracting 1D MFCC features: 100%|██████████| 143565/143565 [5:27:41<00:00,  7.30it/s]   


Master CSV with 1D MFCC features saved to: /home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/features/MFCCs/MFCC_UAS/1D_MFCC_features.csv


In [7]:
##NDDS

import os
import numpy as np
import librosa
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# ===================== CONFIGURATION =====================

# NOTE(review): this cell is headed "##NDDS", yet both folders below are the same UASpeech
# paths used by the "##UASpeech" cell, so running it re-reads UASpeech and rewrites the
# same CSV. Presumably these should point at the NDDS directories — confirm the intended paths.
# Root of the preprocessed recordings; it is searched recursively for .wav files further down.
INPUT_FOLDER = '/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/preprocessed/UASpeech_preprocessed/'
# Directory that receives the extracted MFCC feature table.
MFCC_OUTPUT_FOLDER = '/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/features/MFCCs/MFCC_UAS/'

# Make sure the output directory (and any missing parents) exists before anything is written.
Path(MFCC_OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True)

# Full path of the master CSV that collects one feature row per audio file.
master_csv_file = str(Path(MFCC_OUTPUT_FOLDER) / "1D_MFCC_features.csv")

# ===================== MFCC FEATURE EXTRACTION FUNCTION =====================
def extract_mfcc_features(audio, sr=16000, n_mfcc=13):
    """
    Reduce an audio signal to a fixed-length vector of MFCC statistics.

    Parameters
    ----------
    audio : 1D sequence of audio samples; handed to librosa as ``y``.
    sr : sampling rate of ``audio``, forwarded to librosa (default 16000).
    n_mfcc : number of MFCC coefficients to compute (default 13).

    Returns
    -------
    1D numpy array of length ``2 * n_mfcc``. The first ``n_mfcc`` entries are the
    per-coefficient means over time, the remaining ``n_mfcc`` entries are the
    per-coefficient standard deviations over time. Empty input produces all zeros.
    """
    # Empty signal: return a zero placeholder of the usual length instead of calling librosa.
    if not len(audio):
        return np.zeros(2 * n_mfcc)

    # MFCC matrix: one row per coefficient, one column per frame.
    coeffs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    # Summarise each coefficient over time, then join means and standard deviations.
    per_coeff_stats = (coeffs.mean(axis=1), coeffs.std(axis=1))
    return np.concatenate(per_coeff_stats, axis=0)

# ===================== DATA PROCESSING =====================
# Number of MFCC coefficients per file. It drives both the extraction call and the
# CSV column names, so the two can never drift apart.
N_MFCC = 13

# Filename label code -> classification label written to the CSV.
LABEL_BY_CODE = {'a': 'afflicted', 'c': 'control'}

# One dict per successfully processed audio file (metadata + MFCC statistics).
rows = []

# Retrieve all .wav files from the input folder (recursively), in sorted order.
audio_files = sorted(Path(INPUT_FOLDER).rglob("*.wav"))

# Iterate over files using a progress bar.
for file_path in tqdm(audio_files, desc="Extracting 1D MFCC features"):
    # The stem is split on '_' into five metadata fields:
    # database, gender, speaker, recording, label code (e.g., NDDS_m_01_001_a).
    parts = file_path.stem.split('_')
    if len(parts) != 5:
        print(f"Skipping {file_path.name}: unexpected filename format.")
        continue

    database, gender, speaker, recording, label_code = parts

    # Map the label code to the classification label; unknown codes are skipped.
    label = LABEL_BY_CODE.get(label_code)
    if label is None:
        print(f"Skipping {file_path.name}: unknown label code '{label_code}'.")
        continue

    # Load at the file's native sampling rate and extract the feature vector.
    # A single unreadable or malformed file must not abort the whole (multi-hour) run
    # and discard every row collected so far, so failures are reported and skipped,
    # in the same way as malformed filenames above.
    try:
        y, sr = librosa.load(file_path, sr=None)
        features_1d = extract_mfcc_features(y, sr=sr, n_mfcc=N_MFCC)
    except Exception as exc:
        print(f"Skipping {file_path.name}: failed to load audio or compute MFCCs ({exc}).")
        continue

    # extract_mfcc_features returns an all-zero placeholder for empty audio; make that visible.
    if len(y) == 0:
        print(f"Warning: {file_path.name} contains no samples; storing all-zero MFCC features.")

    # Metadata columns come first, in a fixed order.
    row = {
        "filename": file_path.name,
        "database": database,
        "gender": gender,
        "speaker_id": speaker,
        "recording_id": recording,
        "label": label
    }

    # features_1d holds N_MFCC means followed by N_MFCC standard deviations.
    row.update({f"mfcc_mean_{i + 1}": features_1d[i] for i in range(N_MFCC)})
    row.update({f"mfcc_std_{i + 1}": features_1d[N_MFCC + i] for i in range(N_MFCC)})

    rows.append(row)

# ===================== SAVE THE MASTER CSV =====================
if not rows:
    print("Warning: no feature rows were produced; the master CSV will be empty.")

# Create a DataFrame from the collected rows and save it as a CSV.
df = pd.DataFrame(rows)
df.to_csv(master_csv_file, index=False)
print(f"Master CSV with 1D MFCC features saved to: {master_csv_file}")


Extracting 1D MFCC features: 100%|██████████| 143565/143565 [5:27:41<00:00,  7.30it/s]   


Master CSV with 1D MFCC features saved to: /home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/features/MFCCs/MFCC_UAS/1D_MFCC_features.csv
