In [1]:
pip install librosa matplotlib pandas numpy


Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hUsing cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2025.2 tzdata-2025.2
Note: you may need to restart the kernel to use updated packages.


In [7]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
from pathlib import Path

# ===================== CONFIGURATION =====================
# Every input/output location is derived from one base directory so the notebook
# can be pointed at another checkout or machine without editing each path.
# Override the base with the DYSARTHRIA_PROCESSED_DIR environment variable;
# when it is unset, all constants below resolve to their original values.
PROCESSED_DIR = os.environ.get(
    'DYSARTHRIA_PROCESSED_DIR',
    '/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/processed',
)

# The trailing '' component preserves the trailing path separator of the original value.
INPUT_FOLDER = os.path.join(PROCESSED_DIR, 'output', '')  # Folder where .wav files are stored
MFCC_OUTPUT_FOLDER = os.path.join(PROCESSED_DIR, 'MFCC')  # Folder to save MFCC CSVs
SPECTROGRAM_OUTPUT_FOLDER = os.path.join(PROCESSED_DIR, 'spectrograms')  # Folder to save Mel spectrogram images (PNG)
EXAMPLE_PLOTS_FOLDER = os.path.join(PROCESSED_DIR, 'example_plots')  # Folder to save example plots (both MFCC and spectrogram)
# Master CSV file that will store the metadata.
# NOTE(review): the file name has no ".csv" extension; it is kept as-is so that
# anything already reading this path keeps working -- confirm whether that is intended.
METADATA_CSV_PATH = os.path.join(PROCESSED_DIR, 'metadata')

# Create output directories if they don't exist
for output_folder in (MFCC_OUTPUT_FOLDER, SPECTROGRAM_OUTPUT_FOLDER, EXAMPLE_PLOTS_FOLDER):
    os.makedirs(output_folder, exist_ok=True)

# ===================== DATA PROCESSING =====================
# For every correctly named .wav file found under INPUT_FOLDER this section
#   1. saves its 13 MFCCs to <MFCC_OUTPUT_FOLDER>/<name>.csv,
#   2. saves its dB-scaled Mel spectrogram to <SPECTROGRAM_OUTPUT_FOLDER>/<name>.png,
#   3. appends one row of filename-derived metadata to `metadata`.
# Files whose name does not match the expected pattern are reported and skipped.
metadata = []  # One dict per processed file
example_candidates = []  # Bounded random sample of processed files, used for the example plots

# Keeping the feature arrays of every file alive just to plot a few of them makes
# memory grow with the size of the dataset. Instead, keep a uniform random sample
# of at most this many files (reservoir sampling). It must be at least the number
# of examples drawn in the example-plot section (3).
MAX_EXAMPLE_CANDIDATES = 3
processed_count = 0  # Number of files processed so far (drives the reservoir sampling)

# Last filename field -> human-readable class label
LABEL_BY_CODE = {'a': 'afflicted', 'c': 'control'}

# Get all .wav files (recursively) from the input folder
audio_files = sorted(Path(INPUT_FOLDER).rglob("*.wav"))

for file_path in audio_files:
    filename = file_path.stem  # Get filename without extension
    parts = filename.split('_')

    # Expecting the format: <database>_<gender>_<speaker#>_<recording#>_<a|c>
    if len(parts) != 5:
        print(f"Skipping {file_path.name} due to unexpected filename format.")
        continue

    database, gender, speaker, recording, label_code = parts
    # Determine label based on the last part of filename
    label = LABEL_BY_CODE.get(label_code)
    if label is None:
        print(f"Skipping {file_path.name} due to unknown label code '{label_code}'.")
        continue

    # Load the audio at its native sampling rate (sr=None disables resampling)
    y, sr = librosa.load(file_path, sr=None)

    # ---- MFCC Extraction ----
    # Extract 13 MFCC coefficients (each column represents a frame)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_df = pd.DataFrame(mfcc)

    # Save the MFCCs as a CSV file (the CSV filename mirrors the audio filename)
    mfcc_csv_filename = f"{filename}.csv"
    mfcc_csv_full_path = os.path.join(MFCC_OUTPUT_FOLDER, mfcc_csv_filename)
    mfcc_df.to_csv(mfcc_csv_full_path, index=False)

    # ---- Mel Spectrogram Generation ----
    # Compute the Mel spectrogram and convert to decibel units (relative to its peak)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Save Mel spectrogram to disk as a PNG image
    spec_img_filename = f"{filename}.png"
    spec_img_full_path = os.path.join(SPECTROGRAM_OUTPUT_FOLDER, spec_img_filename)
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"Mel Spectrogram: {filename}")
    plt.tight_layout()
    plt.savefig(spec_img_full_path)
    plt.close()  # Release the figure; thousands are created in this loop

    # ---- Metadata Collection ----
    metadata.append({
        "filename": f"{filename}.wav",
        "database": database,
        "gender": gender,
        "speaker_id": speaker,
        "recording_id": recording,
        "label": label
    })

    # ---- Candidate Examples (reservoir sampling) ----
    # The first MAX_EXAMPLE_CANDIDATES files fill the reservoir; afterwards the
    # i-th processed file replaces a random slot with probability
    # MAX_EXAMPLE_CANDIDATES / i, so every file is equally likely to be kept.
    processed_count += 1
    candidate = {
        "filename": filename,
        "mfcc": mfcc,
        "spectrogram": S_dB,
        "sr": sr
    }
    if len(example_candidates) < MAX_EXAMPLE_CANDIDATES:
        example_candidates.append(candidate)
    else:
        slot = random.randrange(processed_count)
        if slot < MAX_EXAMPLE_CANDIDATES:
            example_candidates[slot] = candidate

# ===================== SAVE METADATA =====================
# Create a master metadata CSV file that contains details for every processed file.
# The columns are listed explicitly (same order as the per-file dicts) so that the
# file still gets a header row, and stays readable, when no file was processed.
metadata_df = pd.DataFrame(
    metadata,
    columns=["filename", "database", "gender", "speaker_id", "recording_id", "label"],
)
metadata_df.to_csv(METADATA_CSV_PATH, index=False)

# ===================== GENERATE EXAMPLE PLOTS =====================
# Select 3 random examples (or all available if fewer than 3 exist) and save,
# for each one, an MFCC plot and a Mel spectrogram plot to EXAMPLE_PLOTS_FOLDER.
if len(example_candidates) >= 3:
    selected_examples = random.sample(example_candidates, 3)
else:
    selected_examples = example_candidates

for example in selected_examples:
    file_base = example["filename"]
    mfcc = example["mfcc"]
    spectrogram = example["spectrogram"]
    sr = example["sr"]

    # ---- MFCC Example Plot ----
    plt.figure(figsize=(10, 4))
    # Pass the file's own sampling rate: the audio was loaded at its native rate,
    # while specshow assumes 22050 Hz when sr is omitted, which would mis-scale
    # the time axis for any other rate.
    librosa.display.specshow(mfcc, sr=sr, x_axis='time')
    plt.colorbar()
    plt.title(f"MFCC (13 coefficients): {file_base}")
    plt.tight_layout()
    mfcc_example_path = os.path.join(EXAMPLE_PLOTS_FOLDER, f"{file_base}_mfcc_example.png")
    plt.savefig(mfcc_example_path)
    plt.close()

    # ---- Mel Spectrogram Example Plot ----
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(spectrogram, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"Mel Spectrogram: {file_base}")
    plt.tight_layout()
    spec_example_path = os.path.join(EXAMPLE_PLOTS_FOLDER, f"{file_base}_spectrogram_example.png")
    plt.savefig(spec_example_path)
    plt.close()

# Final status report: confirm the run finished and where the metadata was written.
print("Processing complete.")
print(f"Metadata saved to: {METADATA_CSV_PATH}")


# Show the metadata table. ace_tools is treated as optional: if importing it
# fails, the table is printed as plain text instead.
# NOTE(review): ace_tools is presumably an environment-specific display helper
# that is not installed by this notebook -- confirm it is still needed.
try:
    import ace_tools
    ace_tools.display_dataframe_to_user(name="Metadata", dataframe=metadata_df)
except ImportError:
    # Also reached if the display call itself raises ImportError.
    print(metadata_df)


Processing complete.
Metadata saved to: /home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/processed/metadata
                 filename database gender speaker_id recording_id      label
0     NDDS_m_01_001_a.wav     NDDS      m         01          001  afflicted
1     NDDS_m_01_002_a.wav     NDDS      m         01          002  afflicted
2     NDDS_m_01_003_a.wav     NDDS      m         01          003  afflicted
3     NDDS_m_01_004_a.wav     NDDS      m         01          004  afflicted
4     NDDS_m_01_005_a.wav     NDDS      m         01          005  afflicted
...                   ...      ...    ...        ...          ...        ...
2884  NDDS_m_11_249_a.wav     NDDS      m         11          249  afflicted
2885  NDDS_m_11_250_a.wav     NDDS      m         11          250  afflicted
2886  NDDS_m_11_251_a.wav     NDDS      m         11          251  afflicted
2887  NDDS_m_11_252_a.wav     NDDS      m         11          252  afflicted
2888  NDDS_m_11_253_a.wav   