In [1]:
##NDDS preprocess

import os
import librosa
import noisereduce as nr
from pydub import AudioSegment
from scipy.io.wavfile import write as wav_write
import numpy as np

# ==============================
# 🔧 Set your input/output paths
# ==============================
input_dir = "/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/dysarthria_raw/dysarthria_raw_single-words/NDDS-single-words/"
output_dir = "/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/processed/output"

# ==============================
# Step 1: Get Maximum Audio Length
# ==============================
def get_max_audio_length(audio_dir):
    max_length = 0
    print("🔍 Scanning for max audio length...")
    for root, _, files in os.walk(audio_dir):
        for file in files:
            if file.endswith(".wav"):
                filepath = os.path.join(root, file)
                try:
                    y, sr = librosa.load(filepath, sr=None)
                    max_length = max(max_length, len(y))
                except Exception as e:
                    print(f"[Error] Could not process {filepath}: {e}")
    print(f"✅ Max length found: {max_length} samples")
    return max_length

# ==============================
# Step 2: Normalize + Denoise + Pad
# ==============================
def normalize_and_pad_audio(filepath, max_len, output_path):
    try:
        # Load and reduce noise
        y, sr = librosa.load(filepath, sr=None)
        y_denoised = nr.reduce_noise(y=y, sr=sr)

        # Save to temporary WAV file for PyDub to process
        temp_path = "temp_denoised.wav"
        wav_write(temp_path, sr, (y_denoised * 32767).astype(np.int16))

        # Normalize volume with PyDub
        audio = AudioSegment.from_wav(temp_path)
        normalized_audio = audio.apply_gain(-audio.max_dBFS)

        # Convert back to numpy
        samples = np.array(normalized_audio.get_array_of_samples()).astype(np.float32)
        if np.max(np.abs(samples)) != 0:
            samples /= np.max(np.abs(samples))  # Normalize to [-1, 1]

        # Pad
        if len(samples) < max_len:
            samples = np.pad(samples, (0, max_len - len(samples)), mode='constant')

        # Save to output
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        wav_write(output_path, sr, (samples * 32767).astype(np.int16))

        # Cleanup
        os.remove(temp_path)
    except Exception as e:
        print(f"[Error] Failed to normalize {filepath}: {e}")

# ==============================
# Step 3: Normalize Entire Dataset
# ==============================
def normalize_dataset(input_dir, output_dir):
    max_len = get_max_audio_length(input_dir)
    print("🚀 Starting normalization...")

    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.endswith(".wav"):
                rel_path = os.path.relpath(root, input_dir)
                input_path = os.path.join(root, file)
                output_path = os.path.join(output_dir, rel_path, file)

                print(f"→ Processing: {input_path}")
                normalize_and_pad_audio(input_path, max_len, output_path)

    print("🎉 All files normalized and saved!")

# ==============================
# ✅ Run It
# ==============================
normalize_dataset(input_dir, output_dir)


🔍 Scanning for max audio length...
✅ Max length found: 0 samples
🚀 Starting normalization...
🎉 All files normalized and saved!


In [2]:
!pip install librosa noisereduce pydub scipy numpy



In [5]:
##UASpeech Single Word

import os
import librosa
import noisereduce as nr
from pydub import AudioSegment
from scipy.io.wavfile import write as wav_write
import numpy as np
from tqdm import tqdm

# ============================================
# Set your input/output paths and configuration
# ============================================
input_dir = "/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/dysarthria_raw/dysarthria_raw_single-words/UASpeech-single-words/"
output_dir = "/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/processed/output/UASpeech_pp/"

# ============================================
# Step 1: Get Maximum Audio Length (in samples)
# ============================================
def get_max_audio_length(audio_dir):
    max_length = 0
    print("🔍 Scanning for maximum audio length...")
    for root, _, files in os.walk(audio_dir):
        for file in files:
            if file.lower().endswith(".wav"):
                filepath = os.path.join(root, file)
                try:
                    y, sr = librosa.load(filepath, sr=None)
                    current_length = len(y)
                    if current_length > max_length:
                        max_length = current_length
                except Exception as e:
                    print(f"[Error] Could not process {filepath}: {e}")
    print(f" Max length found: {max_length} samples")
    return max_length

# ============================================
# Step 2: Normalize, Denoise, and Pad a single audio file
# ============================================
def normalize_and_pad_audio(filepath, max_len, output_path):
    try:
        # Load the audio file using librosa
        y, sr = librosa.load(filepath, sr=None)
        
        # Reduce noise using noisereduce
        y_denoised = nr.reduce_noise(y=y, sr=sr)
        
        # Save to a temporary file (16-bit PCM)
        temp_path = "temp_denoised.wav"
        wav_write(temp_path, sr, (y_denoised * 32767).astype(np.int16))
        
        # Use PyDub to load the temporary file and normalize volume
        audio = AudioSegment.from_wav(temp_path)
        normalized_audio = audio.apply_gain(-audio.max_dBFS)
        
        # Convert the normalized audio back to a NumPy array (float32)
        samples = np.array(normalized_audio.get_array_of_samples()).astype(np.float32)
        if np.max(np.abs(samples)) != 0:
            samples /= np.max(np.abs(samples))  # scale to [-1, 1]
        
        # Pad the audio if it is shorter than the maximum length
        if len(samples) < max_len:
            samples = np.pad(samples, (0, max_len - len(samples)), mode='constant')
        
        # Ensure the output directory exists and save the final WAV file
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        wav_write(output_path, sr, (samples * 32767).astype(np.int16))
        
        # Cleanup temporary file
        os.remove(temp_path)
    except Exception as e:
        print(f"[Error] Failed to normalize {filepath}: {e}")

# ============================================
# Step 3: Process the Entire Dataset with a Progress Bar
# ============================================
def normalize_dataset(input_dir, output_dir):
    max_len = get_max_audio_length(input_dir)
    print("Starting normalization of dataset...")

    # Build a list of all WAV files (with their relative path info)
    file_list = []
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.lower().endswith(".wav"):
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(root, input_dir)
                file_list.append((full_path, rel_path, file))
    
    # Process each file with a progress bar
    for input_file, rel_path, file in tqdm(file_list, desc="Processing WAV Files", total=len(file_list)):
        output_file = os.path.join(output_dir, rel_path, file)
        normalize_and_pad_audio(input_file, max_len, output_file)

    print("Dataset normalization complete!")

# ============================================
# Run the script
# ============================================
if __name__ == "__main__":
    normalize_dataset(input_dir, output_dir)


🔍 Scanning for maximum audio length...
 Max length found: 0 samples
Starting normalization of dataset...


Processing WAV Files: 0it [00:00, ?it/s]

Dataset normalization complete!





In [None]:
##TORGO Single Word

import os
import librosa
import noisereduce as nr
from pydub import AudioSegment
from scipy.io.wavfile import write as wav_write
import numpy as np
from tqdm import tqdm

# ============================================
# Set your input/output paths and configuration
# ============================================
input_dir = "/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/dysarthria_raw/dysarthria_raw_single-words/UASpeech-single-words/"
output_dir = "/home/the_fat_cat/Documents/GitHub/dysarthria-classification/data/processed/output/UASpeech_pp/"

# ============================================
# Step 1: Get Maximum Audio Length (in samples)
# ============================================
def get_max_audio_length(audio_dir):
    max_length = 0
    print("🔍 Scanning for maximum audio length...")
    for root, _, files in os.walk(audio_dir):
        for file in files:
            if file.lower().endswith(".wav"):
                filepath = os.path.join(root, file)
                try:
                    y, sr = librosa.load(filepath, sr=None)
                    current_length = len(y)
                    if current_length > max_length:
                        max_length = current_length
                except Exception as e:
                    print(f"[Error] Could not process {filepath}: {e}")
    print(f" Max length found: {max_length} samples")
    return max_length

# ============================================
# Step 2: Normalize, Denoise, and Pad a single audio file
# ============================================
def normalize_and_pad_audio(filepath, max_len, output_path):
    try:
        # Load the audio file using librosa
        y, sr = librosa.load(filepath, sr=None)
        
        # Reduce noise using noisereduce
        y_denoised = nr.reduce_noise(y=y, sr=sr)
        
        # Save to a temporary file (16-bit PCM)
        temp_path = "temp_denoised.wav"
        wav_write(temp_path, sr, (y_denoised * 32767).astype(np.int16))
        
        # Use PyDub to load the temporary file and normalize volume
        audio = AudioSegment.from_wav(temp_path)
        normalized_audio = audio.apply_gain(-audio.max_dBFS)
        
        # Convert the normalized audio back to a NumPy array (float32)
        samples = np.array(normalized_audio.get_array_of_samples()).astype(np.float32)
        if np.max(np.abs(samples)) != 0:
            samples /= np.max(np.abs(samples))  # scale to [-1, 1]
        
        # Pad the audio if it is shorter than the maximum length
        if len(samples) < max_len:
            samples = np.pad(samples, (0, max_len - len(samples)), mode='constant')
        
        # Ensure the output directory exists and save the final WAV file
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        wav_write(output_path, sr, (samples * 32767).astype(np.int16))
        
        # Cleanup temporary file
        os.remove(temp_path)
    except Exception as e:
        print(f"[Error] Failed to normalize {filepath}: {e}")

# ============================================
# Step 3: Process the Entire Dataset with a Progress Bar
# ============================================
def normalize_dataset(input_dir, output_dir):
    max_len = get_max_audio_length(input_dir)
    print("Starting normalization of dataset...")

    # Build a list of all WAV files (with their relative path info)
    file_list = []
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.lower().endswith(".wav"):
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(root, input_dir)
                file_list.append((full_path, rel_path, file))
    
    # Process each file with a progress bar
    for input_file, rel_path, file in tqdm(file_list, desc="Processing WAV Files", total=len(file_list)):
        output_file = os.path.join(output_dir, rel_path, file)
        normalize_and_pad_audio(input_file, max_len, output_file)

    print("Dataset normalization complete!")

# ============================================
# Run the script
# ============================================
if __name__ == "__main__":
    normalize_dataset(input_dir, output_dir)
