In [None]:
# Training data stimuli generation script
# Mixes each speech clip with a randomly selected background noise

import os
import random
import numpy as np
import librosa
import soundfile as sf
import csv

def adjust_snr(speech, noise, target_snr_db):
    """Return ``noise`` rescaled so that speech-to-noise power hits ``target_snr_db``.

    Power is taken as the mean of the squared samples of each input. Only the
    noise is rescaled; ``speech`` is used solely as the power reference.
    """
    # Convert the dB target into a linear power ratio.
    snr_linear = 10 ** (target_snr_db / 10)

    signal_energy = np.mean(speech * speech)
    interferer_energy = np.mean(noise * noise)

    # Noise power that would yield the requested ratio against the speech.
    wanted_energy = signal_energy / snr_linear

    # The small constant keeps the division finite when the noise is silent.
    gain = np.sqrt(wanted_energy / (interferer_energy + 1e-8))
    return noise * gain

def mix_speech_with_noise(speech_dir, noise_dirs, output_dir, csv_log, num_samples=None):
    """Mix each speech clip with a randomly chosen noise clip at a random SNR and log details.

    For every ``.wav`` file in ``speech_dir`` a noise category is drawn from
    ``noise_dirs``, a noise file is drawn from that category, and the noise is
    truncated or zero-padded to exactly 2 seconds at the speech sample rate.
    The noise is scaled to the drawn SNR *before* the speech is added, so the
    logged SNR describes the speech-to-noise ratio actually present in the
    output. The speech is centred inside the 2-second window.

    Args:
        speech_dir: Directory containing the speech ``.wav`` clips.
        noise_dirs: Mapping of noise-type name to a directory of ``.wav`` files.
            Types "babble" and "scenes" draw the SNR from a normal distribution
            with mean -3 dB and standard deviation 2 dB; every other type uses
            mean -6 dB and standard deviation 2 dB.
        output_dir: Directory for the mixed files (created if missing). Each
            output keeps the name of its speech file.
        csv_log: Path of the CSV file that records, per output, the speech
            file, noise file, noise type and SNR.
        num_samples: Optional number of speech files to process, sampled at
            random. Values larger than the number of available files are
            clamped. ``None`` or ``0`` processes all files.

    Raises:
        ValueError: If there are speech files to process but none of the noise
            directories contains a ``.wav`` file.
    """
    os.makedirs(output_dir, exist_ok=True)
    speech_files = [f for f in os.listdir(speech_dir) if f.lower().endswith('.wav')]

    if num_samples:
        # random.sample raises ValueError when asked for more items than exist.
        speech_files = random.sample(speech_files, min(num_samples, len(speech_files)))

    # List every noise directory once instead of once per speech file, and
    # ignore categories without any usable file so random.choice cannot fail.
    noise_files_by_type = {}
    if speech_files:
        for noise_type, noise_dir in noise_dirs.items():
            candidates = [f for f in os.listdir(noise_dir) if f.lower().endswith('.wav')]
            if candidates:
                noise_files_by_type[noise_type] = candidates
        if not noise_files_by_type:
            raise ValueError("No .wav noise files found in any of the noise directories")
    noise_types = list(noise_files_by_type)

    log_data = []

    for speech_file in speech_files:
        speech_path = os.path.join(speech_dir, speech_file)
        speech, sr = librosa.load(speech_path, sr=None)

        # Select a random noise category, then a random file within it.
        noise_type = random.choice(noise_types)
        noise_file = random.choice(noise_files_by_type[noise_type])
        noise_path = os.path.join(noise_dirs[noise_type], noise_file)
        # Resample the noise to the speech sample rate so both align.
        noise, _ = librosa.load(noise_path, sr=sr)

        # Ensure noise is exactly 2 seconds.
        noise_length = int(sr * 2)  # 2 seconds in samples
        if len(noise) > noise_length:
            noise = noise[:noise_length]
        elif len(noise) < noise_length:
            noise = np.pad(noise, (0, noise_length - len(noise)), mode='constant')

        # Speech longer than the window cannot be centred inside it; keep its
        # central 2 seconds rather than failing on a shape mismatch.
        if len(speech) > noise_length:
            crop_start = (len(speech) - noise_length) // 2
            speech = speech[crop_start:crop_start + noise_length]
        speech_length = len(speech)

        # Determine SNR based on noise type.
        if noise_type in ["babble", "scenes"]:
            snr = np.random.normal(-3, 2)
        else:
            snr = np.random.normal(-6, 2)

        # Scale the noise alone to the target SNR, then add the untouched
        # speech. Scaling after mixing would rescale the speech too and the
        # resulting SNR would no longer match the logged value.
        mixture = adjust_snr(speech, noise, snr)

        # Center the speech within the 2-second noise.
        start_idx = (noise_length - speech_length) // 2
        mixture[start_idx:start_idx + speech_length] += speech

        # NOTE(review): the mixture is written without peak normalization, so
        # samples may exceed [-1, 1] at low SNRs — confirm the writer's
        # clipping behaviour is acceptable for the training data.
        output_path = os.path.join(output_dir, speech_file)
        sf.write(output_path, mixture, sr)

        # Log details.
        log_data.append([speech_file, noise_file, noise_type, float(snr)])
        print(f"Processed: {speech_file} with {noise_type} at {snr:.2f} dB SNR")

    # Write CSV log.
    with open(csv_log, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Speech File", "Noise File", "Noise Type", "SNR (dB)"])
        writer.writerows(log_data)

# Input and output locations for the training mix
speech_dir = "D:/DNN/Training_data/TIMIT/Extracted_Clips_training"
noise_dirs = dict(
    babble="D:/DNN/Training_data/Noise/speaker_babble/Extracted_speaker_noise_new",
    scenes="D:/DNN/Training_data/Noise/scenes_stereo/Extracted_scenes_noise",
    music="D:/DNN/Training_data/Noise/Extracted_FMA_Instrumental",
)
output_dir = "D:/DNN/Training_data/Mixed_Training"
csv_log = "D:/DNN/Training_data/mixing_log.csv"

# Generate the mixtures and the accompanying CSV log
mix_speech_with_noise(
    speech_dir=speech_dir,
    noise_dirs=noise_dirs,
    output_dir=output_dir,
    csv_log=csv_log,
)



Processed: greasy_FAKS0_SA1.WAV with music at -6.97 dB SNR
Processed: greasy_FDAC1_SA1.WAV with babble at -4.15 dB SNR
Processed: greasy_FELC0_SA1.WAV with scenes at -3.40 dB SNR
Processed: greasy_FJEM0_SA1.WAV with music at -6.84 dB SNR
Processed: greasy_MDAB0_SA1.WAV with scenes at -3.09 dB SNR
Processed: greasy_MJSW0_SA1.WAV with babble at -5.15 dB SNR
Processed: greasy_MREB0_SA1.WAV with babble at -7.65 dB SNR
Processed: greasy_MRJO0_SA1.WAV with scenes at -2.01 dB SNR
Processed: greasy_MSJS1_SA1.WAV with music at -6.65 dB SNR
Processed: greasy_MSTK0_SA1.WAV with scenes at -0.59 dB SNR
Processed: greasy_MWBT0_SA1.WAV with music at -6.47 dB SNR
Processed: greasy_FCMR0_SA1.WAV with music at -6.74 dB SNR
Processed: greasy_FDRD1_SA1.WAV with music at -3.79 dB SNR
Processed: greasy_FJAS0_SA1.WAV with scenes at -3.22 dB SNR
Processed: greasy_FJRE0_SA1.WAV with babble at -3.55 dB SNR
Processed: greasy_FJWB0_SA1.WAV with music at -6.67 dB SNR
Processed: greasy_FPAS0_SA1.WAV with scenes at 

In [4]:
# NOTE(review): this install fails (see the pip error in this cell's output):
# pip finds no distribution named "librosa.output". Presumably the intent was
# librosa's former `librosa.output` submodule for writing audio — TODO confirm.
# The mixing script in this notebook already writes files via `soundfile`.
pip install librosa.output

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement librosa.output (from versions: none)

[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for librosa.output
