In [10]:
import os
import random
import numpy as np
import librosa
import soundfile as sf

def list_files_recursive(directory, extensions=('.wav',)):
    """Recursively list all files in the directory with the given extensions.

    Args:
        directory: Root directory to walk with ``os.walk``.
        extensions: File-name suffixes to accept, e.g. ``('.wav', '.flac')``.
            Any iterable of strings works; a single string is treated as one
            suffix. Matching is case-insensitive.

    Returns:
        A list of paths (walk root joined with the file name) for every
        matching file, in the order ``os.walk`` yields them.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # Lower-case once so the comparison against file.lower() is truly
    # case-insensitive; str.endswith accepts a tuple of candidates.
    suffixes = tuple(ext.lower() for ext in extensions)

    return [
        os.path.join(root, file)
        for root, _, files in os.walk(directory)
        for file in files
        if file.lower().endswith(suffixes)
    ]

def choose_random_file(files_list):
    """Return one randomly selected entry of files_list, or None if it is empty."""
    if not files_list:
        return None
    return random.choice(files_list)

def add_noise(clean_file, noise_file, output_folder, noise_level=0.5):
    """Mix a noise recording into a clean recording and save the result.

    Both signals are trimmed to the shorter of the two, the noise is scaled
    by ``noise_level`` and added to the clean signal, and the mix is
    peak-normalised before being written as ``<clean basename>_noisy.wav``
    inside ``output_folder``.

    Note: the output name depends only on the clean file's base name, so a
    later call with the same clean file overwrites the earlier output.

    Args:
        clean_file: Path to the clean audio file.
        noise_file: Path to the noise audio file.
        output_folder: Directory the mixed file is written to (created if
            missing).
        noise_level: Multiplier applied to the noise before mixing.

    Raises:
        ValueError: If either file contains no samples.
    """
    # Keep the clean file's native sample rate ...
    clean_audio, sr = librosa.load(clean_file, sr=None)
    # ... and resample the noise to that same rate. Loading the noise at its
    # own native rate would mix sample-for-sample at mismatched rates and
    # shift the noise's pitch/speed in the output.
    noise_audio, _ = librosa.load(noise_file, sr=sr)

    # Trim both signals to the common length.
    n_samples = min(len(clean_audio), len(noise_audio))
    if n_samples == 0:
        raise ValueError(
            f"Cannot mix empty audio: {clean_file} + {noise_file}"
        )

    noisy_audio = clean_audio[:n_samples] + noise_level * noise_audio[:n_samples]

    # Peak-normalise; skip when the mix is pure silence to avoid 0/0 -> NaN.
    peak = np.max(np.abs(noisy_audio))
    if peak > 0:
        noisy_audio = noisy_audio / peak

    os.makedirs(output_folder, exist_ok=True)
    output_filename = os.path.splitext(os.path.basename(clean_file))[0] + '_noisy.wav'
    output_file = os.path.join(output_folder, output_filename)
    sf.write(output_file, noisy_audio, sr)
    print(f"Saved noisy file: {output_file}")

# Paths to directories
speech_directory = "/kaggle/input/ravdess-8k"  # path to the directory where the clean speeches are placed
noise_directory = "/kaggle/input/urban-sound-8k"  # path to the directory where the noise files are placed
output_folder = "mixed_audio"  # path to the directory where files will be saved

# Number of random speech/noise pairs to mix (loop indices 0..1000).
num_pairs = 1001

# List all audio files in the directories
print("Listing files in speech directory...")
speech_files = list_files_recursive(speech_directory)
print(f"Found {len(speech_files)} audio files in speech directory.")

print("Listing files in noise directory...")
noise_files = list_files_recursive(noise_directory)
print(f"Found {len(noise_files)} audio files in noise directory.")

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

if not speech_files:
    print("No valid audio files found in speech directory.")
if not noise_files:
    print("No valid audio files found in noise directory.")

# Only mix when both pools are non-empty; otherwise every iteration would
# just repeat the same "no files" error.
if speech_files and noise_files:
    for i in range(num_pairs):
        clean = choose_random_file(speech_files)
        noise = choose_random_file(noise_files)
        print(f"Processing pair {i}: {clean} + {noise}")
        try:
            add_noise(clean, noise, output_folder, noise_level=0.5)
        except Exception as e:
            # Best-effort batch job: report the failing pair and carry on.
            print(f"Error processing files: {e}")


Listing files in speech directory...
Found 1440 audio files in speech directory.
Listing files in noise directory...
Found 8732 audio files in noise directory.
Processing pair 0: /kaggle/input/ravdess-8k/ravdess_rewritten_8k/03-01-05-01-01-01-19.wav + /kaggle/input/urban-sound-8k/urbansound_8k/44735-5-0-0.wav
Saved noisy file: mixed_audio/03-01-05-01-01-01-19_noisy.wav
Processing pair 1: /kaggle/input/ravdess-8k/ravdess_rewritten_8k/03-01-05-02-02-02-11.wav + /kaggle/input/urban-sound-8k/urbansound_8k/196062-2-0-0.wav
Saved noisy file: mixed_audio/03-01-05-02-02-02-11_noisy.wav
Processing pair 2: /kaggle/input/ravdess-8k/ravdess_rewritten_8k/03-01-03-01-01-01-02.wav + /kaggle/input/urban-sound-8k/urbansound_8k/135776-2-0-65.wav
Saved noisy file: mixed_audio/03-01-03-01-01-01-02_noisy.wav
Processing pair 3: /kaggle/input/ravdess-8k/ravdess_rewritten_8k/03-01-04-01-01-02-14.wav + /kaggle/input/urban-sound-8k/urbansound_8k/159735-2-0-99.wav
Saved noisy file: mixed_audio/03-01-04-01-01-02-