In [16]:
import os
from pydub import AudioSegment

# Mix each clean speech clip with one noise clip and write the result as WAV.
#
# Clean and noise files are paired purely by their position in the sorted
# directory listings; nothing in the file names is used to match them.

# Define paths
base_path = r"F:\Level 3 Term 2\DSP Project\Data\MS-SNSD-master\MS-SNSD-master"
clean_path = os.path.join(base_path, "clean_train")
noise_path = os.path.join(base_path, "noise_train")
output_path = os.path.join(base_path, "noisy_clean_train")

# Number of clips this script expects to find in each input directory.
EXPECTED_FILE_COUNT = 49

# Gain applied to the noise before mixing, in dB. 0.0 leaves the noise at its
# recorded level, which is what this cell has always done. For reference, an
# amplitude ratio r corresponds to 20*log10(r) dB: doubling is ~6.02 dB and
# tripling is ~9.54 dB.
NOISE_GAIN_DB = 0.0

# Ensure the output directory exists
os.makedirs(output_path, exist_ok=True)

# Fetch audio files
clean_files = sorted([os.path.join(clean_path, f) for f in os.listdir(clean_path) if f.endswith('.wav')])
noise_files = sorted([os.path.join(noise_path, f) for f in os.listdir(noise_path) if f.endswith('.wav')])

# Ensure the number of files match
if len(clean_files) != EXPECTED_FILE_COUNT or len(noise_files) != EXPECTED_FILE_COUNT:
    raise ValueError(
        f"Both {os.path.basename(clean_path)} and {os.path.basename(noise_path)} "
        f"must have exactly {EXPECTED_FILE_COUNT} audio files "
        f"(found {len(clean_files)} and {len(noise_files)})."
    )

# Process and merge audio
for i, (clean_file, noise_file) in enumerate(zip(clean_files, noise_files)):
    # Load clean and noise audio
    clean_audio = AudioSegment.from_file(clean_file)
    noise_audio = AudioSegment.from_file(noise_file)

    # Adjust noise to match the length of clean audio.
    # len() of an AudioSegment is its duration in milliseconds.
    if len(noise_audio) > len(clean_audio):
        noise_audio = noise_audio[:len(clean_audio)]
    else:
        # Pad with silence generated at the noise clip's own frame rate so the
        # concatenation does not have to resample the padding.
        padding = AudioSegment.silent(
            duration=len(clean_audio) - len(noise_audio),
            frame_rate=noise_audio.frame_rate,
        )
        noise_audio = noise_audio + padding

    # Apply the configured gain; skipped entirely at 0 dB so the samples are
    # passed through untouched.
    if NOISE_GAIN_DB:
        amplified_noise = noise_audio.apply_gain(NOISE_GAIN_DB)
    else:
        amplified_noise = noise_audio

    # Combine clean and noise audio
    noisy_audio = clean_audio.overlay(amplified_noise)

    # Export the combined audio.
    # NOTE(review): the "noisy_clean_test_" prefix does not match the *_train
    # directories; it is kept unchanged so existing consumers of these file
    # names keep working -- confirm before renaming.
    output_file = os.path.join(output_path, f"noisy_clean_test_{i + 1}.wav")
    noisy_audio.export(output_file, format="wav")

print(
    f"Processing complete! Combined audios (noise gain: {NOISE_GAIN_DB:+.2f} dB) "
    f"are saved in the '{os.path.basename(output_path)}' folder."
)


Processing complete! Combined audios with tripled noise amplitude are saved in the 'noisy_clean_test' folder.
