In [1]:
import os
import random
from pathlib import Path
from pydub import AudioSegment

In [2]:
# Source folders: one per class, plus the folder used as background noise.
ambulance_dir = Path("OriginalDataset/ambulance")
police_dir = Path("OriginalDataset/police")
firetruck_dir = Path("OriginalDataset/firetruck")
noise_dir = Path("OriginalDataset/traffic")

# Destination folders for the noise-mixed files, one per class.
output_ambulance_dir = Path("Dataset/ambulance")
output_police_dir = Path("Dataset/police")
output_firetruck_dir = Path("Dataset/firetruck")

# Seed the global RNG so the random choices made later in this notebook
# repeat when it is re-run from a fresh kernel.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Create the destination folders (and any missing parents) up front.
output_ambulance_dir.mkdir(parents=True, exist_ok=True)
output_police_dir.mkdir(parents=True, exist_ok=True)
output_firetruck_dir.mkdir(parents=True, exist_ok=True)

In [3]:
def add_noise_to_audio(original_files, noise_files, original_dir, noise_dir, output_dir, noise_level=-20):
    """
    Mix a randomly chosen noise clip into each original recording and save the result.

    For every name in ``original_files`` the recording is loaded from
    ``original_dir``, a noise clip is picked at random from ``noise_files``,
    repeated and/or truncated to the recording's length, gain-adjusted by
    ``noise_level`` dB and overlaid on the recording. The mix is written to
    ``output_dir`` as ``mixed_<original filename>`` in WAV format.

    Failures on an individual file are reported with ``print`` and do not
    stop the remaining files from being processed.

    Parameters:
    - original_files (list): List of filenames for original audio files.
    - noise_files (list): List of filenames for noise audio files.
    - original_dir (Path): Path to the original audio files directory.
    - noise_dir (Path): Path to the noise audio files directory.
    - output_dir (Path): Path to the output directory (created if missing).
    - noise_level (float): Gain in dB applied to the noise before mixing.
      Negative values attenuate the noise (default is -20 dB).

    Returns:
    - None
    """

    output_dir.mkdir(parents=True, exist_ok=True)

    # Without any noise clip every file would fail identically; report it once.
    if not noise_files:
        print("No noise files provided; nothing was mixed.")
        return

    processed = 0
    for original_filename in original_files:
        original_path = original_dir / original_filename

        try:
            original_audio = AudioSegment.from_wav(original_path)

            # Select a random noise file and load it from the noise directory
            noise_filename = random.choice(noise_files)
            noise_audio = AudioSegment.from_wav(noise_dir / noise_filename)

            # Loop or truncate noise to match original audio duration
            target_len = len(original_audio)
            noise_len = len(noise_audio)
            if noise_len < target_len:
                if noise_len == 0:
                    # An empty clip cannot be repeated to fill the recording.
                    raise ValueError(f"noise file {noise_filename} is empty")
                noise_audio = noise_audio * (target_len // noise_len + 1)
            noise_audio = noise_audio[:target_len]

            # Apply noise_level as a gain. The previous `noise_audio - noise_level`
            # negated the value, so a negative level made the noise louder.
            noise_audio = noise_audio.apply_gain(noise_level)
            combined_audio = original_audio.overlay(noise_audio)

            # Save the combined audio; export() returns the open output
            # handle, so close it instead of leaving it to the GC.
            output_path = output_dir / f"mixed_{original_filename}"
            combined_audio.export(output_path, format="wav").close()

            processed += 1
            if processed % 20 == 0:
                print(f"Processed {processed} files")

        except FileNotFoundError as e:
            print(f"File not found: {e}")
        except Exception as e:
            # Deliberate best-effort: report the failure and move on.
            print(f"Error processing {original_filename}: {e}")

In [4]:
# Collect the .wav filenames found directly in each folder. The order in which
# glob yields entries depends on the filesystem listing, so the names are
# sorted to keep the processing order the same from run to run.
ambulance_files = sorted(f.name for f in ambulance_dir.glob("*.wav"))
police_files = sorted(f.name for f in police_dir.glob("*.wav"))
firetruck_files = sorted(f.name for f in firetruck_dir.glob("*.wav"))
noise_files = sorted(f.name for f in noise_dir.glob("*.wav"))

In [5]:
# Preprocessing ambulance files: mix random noise clips into each recording.
add_noise_to_audio(
    original_files=ambulance_files,
    noise_files=noise_files,
    original_dir=ambulance_dir,
    noise_dir=noise_dir,
    output_dir=output_ambulance_dir,
    noise_level=-10,
)

Processed 20 files
Processed 40 files
Processed 60 files
Processed 80 files
Processed 100 files
Processed 120 files
Processed 140 files
Processed 160 files
Processed 180 files
Processed 200 files
Processed 220 files
Processed 240 files
Processed 260 files
Processed 280 files
Processed 300 files
Processed 320 files
Processed 340 files
Processed 360 files
Processed 380 files
Processed 400 files


In [6]:
# Preprocessing police files: mix random noise clips into each recording.
add_noise_to_audio(
    original_files=police_files,
    noise_files=noise_files,
    original_dir=police_dir,
    noise_dir=noise_dir,
    output_dir=output_police_dir,
    noise_level=-10,
)

Processed 20 files
Processed 40 files
Processed 60 files
Processed 80 files
Processed 100 files
Processed 120 files
Processed 140 files
Processed 160 files
Processed 180 files
Processed 200 files
Processed 220 files
Processed 240 files
Processed 260 files
Processed 280 files
Processed 300 files
Processed 320 files
Processed 340 files
Processed 360 files
Processed 380 files
Processed 400 files
Processed 420 files
Processed 440 files


In [7]:
# Preprocessing firetruck files: mix random noise clips into each recording.
add_noise_to_audio(
    original_files=firetruck_files,
    noise_files=noise_files,
    original_dir=firetruck_dir,
    noise_dir=noise_dir,
    output_dir=output_firetruck_dir,
    noise_level=-10,
)

Processed 20 files
Processed 40 files
Processed 60 files
Processed 80 files
Processed 100 files
Processed 120 files
Processed 140 files
Processed 160 files
Processed 180 files
Processed 200 files
Processed 220 files
Processed 240 files
Processed 260 files
Processed 280 files
Processed 300 files
Processed 320 files
Processed 340 files
Processed 360 files
Processed 380 files
Processed 400 files
