In [11]:
import os
import random
from pydub import AudioSegment

# Define directories: source recordings (one file per speaker) and the
# destination for the mixed babble clips.
input_folder = r"D:\DNN\Training_data\Noise\speaker_babble\Speech_Babble_Audios"
output_folder = r"D:\DNN\Training_data\Noise\speaker_babble\Extracted_speaker_noise_new"
os.makedirs(output_folder, exist_ok=True)

# Desired clip duration (2 seconds), expressed in milliseconds because it is
# compared against len(audio) and used as a slice width in get_random_clip.
clip_duration_ms = 2000

# Categorize speakers by file name. The directory is listed once and sorted so
# both lists are built from the same snapshot in a deterministic order.
# "female" contains "male", so male files must explicitly exclude "female".
speaker_files = sorted(os.listdir(input_folder))
female_speakers = [f for f in speaker_files if "female" in f.lower()]
male_speakers = [
    f for f in speaker_files
    if "male" in f.lower() and "female" not in f.lower()
]

# Ensure enough speakers exist. Explicit raises are used instead of `assert`
# because assertions are removed when Python runs with -O.
if len(male_speakers) < 2:
    raise ValueError("Not enough male speakers!")
if len(female_speakers) < 2:
    raise ValueError("Not enough female speakers!")

# Define the number of clips per category
num_clips = {
    "male-male": 2000,
    "female-male": 2000,
    "female-female": 2000,
}

# Function to extract random 2-second segments
def get_random_clip(file_path):
    """Return a randomly positioned excerpt of the audio file at *file_path*.

    The excerpt is ``clip_duration_ms`` long and its start offset is drawn
    uniformly from every position at which a full-length excerpt still fits.

    Returns ``None`` when the file is shorter than ``clip_duration_ms`` or
    when anything goes wrong while loading or slicing it; in the latter case
    the error is printed so the caller can simply skip the file.
    """
    excerpt = None
    try:
        segment = AudioSegment.from_file(file_path)
        # Largest start offset that still leaves room for a full clip;
        # negative means the recording is too short to use.
        latest_start = len(segment) - clip_duration_ms
        if latest_start >= 0:
            offset = random.randint(0, latest_start)
            excerpt = segment[offset:offset + clip_duration_ms]
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        return None  # Skip failed files
    return excerpt

# Function to generate babble noise
def generate_babble_noise(category, speaker_list_1, speaker_list_2, num_samples):
    for i in range(num_samples):
        # Randomly pick two different speakers
        speaker1, speaker2 = random.sample(speaker_list_1, 1)[0], random.sample(speaker_list_2, 1)[0]

        # Load and extract clips
        clip1 = get_random_clip(os.path.join(input_folder, speaker1))
        clip2 = get_random_clip(os.path.join(input_folder, speaker2))

        if clip1 is None or clip2 is None:
            continue  # Skip if any clip failed to load

        # Mix the two clips together
        babble_noise = clip1.overlay(clip2)

        # Save the mixed audio
        output_filename = f"{category}_clip_{i+1}.wav"
        babble_noise.export(os.path.join(output_folder, output_filename), format="wav")

        if i % 100 == 0:
            print(f"Saved: {output_filename} ({i}/{num_samples})")

    print(f"✅ Finished generating {category} noise!")

# Generate noise for each category: each row is (category label, speakers for
# the first voice, speakers for the second voice). The label doubles as the
# key into num_clips.
category_pairs = (
    ("male-male", male_speakers, male_speakers),
    ("female-male", female_speakers, male_speakers),
    ("female-female", female_speakers, female_speakers),
)
for category_name, first_voices, second_voices in category_pairs:
    generate_babble_noise(category_name, first_voices, second_voices, num_clips[category_name])

print("✅ All speaker babble noise clips have been generated successfully!")




Saved: male-male_clip_1.wav (0/2000)
Saved: male-male_clip_101.wav (100/2000)
Saved: male-male_clip_201.wav (200/2000)
Saved: male-male_clip_301.wav (300/2000)
Saved: male-male_clip_401.wav (400/2000)
Saved: male-male_clip_501.wav (500/2000)
Saved: male-male_clip_601.wav (600/2000)
Saved: male-male_clip_701.wav (700/2000)
Saved: male-male_clip_801.wav (800/2000)
Saved: male-male_clip_901.wav (900/2000)
Saved: male-male_clip_1001.wav (1000/2000)
Saved: male-male_clip_1101.wav (1100/2000)
Saved: male-male_clip_1201.wav (1200/2000)
Saved: male-male_clip_1301.wav (1300/2000)
Saved: male-male_clip_1401.wav (1400/2000)
Saved: male-male_clip_1501.wav (1500/2000)
Saved: male-male_clip_1601.wav (1600/2000)
Saved: male-male_clip_1701.wav (1700/2000)
Saved: male-male_clip_1801.wav (1800/2000)
Saved: male-male_clip_1901.wav (1900/2000)
✅ Finished generating male-male noise!
Saved: female-male_clip_1.wav (0/2000)
Saved: female-male_clip_101.wav (100/2000)
Saved: female-male_clip_201.wav (200/2000)
