In [None]:
import numpy
import os
import pandas as pd
import torch
import pyloudnorm as pyln
import random
import soundfile as sf
from tqdm import tqdm
import librosa
from config import DATASET_PERCUSSION_PATH, DATASET_NOISE_PATH, DATASET_MIX_AUDIO_PATH
#%%
# Setup
random.seed(42)  # Ensure reproducibility

# Loudness meter
sample_rate = 7812
meter = pyln.Meter(sample_rate)

# Noise classes
noise_class_list = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'siren',
    'jackhammer'
]

def pad_audio_center(audio_path, sample_rate=7812, target_length=31248):
    audio, sr = librosa.load(audio_path, sr=sample_rate)

    if len(audio) < target_length:
        pad_len = (target_length - len(audio)) // 2
        audio = numpy.pad(audio, (pad_len, target_length -
                       len(audio) - pad_len), 'constant')
    
    audio = audio[:target_length]
    # audio = torch.tensor(audio, dtype=torch.float32)
    
    return audio

# Helper functions for normalization and mixing
def normalize_loudness(audio, target_loudness=-3):
    loudness = meter.integrated_loudness(audio)
    if loudness == -float('inf'):
        audio = pyln.normalize.peak(audio, 0)
        loudness = meter.integrated_loudness(audio)

    audio = pyln.normalize.loudness(audio, loudness, target_loudness)
    
    # Ensure audio is within [-1, 1]
    max_amplitude = max(abs(audio))
    audio = audio / max_amplitude
    return audio

def create_mixture(percussion_audio, noise_audio, k):
    percussion_audio = k * percussion_audio
    noise_audio = (1 - k) * noise_audio
    mixture_audio = percussion_audio + noise_audio
    return mixture_audio

# Dataset Creation
def create_dataset(metadata_noise, output_dir, num_mixes=7358, target_loudness=-3, max_noise_classes=2, k_values=[0.5, 0.6, 0.7, 0.8, 0.9]):
    # Create directories if they do not exist
    os.makedirs(output_dir, exist_ok=True)

    # Create a list for storing metadata
    metadata = []

    # Iterate through the percussion files
    percussion_files = sorted(os.listdir(DATASET_PERCUSSION_PATH)) 
    # keep wav files only
    percussion_files = [f for f in percussion_files if f.endswith('.wav')]
    
    # Progress bar
    for perc_idx, perc_file in enumerate(tqdm(percussion_files, desc="Creating Mixtures")):
        percussion_path = os.path.join(DATASET_PERCUSSION_PATH, perc_file)
        percussion_audio = pad_audio_center(percussion_path)
        percussion_audio = normalize_loudness(percussion_audio, target_loudness)

        # Randomly select 1 or 2 noise classes for each mix
        for _ in range(num_mixes // len(percussion_files)):
            num_noise_classes = random.randint(1, max_noise_classes)
            noise_classes = random.sample(noise_class_list, k=num_noise_classes)
            k = random.choice(k_values)
            noise_audio_combined = numpy.zeros_like(percussion_audio)

            noise_files = []
            noise_classes_str = []

            # Load and mix noise files
            for noise_class in noise_classes:
                noise_row = metadata_noise[metadata_noise['class'] == noise_class].sample(n=1).iloc[0]
                noise_file = os.path.join(DATASET_NOISE_PATH, f"fold{noise_row['fold']}", noise_row['slice_file_name'])
                noise_audio = pad_audio_center(noise_file)
                noise_audio = normalize_loudness(noise_audio, target_loudness)

                # Add noise to combined noise audio
                noise_audio_combined += noise_audio

                # Keep track of noise files and classes
                noise_files.append(noise_row['slice_file_name'])
                noise_classes_str.append(noise_class)

            # Create the final mixture
            noise_audio_combined = normalize_loudness(noise_audio_combined, target_loudness)
            mixture_audio = create_mixture(percussion_audio, noise_audio_combined, k)

            # Save mixture audio
            mix_file_name = f"mixture_{perc_idx}_noise_{'_'.join(noise_classes)}_k_{k:.2f}.wav"
            mix_file_path = os.path.join(output_dir, mix_file_name)
            sf.write(mix_file_path, mixture_audio, sample_rate)

            # Append metadata
            metadata.append({
                'percussion_file': perc_file,
                'mix_file': mix_file_name,
                'noise_files': ','.join(noise_files),
                'noise_classes': ','.join(noise_classes_str),
                'k': k
            })

    # Save metadata to CSV
    metadata_df = pd.DataFrame(metadata)
    metadata_df.to_csv(os.path.join(output_dir, "metadata.csv"), index=False)
    print(f"Dataset and metadata saved in '{output_dir}'")

# Load noise metadata
metadata_noise_path = os.path.join(DATASET_NOISE_PATH, "UrbanSound8k.csv")
metadata_noise = pd.read_csv(metadata_noise_path)

# Change classID 8 to 6
metadata_noise['classID'] = metadata_noise['classID'].replace(8, 6)
# Sort by class ID and fold and reset index
metadata_noise = metadata_noise.sort_values(
    by=['classID', 'fold']).reset_index(drop=True)

# # Get unique noise classes and their IDs
# noise_class = metadata_noise['class'].unique()
# noise_classID = metadata_noise['classID'].unique()

# # Create a DataFrame for noise classes
# noise_data = {
#     'class': noise_class,
#     'classID': noise_classID,
#     'count': [len(metadata_noise[metadata_noise['class'] == c]) for c in noise_class],
# }
# df = pd.DataFrame(noise_data)

# Create dataset with mixtures and save metadata
output_dir = DATASET_MIX_AUDIO_PATH
create_dataset(metadata_noise, output_dir, num_mixes=7358)

Creating Mixtures: 100%|██████████| 387/387 [02:37<00:00,  2.45it/s]

Dataset and metadata saved in 'C:\Users\jejep\Desktop\STAGE\data\mixture_audio'





In [None]:
#%%
# Setup
random.seed(42)  # Ensure reproducibility

# Loudness meter
sample_rate = 7812
meter = pyln.Meter(sample_rate)

# Noise classes
noise_class_list = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'siren',
    'jackhammer'
]

def pad_audio_center(audio_path, sample_rate=7812, target_length=31248):
    audio, sr = librosa.load(audio_path, sr=sample_rate)

    if len(audio) < target_length:
        pad_len = (target_length - len(audio)) // 2
        audio = numpy.pad(audio, (pad_len, target_length -
                       len(audio) - pad_len), 'constant')
    
    audio = audio[:target_length]
    # audio = torch.tensor(audio, dtype=torch.float32)
    
    return audio

# Helper functions for normalization and mixing
def normalize_loudness(audio, target_loudness=-3):
    loudness = meter.integrated_loudness(audio)
    if loudness == -float('inf'):
        audio = pyln.normalize.peak(audio, 0)
        loudness = meter.integrated_loudness(audio)

    audio = pyln.normalize.loudness(audio, loudness, target_loudness)
    
    # Ensure audio is within [-1, 1]
    max_amplitude = max(abs(audio))
    audio = audio / max_amplitude
    return audio

def create_mixture(percussion_audio, noise_audio, k):
    percussion_audio = k * percussion_audio
    noise_audio = (1 - k) * noise_audio
    mixture_audio = percussion_audio + noise_audio
    return mixture_audio

# Dataset Creation
def create_dataset(metadata_noise, output_dir, num_mixes=7358, target_loudness=-3, max_noise_classes=2, k_values=[0.5, 0.6, 0.7, 0.8, 0.9]):
    # Create directories if they do not exist
    os.makedirs(output_dir, exist_ok=True)

    # Create a list for storing metadata
    metadata = []

    # Iterate through the percussion files
    percussion_files = sorted(os.listdir(DATASET_PERCUSSION_PATH)) 
    # keep wav files only
    percussion_files = [f for f in percussion_files if f.endswith('.wav')]
    
    # Progress bar
    for perc_idx, perc_file in enumerate(tqdm(percussion_files, desc="Creating Mixtures")):
        percussion_path = os.path.join(DATASET_PERCUSSION_PATH, perc_file)
        percussion_audio = pad_audio_center(percussion_path)
        percussion_audio = normalize_loudness(percussion_audio, target_loudness)

        # Randomly select 1 or 2 noise classes for each mix
        for _ in range(num_mixes // len(percussion_files)):
            num_noise_classes = random.randint(1, max_noise_classes)
            noise_classes = random.sample(noise_class_list, k=num_noise_classes)
            k = random.choice(k_values)
            noise_audio_combined = numpy.zeros_like(percussion_audio)

            noise_files = []
            noise_classes_str = []

            # Load and mix noise files
            for noise_class in noise_classes:
                noise_row = metadata_noise[metadata_noise['class'] == noise_class].sample(n=1).iloc[0]
                noise_file = os.path.join(DATASET_NOISE_PATH, f"fold{noise_row['fold']}", noise_row['slice_file_name'])
                noise_audio = pad_audio_center(noise_file)
                noise_audio = normalize_loudness(noise_audio, target_loudness)

                # Add noise to combined noise audio
                noise_audio_combined += noise_audio

                # Keep track of noise files and classes
                noise_files.append(noise_row['slice_file_name'])
                noise_classes_str.append(noise_class)

            # Create the final mixture
            noise_audio_combined = normalize_loudness(noise_audio_combined, target_loudness)
            mixture_audio = create_mixture(percussion_audio, noise_audio_combined, k)

            # Save mixture audio
            mix_file_name = f"mixture_{perc_idx}_noise_{'_'.join(noise_classes)}_k_{k:.2f}.wav"
            mix_file_path = os.path.join(output_dir, mix_file_name)
            sf.write(mix_file_path, mixture_audio, sample_rate)

            # Append metadata
            metadata.append({
                'percussion_file': perc_file,
                'mix_file': mix_file_name,
                'noise_files': ','.join(noise_files),
                'noise_classes': ','.join(noise_classes_str),
                'k': k
            })

    # Save metadata to CSV
    metadata_df = pd.DataFrame(metadata)
    metadata_df.to_csv(os.path.join(output_dir, "metadata.csv"), index=False)
    print(f"Dataset and metadata saved in '{output_dir}'")

# Load noise metadata
metadata_noise_path = os.path.join(DATASET_NOISE_PATH, "UrbanSound8k.csv")
metadata_noise = pd.read_csv(metadata_noise_path)

# Change classID 8 to 6
metadata_noise['classID'] = metadata_noise['classID'].replace(8, 6)
# Sort by class ID and fold and reset index
metadata_noise = metadata_noise.sort_values(
    by=['classID', 'fold']).reset_index(drop=True)

# # Get unique noise classes and their IDs
# noise_class = metadata_noise['class'].unique()
# noise_classID = metadata_noise['classID'].unique()

# # Create a DataFrame for noise classes
# noise_data = {
#     'class': noise_class,
#     'classID': noise_classID,
#     'count': [len(metadata_noise[metadata_noise['class'] == c]) for c in noise_class],
# }
# df = pd.DataFrame(noise_data)

# Create dataset with mixtures and save metadata
output_dir = DATASET_MIX_AUDIO_PATH
create_dataset(metadata_noise, output_dir, num_mixes=7358)

In [1]:
import numpy
import os
import pandas as pd
import torch
import pyloudnorm as pyln
import random
import soundfile as sf
from tqdm import tqdm
import librosa
from config import DATASET_PERCUSSION_PATH, DATASET_NOISE_PATH, DATASET_MIX_AUDIO_PATH

# # Load metadata
metadata_path = os.path.join(output_dir, "metadata.csv")
metadata = pd.read_csv(metadata_path)

NameError: name 'output_dir' is not defined

In [None]:
metadata.keys()

Index(['percussion_file', 'mix_file', 'noise_files', 'noise_classes', 'k'], dtype='object')

In [None]:
print(f"The number of unique noise classes: {metadata['noise_classes'].nunique()}")
print(f"The count of each k value: {metadata['k'].value_counts()}")
print(f"The number of unique percussion files: {metadata['percussion_file'].nunique()}")
print(f"The number of unique mix files: {metadata['mix_file'].nunique()}")
print(f"The number of unique noise files: {metadata['noise_files'].nunique()}")
print(f"The count of each noise file: {metadata['noise_files'].value_counts()}")
print(metadata.groupby("noise_files").size().unique())

The number of unique noise classes: 64
The count of each k value: k
0.9    1499
0.5    1496
0.6    1460
0.8    1457
0.7    1441
Name: count, dtype: int64
The number of unique percussion files: 387
The number of unique mix files: 6879
The number of unique noise files: 6546
The count of each noise file: noise_files
162541-1-1-0.wav                    6
65472-1-0-0.wav                     5
161010-1-2-0.wav                    5
57584-4-0-5.wav                     4
196070-2-0-1.wav                    4
                                   ..
155311-3-0-0.wav                    1
94636-8-0-15.wav                    1
159745-8-1-5.wav                    1
22601-8-0-33.wav,99812-1-0-0.wav    1
105029-7-2-7.wav                    1
Name: count, Length: 6546, dtype: int64
[2 1 3 4 5 6]


In [None]:
print(metadata.groupby("noise_files").size().describe())

count    6546.000000
mean        1.123281
std         0.404239
min         1.000000
25%         1.000000
50%         1.000000
75%         1.000000
max         6.000000
dtype: float64
