# Notebook used to add noise to other datasets


## Imports

In [57]:
from audiomentations import Compose, AddBackgroundNoise
import pandas as pd
import os
import librosa
from scipy.io.wavfile import write

## Set paths

In [58]:
# Resolve the directories this notebook works with.
# The project root is the parent directory of the current working directory.
project_root = os.path.split(os.getcwd())[0]
dataset = 'MLS'
noise_path = os.path.join(project_root, 'noise', 'free-sound')
dataset_path = os.path.join(os.path.join(project_root, dataset), 'Full')

# Directory listings of both folders (bare entry names, not full paths).
# The dataset folder is listed first, then the noise folder.
dataset_files, noise_files = os.listdir(dataset_path), os.listdir(noise_path)

## Create dataframes

In [62]:
# Build `df`, one row per dataset WAV file, with columns:
#   FileName  - name of the audio file inside dataset_path
#   NoiseFile - name of a noise clip from noise_path, assigned round-robin
dataset_df = pd.DataFrame(dataset_files, columns=['FileName'])

# Use endswith rather than str.contains(".wav"): contains() interprets the
# pattern as a regex (so "." matches any character) and matches anywhere in
# the name. Resetting the index keeps the rows aligned with noise_df when the
# two frames are concatenated column-wise below.
is_wav = dataset_df['FileName'].str.endswith('.wav')
dataset_df = dataset_df[is_wav].reset_index(drop=True)

# Repeat the noise file list as many times as needed to cover every dataset
# file (the list may be much shorter than the dataset), then trim it to the
# exact length. DataFrame.append is avoided because it no longer exists in
# pandas >= 2.0.
n_files = len(dataset_df)
if noise_files and n_files:
    repeats = -(-n_files // len(noise_files))  # ceiling division
    cycled_noise = (list(noise_files) * repeats)[:n_files]
else:
    # No noise files available (or nothing to pair): leave the column empty.
    cycled_noise = [None] * n_files
noise_df = pd.DataFrame(cycled_noise, columns=['NoiseFile'])

# Both frames now share the same 0..n-1 index, so the column-wise concat
# pairs rows one-to-one and keeps the 'FileName' / 'NoiseFile' column names.
df = pd.concat([dataset_df, noise_df], axis=1)

## Add noise

In [85]:
# Mix background noise into every dataset file listed in df['FileName'] and
# write the result, under the same file name, to <project_root>/<dataset>_noise.
sr = 22050  # single sample rate used for loading, augmenting and writing

# Derive the output folder from `dataset` (gives 'MLS_noise' for 'MLS') and
# make sure it exists before the first write.
output_path = os.path.join(project_root, f'{dataset}_noise')
os.makedirs(output_path, exist_ok=True)

# The augmenter does not depend on the file being processed, so build it once
# instead of on every iteration.
# p=1.0 makes the transform apply to every file; audiomentations transforms
# otherwise apply with probability 0.5, leaving about half the outputs clean.
# NOTE(review): the noise clip is chosen by AddBackgroundNoise from
# noise_path; the 'NoiseFile' column of df is not consulted here.
augmenter = Compose([AddBackgroundNoise(noise_path, p=1.0)])

# Skip rows without a file name so os.path.join never receives NaN.
file_names = df['FileName'].dropna()
total = len(file_names)

for done, file_name in enumerate(file_names, start=1):
    sample, _ = librosa.load(os.path.join(dataset_path, file_name), sr=sr)
    # Pass the rate the audio was actually loaded at (previously a fixed
    # 16000 was passed although the samples are at `sr`).
    audio_with_noise = augmenter(samples=sample, sample_rate=sr)
    write(os.path.join(output_path, file_name), sr, audio_with_noise)
    # 1-based counter so the final line reads total/total.
    print(f"\r{done}/{total}", end='')

870/871