In [None]:
# Uncomment to install librosa into the kernel's environment:
# %pip install librosa

In [9]:
import os
import json

import numpy as np
import librosa

from audio import wav_to_mag_phase, mag_phase_to_wav
import dataset

## Load Datasets

In [10]:
# Field names for the items of a dataset entry (presumably in positional
# order; the loop below only reads element 0 as the waveform).
librispeech_fmt = ('waveform', 'sample_rate', 'utterance', 'speaker_id', 'chapter_id', 'utterance_id')

data_root = 'data'
data_dir = 'LibriSpeech'
subset_name = 'dev-clean'

# Sample rate (Hz) used for every signal in this notebook: chunking into
# one-second segments and all noise mixing are done at this rate.
srate = 16000

ds = dataset.load_librispeech_subset(subset_name, data_root, data_dir)
N = len(ds)
print('Dataset Size: {}'.format(N))


# Load Room Noise Data
# librosa.load resamples to 22050 Hz unless `sr` is given. The room noise is
# added sample-by-sample to speech handled at `srate`, so it must be loaded at
# that same rate or it is mixed in at the wrong speed/pitch.
data_dir = 'RIRS_NOISES/real_rirs_isotropic_noises'
fname = 'RVB2014_type1_noise_largeroom1_10.wav'
room, sr = librosa.load(os.path.join(data_root, data_dir, fname), sr=srate)

Dataset Size: 2703


## Create Dataset

In [11]:
# Dataset Options

# Gains applied to the synthetic babble; every value is used for every sample.
g_babble_range = np.linspace(0.05, 0.2, 5)
# Candidate gains for the room noise; one is drawn at random per noisy sample.
g_room_range = np.linspace(0.5, 2, 4)
# Candidate numbers of utterances mixed into a babble track (10, 20, ..., 70).
n_samples_babble_range = np.arange(10, 71, 10)
# Number of passes over the babble gains per source utterance.
niters = 2

# Output directory for the generated dataset.
data_dir = 'data/noised_synth_babble'
os.makedirs(data_dir, exist_ok=True)

# Fixed seed so the random draws made through `rng` (babble size, room gain)
# are the same on every Restart & Run All. Randomness inside the `dataset`
# helpers is not controlled by this generator.
seed = 0
rng = np.random.default_rng(seed)

In [12]:
# Only the first 200 utterances are processed (fewer if the dataset is smaller).
N = min(200, len(ds))

# Probability forwarded to the babble generator and recorded in the metadata.
flip_prob = 0.4


def _split_into_chunks(signal, chunk_len):
    """Cut a 1-D signal into consecutive chunks of `chunk_len` samples.

    The tail is zero-padded so the last chunk is full length. A signal
    shorter than `chunk_len` yields a single padded chunk, and an empty
    signal yields no chunks.

    Returns an array of shape (n_chunks, chunk_len).
    """
    pad = -signal.shape[0] % chunk_len
    return np.pad(signal, (0, pad)).reshape(-1, chunk_len)


def _stft_chunks(chunks, pad_frames=0, **stft_kwargs):
    """Run `wav_to_mag_phase` on every chunk and stack the results.

    `pad_frames` zero columns are appended along the last axis of each
    magnitude and phase array. `stft_kwargs` are passed to
    `wav_to_mag_phase` unchanged.

    Returns (magnitudes, phases), each stacked along a new leading axis.
    """
    mags, phases = [], []
    for chunk in chunks:
        mag, phase = wav_to_mag_phase(chunk, **stft_kwargs)
        if pad_frames:
            widths = ((0, 0), (0, pad_frames))
            mag, phase = np.pad(mag, widths), np.pad(phase, widths)
        mags.append(mag)
        phases.append(phase)
    return np.asarray(mags), np.asarray(phases)


for idx in range(N):

    sample = ds[idx][0].numpy()[0]

    # STFTs of the clean utterance, one per one-second chunk. They do not
    # depend on the noise settings, so they are computed once per utterance.
    sample_mags, sample_phases = _stft_chunks(
        _split_into_chunks(sample, srate),
        pad_frames=2, n_fft=512, hop_len=64, win_len=512)

    for niter in range(niters):
        # loop over all babble gains
        for g_babble in g_babble_range:
            # create synthetic babble and add to sample
            n_babble_samples = rng.choice(n_samples_babble_range)
            synth_babble = dataset.create_synthetic_babble(
                ds, max_len=sample.shape[0],
                n_samples=n_babble_samples, flip_prob=flip_prob, srate=srate)

            noisy_sample, noise_b = dataset.add_noise_source(sample, synth_babble, g=g_babble)
            # randomly choose a gain for room noise and add to noised sample
            g_room = rng.choice(g_room_range)
            noisy_sample, noise_r = dataset.add_noise_source(noisy_sample, room, g=g_room)
            noise = noise_b*g_babble + noise_r*g_room
            meta = {
                'libri_sample_idx': idx,
                'n_babble_samples': int(n_babble_samples),
                'babble_gain': float(g_babble),
                'room_gain': float(g_room),
                'flip_prob': flip_prob,
                'srate': int(srate),
                'filename': f'noised_s{idx}_n{n_babble_samples}_gb{g_babble}_gr{g_room}_{niter}'
            }

            # Chunk boundaries come from each signal's own length; this
            # assumes `noise` and `noisy_sample` are as long as `sample`
            # -- TODO confirm against dataset.add_noise_source.
            # NOTE(review): the noise STFT uses n_fft=1024 with the helper's
            # default hop/window, and only the clean STFT gets 2 extra frames.
            # The three sets of arrays therefore differ in shape. Kept as-is;
            # confirm this is what the consumer of these files expects.
            noise_mags, noise_phases = _stft_chunks(
                _split_into_chunks(noise, srate), n_fft=1024)
            noised_mags, noised_phases = _stft_chunks(
                _split_into_chunks(noisy_sample, srate),
                n_fft=512, hop_len=64, win_len=512)

            # save data
            dirpath = os.path.join(data_dir, meta['filename'])
            os.makedirs(dirpath, exist_ok=True)
            with open(os.path.join(dirpath, 'meta.json'), 'w') as fobj:
                json.dump(meta, fobj)

            arrays = {
                'noise.npy': noise,
                'noised.npy': noisy_sample,
                'sample_stft_mags.npy': sample_mags,
                'sample_stft_phases.npy': sample_phases,
                'noise_stft_mags.npy': noise_mags,
                'noise_stft_phases.npy': noise_phases,
                'noised_stft_mags.npy': noised_mags,
                'noised_stft_phases.npy': noised_phases,
            }
            for array_name, array in arrays.items():
                np.save(os.path.join(dirpath, array_name), array)

    print('Progress: {}/{}'.format(idx+1, N), end='\r')

Progress: 200/200