# Build augmented dataset
This code generates the augmented part of the Five Guitar dataset.

In [1]:
import essentia.standard as ess
import numpy as np
from scipy.signal import convolve
import glob, os, sys

from scipy.signal import fftconvolve

In [2]:
import sox
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift, AddImpulseResponse

In [3]:
ir_paths = {'MIT_Reverbs':'../Databases/IR/MIT/',
          'MeldaProduction_Microphones':'../Databases/IR/MeldaProduction_Microphones/',
          'MeldaProduction_Rooms':'../Databases/IR/MeldaProduction_Rooms/',
        'Folds_MeldaProduction_Rooms':'../Databases/IR/MeldaProduction_Rooms/',
          'MeldaProduction_Box':'../Databases/IR/MeldaProduction_Box/'}
audio_path = '../Databases/Raw_database/'

## Load Raw dataset audios
Load DI recordings and then apply data augmentation.

In [4]:
raw_audios = glob.glob(audio_path + '*_DI.wav')

## Load Impulse Responses
This code generates the following augmented datasets:
* MeldaProduction_Box
* MeldaProduction_Microphones
* MeldaProduction_Rooms
* Folds_MeldaProduction_Rooms
* MIT_Reverbs

In **database_name** variable, put the name of the dataset you want to generate.

In [5]:
database_name_list = list(ir_paths.keys())

In [6]:
for database_name in database_name_list:
    ir_path = ir_paths[database_name]
    store_path = '../Databases/Augmented_database/' + database_name + '/'

    types = ('*.wav', '*.flac') 
    ir_files = []
    for files in types:
        ir_files.extend(glob.glob(ir_path + files))
    # Rename impulse responses
    if database_name == 'MIT_Reverbs':#for the naming of the output files
        ir_names = [r.split('_')[0].replace(ir_path,'') for r in ir_files]
    else:
        ir_names = []
        for r in ir_files:
            r_norm = r.split('/')[-1].replace('.wav','')
            r_norm = r_norm.replace('.flac','')
            r_norm = r_norm.replace(' ','_')
            ir_names.append(r_norm)

    print('You loaded {} IR files from "{}" database. '.format(len(ir_files),database_name))

    # Convole impulse responses with DI recordings and store the results in the appropriate augmented set.

    filter_in = ['Small_04','Small_05','Small_tiled_room','h008','Living_Room','Medium_02','Medium_05',
                'h027','Large_05','Large_tiled_room','Large_wooden_room','h094',

                'Laptop_Speaker','Mobile_Phone_01','Mobile_Phone_03','Radio_01','Radio_02','Voice_Recorder_01',

                'AKG_C3000_25cm','Sennheiser_e609_Direct','Shure_Beta52_Direct','TakStar_Direct_A']

    if not os.path.exists(store_path):
        os.mkdir(store_path)
        print("Directory " , store_path ,  " Created ")
    else:    
        print("Directory " , store_path ,  " already exists")

    for aud in raw_audios:
        print('Processing {}'.format(aud))
        audio = ess.MonoLoader(filename=aud, sampleRate=44100)()
        len_ref = len(audio)

        for ir,rev in zip(ir_files,ir_names):
            if (rev in filter_in) and (database_name !='Folds_MeldaProduction_Rooms'):
                print('-> 1.{}...'.format(rev))
                ir_samples = ess.MonoLoader(filename=ir, sampleRate= 44100)()

                output_file_name = store_path + aud.split('/')[-1].replace('.wav','_') +rev + '.wav'

                audio_augmented = fftconvolve(audio, ir_samples, mode = 'full')    
                ess.MonoWriter(filename=output_file_name, format='wav')(audio_augmented)

            if database_name =='Folds_MeldaProduction_Rooms':
                print('-> {}...'.format(rev))
                ir_samples = ess.MonoLoader(filename=ir, sampleRate= 44100)()

                output_file_name = store_path + aud.split('/')[-1].replace('.wav','_') +rev + '.wav'

                audio_augmented = fftconvolve(audio, ir_samples, mode = 'full')    
                ess.MonoWriter(filename=output_file_name, format='wav')(audio_augmented)


You loaded 270 IR files from "MIT_Reverbs" database. 
Directory  ../Databases/Augmented_database/MIT_Reverbs/  Created 
Processing ../Databases/Raw_database/Lily_Telecaster_108_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Century_Larrivee_130_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Lily_Larrivee_108_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Mountain_Eastman_104_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Lily_Ibanez_108_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Century_Telecaster_130_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Where_Larrivee_100_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Hole_Larrivee_102_DI.wav
-> 1.h094...
-> 1.h027...
-> 1.h008...
Processing ../Databases/Raw_database/Train_Epiphone_112_DI.w

-> 1.TakStar_Direct_A...
-> 1.AKG_C3000_25cm...
Processing ../Databases/Raw_database/Where_Ibanez_100_DI.wav
-> 1.Shure_Beta52_Direct...
-> 1.Sennheiser_e609_Direct...
-> 1.TakStar_Direct_A...
-> 1.AKG_C3000_25cm...
You loaded 34 IR files from "MeldaProduction_Rooms" database. 
Directory  ../Databases/Augmented_database/MeldaProduction_Rooms/  Created 
Processing ../Databases/Raw_database/Lily_Telecaster_108_DI.wav
-> 1.Large_wooden_room...
-> 1.Medium_02...
-> 1.Large_05...
-> 1.Medium_05...
-> 1.Small_04...
-> 1.Living_Room...
-> 1.Small_05...
-> 1.Large_tiled_room...
-> 1.Small_tiled_room...
Processing ../Databases/Raw_database/Century_Larrivee_130_DI.wav
-> 1.Large_wooden_room...
-> 1.Medium_02...
-> 1.Large_05...
-> 1.Medium_05...
-> 1.Small_04...
-> 1.Living_Room...
-> 1.Small_05...
-> 1.Large_tiled_room...
-> 1.Small_tiled_room...
Processing ../Databases/Raw_database/Lily_Larrivee_108_DI.wav
-> 1.Large_wooden_room...
-> 1.Medium_02...
-> 1.Large_05...
-> 1.Medium_05...
-> 1.Smal

-> Snare_big_room...
-> Medium_03...
-> Medium_01...
-> Small_01...
-> Vocal_room...
-> Gated_drum_room...
-> Stone_Room...
-> Small_03...
-> Large_04...
-> Zep_Room...
-> The_Viper_Pit...
-> Tiled_room...
-> Small_05...
-> Stone_Space...
-> Large_tiled_room...
-> Small_wooden_room...
-> Studio_beta...
-> Small_tiled_room...
Processing ../Databases/Raw_database/Century_Larrivee_130_DI.wav
-> Small_06...
-> Small_02...
-> Large_03...
-> Large_wooden_room...
-> Large_01...
-> Medium_02...
-> Large_05...
-> Studio_alpha...
-> Medium_05...
-> Small_04...
-> Living_Room...
-> Medium_04...
-> Wooden_room...
-> Large_02...
-> Small_wood_room...
-> Dark_drum_room...
-> Snare_big_room...
-> Medium_03...
-> Medium_01...
-> Small_01...
-> Vocal_room...
-> Gated_drum_room...
-> Stone_Room...
-> Small_03...
-> Large_04...
-> Zep_Room...
-> The_Viper_Pit...
-> Tiled_room...
-> Small_05...
-> Stone_Space...
-> Large_tiled_room...
-> Small_wooden_room...
-> Studio_beta...
-> Small_tiled_room...
Proces

-> Large_tiled_room...
-> Small_wooden_room...
-> Studio_beta...
-> Small_tiled_room...
Processing ../Databases/Raw_database/Lily_Epiphone_108_DI.wav
-> Small_06...
-> Small_02...
-> Large_03...
-> Large_wooden_room...
-> Large_01...
-> Medium_02...
-> Large_05...
-> Studio_alpha...
-> Medium_05...
-> Small_04...
-> Living_Room...
-> Medium_04...
-> Wooden_room...
-> Large_02...
-> Small_wood_room...
-> Dark_drum_room...
-> Snare_big_room...
-> Medium_03...
-> Medium_01...
-> Small_01...
-> Vocal_room...
-> Gated_drum_room...
-> Stone_Room...
-> Small_03...
-> Large_04...
-> Zep_Room...
-> The_Viper_Pit...
-> Tiled_room...
-> Small_05...
-> Stone_Space...
-> Large_tiled_room...
-> Small_wooden_room...
-> Studio_beta...
-> Small_tiled_room...
Processing ../Databases/Raw_database/Mountain_Telecaster_104_DI.wav
-> Small_06...
-> Small_02...
-> Large_03...
-> Large_wooden_room...
-> Large_01...
-> Medium_02...
-> Large_05...
-> Studio_alpha...
-> Medium_05...
-> Small_04...
-> Living_Room.

-> Medium_02...
-> Large_05...
-> Studio_alpha...
-> Medium_05...
-> Small_04...
-> Living_Room...
-> Medium_04...
-> Wooden_room...
-> Large_02...
-> Small_wood_room...
-> Dark_drum_room...
-> Snare_big_room...
-> Medium_03...
-> Medium_01...
-> Small_01...
-> Vocal_room...
-> Gated_drum_room...
-> Stone_Room...
-> Small_03...
-> Large_04...
-> Zep_Room...
-> The_Viper_Pit...
-> Tiled_room...
-> Small_05...
-> Stone_Space...
-> Large_tiled_room...
-> Small_wooden_room...
-> Studio_beta...
-> Small_tiled_room...
Processing ../Databases/Raw_database/Train_Telecaster_112_DI.wav
-> Small_06...
-> Small_02...
-> Large_03...
-> Large_wooden_room...
-> Large_01...
-> Medium_02...
-> Large_05...
-> Studio_alpha...
-> Medium_05...
-> Small_04...
-> Living_Room...
-> Medium_04...
-> Wooden_room...
-> Large_02...
-> Small_wood_room...
-> Dark_drum_room...
-> Snare_big_room...
-> Medium_03...
-> Medium_01...
-> Small_01...
-> Vocal_room...
-> Gated_drum_room...
-> Stone_Room...
-> Small_03...
-> 

-> 1.Laptop_Speaker...
-> 1.Radio_02...
-> 1.Mobile_Phone_03...
-> 1.Radio_01...
Processing ../Databases/Raw_database/Train_Telecaster_112_DI.wav
-> 1.Mobile_Phone_01...
-> 1.Voice_Recorder_01...
-> 1.Laptop_Speaker...
-> 1.Radio_02...
-> 1.Mobile_Phone_03...
-> 1.Radio_01...
Processing ../Databases/Raw_database/Where_Epiphone_100_DI.wav
-> 1.Mobile_Phone_01...
-> 1.Voice_Recorder_01...
-> 1.Laptop_Speaker...
-> 1.Radio_02...
-> 1.Mobile_Phone_03...
-> 1.Radio_01...
Processing ../Databases/Raw_database/Century_Ibanez_130_DI.wav
-> 1.Mobile_Phone_01...
-> 1.Voice_Recorder_01...
-> 1.Laptop_Speaker...
-> 1.Radio_02...
-> 1.Mobile_Phone_03...
-> 1.Radio_01...
Processing ../Databases/Raw_database/Where_Ibanez_100_DI.wav
-> 1.Mobile_Phone_01...
-> 1.Voice_Recorder_01...
-> 1.Laptop_Speaker...
-> 1.Radio_02...
-> 1.Mobile_Phone_03...
-> 1.Radio_01...


# APPLY EFFECTS

This code generates the SOX_Effects augmented set.

In [9]:
store_path = '../Databases/Augmented_database/SOX_Effects/'

if not os.path.exists(store_path):
    os.mkdir(store_path)
    print("Directory " , store_path ,  " Created ")
else:    
    print("Directory " , store_path ,  " already exists")
    
    
for aud in raw_audios:
# # Chorus
    tfm = sox.Transformer()
    tfm.set_globals(guard = True)
    tfm.chorus()
    output_file_name = store_path+ aud.split('/')[-1].replace('.wav','_').replace('_DI_','_')
    tfm.build_file(aud, output_file_name +'Chorus'+ '.wav')

# Flanger

    tfm = sox.Transformer()
    tfm.set_globals(guard = True)
    tfm.flanger(delay= 30, depth= 10, regen= 0, width= 80, speed= 0.4, phase = 50)

    tfm.build_file(aud,output_file_name +'Flanger'+ '.wav')

# # Overdrive

    tfm = sox.Transformer()
    tfm.set_globals(guard = False)
    tfm.overdrive(gain_db = 50, colour = 20)

    tfm.build_file(aud,output_file_name +'OverDrive'+ '.wav')

# # Phaser

    tfm = sox.Transformer()
    tfm.set_globals(guard = True)
    tfm.phaser(gain_in  = 0.8, gain_out = 1,delay = 5,speed = 1.2,modulation_shape = 'sinusoidal')

    tfm.build_file(aud,output_file_name +'Phaser'+ '.wav')

Directory  ../Databases/Augmented_database/SOX_Effects/  already exists



# APPLY FILTERS

This code generates the SOX_Filters augmented set.

In [10]:
store_path = '../Databases/Augmented_database/SOX_Filters/'

freq_h = [500, 1000, 3000]
freq_l = [500,250,100]
boost = [-20,-10,10,20]

if not os.path.exists(store_path):
    os.mkdir(store_path)
    print("Directory " , store_path ,  " Created ")
else:    
    print("Directory " , store_path ,  " already exists")
    
    
for audio_path in raw_audios:
    output_file = store_path+ audio_path.split('/')[-1].replace('.wav','_').replace('_DI_','_')
# High pass
    for f in freq_h:
        tfm = sox.Transformer()
        tfm.set_globals(guard = True)
        tfm.highpass(frequency = f)
        
        
        tfm.build_file(audio_path, output_file+'Hpass'+str(f)+ '.wav')


# Low pass
    for f in freq_l:
        tfm = sox.Transformer()
        tfm.set_globals(guard = True)
        tfm.lowpass(frequency = f)

        tfm.build_file(audio_path, output_file +'Lpass'+str(f)+ '.wav')

Directory  ../Databases/Augmented_database/SOX_Filters/  already exists
