This notebook contains code to do the following:
    1. Copy audio files with specific parameters from the good-sounds database into a temporary folder.
    2. Split the copied audio files into train and test sets.

In [1]:
import sqlite3
import os
import IPython.display as ipd
import shutil
import glob
import sys
import numpy as np
import matplotlib.pyplot as pyp
import essentia.standard as ess

In [2]:
# Path to the good-sounds SQLite file (the file itself, not the folder that holds it).
# Set the GOOD_SOUNDS_DB environment variable to override the default location below.
dir_gs_dataset = os.environ.get(
    'GOOD_SOUNDS_DB',
    '/home/krishna/Desktop/IITB/DDP/Datasets/good-sounds/database.sqlite',
)

# sqlite3.connect() creates a new, empty database when the path does not exist, which
# would only surface later as a "no such table" error. Fail early with a clear message.
if not os.path.isfile(dir_gs_dataset):
    raise FileNotFoundError("good-sounds database not found at '%s'" % dir_gs_dataset)

# Rows are left as plain tuples (no row_factory): later cells index them by position.
tables = sqlite3.connect(dir_gs_dataset)
print("Opened database successfully")

Opened database successfully


In [3]:
# Load every row of the Sounds table, then collect the first column of each row into
# a list that is index-aligned with table_sounds (later used to look rows up by sound id).
cursor_sounds = tables.cursor()
table_sounds = cursor_sounds.execute('SELECT * from Sounds').fetchall()
main_sid_list = [sound_row[0] for sound_row in table_sounds]

Select the desired properties of the audio from the good-sounds dataset:
1. Instrument - flute, cello, clarinet, trumpet, violin, sax_alto, sax_tenor, sax_baritone, sax_soprano, oboe, piccolo, bass
2. Notes - 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'
3. Octave - 3, 4, 5, 6

In [4]:
# Selection criteria used when querying the good-sounds database.
instrument = ['violin']
notes = [
    'C', 'C#', 'D', 'D#', 'E', 'F',
    'F#', 'G', 'G#', 'A', 'A#', 'B',
]
# notes = ['C']   # single-note alternative
octave = [4]
# presumably the sampling rate in Hz; not referenced by the cells shown here
fs = 48000
# Sounds whose klass appears in this tuple are excluded, so only the `good'
# recordings are selected.
rmKlass = (
    'bad-dynamics-tremolo',
    'bad-pitch-vibrato',
    'bad-timbre-errors',
    'bad-richness-bridge',
    'bad-richness-sultasto',
    'bad-pitch',
    'bad-timbre',
    'bad-richness',
)

In [5]:
# Per-octave containers filled below (all four lists are index-aligned):
#   mlist           - full path of each selected audio file
#   notes_list      - note name of each selected file
#   uid             - 1-based position of the take within its (instrument, note) query result
#   list_rec_params - selected columns of the `sounds` row belonging to each file
oct_dict = {
    k: {'mlist': [], 'notes_list': [], 'uid': [], 'list_rec_params': []}
    for k in octave
}

# Dataset root that takes.filename values are appended to. It is the folder holding
# the sqlite file, so it is derived from dir_gs_dataset instead of being typed twice.
sounds_dir = os.path.join(os.path.dirname(dir_gs_dataset), '')

# sound id -> full `sounds` row, replacing a linear list.index() search per file.
# setdefault keeps the first row seen for an id, which is what index() would return.
sound_by_id = {}
for sound_row in table_sounds:
    sound_by_id.setdefault(sound_row[0], sound_row)

# Filename and sound id are fetched by ONE query so the two are always aligned, and
# all values are bound as parameters rather than formatted into the SQL text (the
# formatted tuple produced invalid SQL for a single-element rmKlass).
take_query = (
    "SELECT t.filename, t.sound_id FROM takes t LEFT JOIN sounds s ON t.sound_id = s.id "
    "WHERE s.instrument = ? AND s.note = ? AND s.octave = ? "
    "AND s.klass NOT IN ({})".format(', '.join('?' * len(rmKlass)))
)

for k in octave:
    cou = 0

    # Raw query results of the current octave, one entry per (instrument, note) query
    lst_file_it = []
    lst_sid_it = []

    for instru in instrument:
        for n in notes:
            cursor = tables.cursor()
            cursor.execute(take_query, (instru, n, k) + tuple(rmKlass))
            takes = cursor.fetchall()

            # Same 1-tuple row shape the two separate queries used to return
            files = [(take[0],) for take in takes]
            fid = [(take[1],) for take in takes]
            lst_file_it.append(files)
            lst_sid_it.append(fid)
            cou = cou + 1

            for it, (filename, sound_id) in enumerate(takes):
                # Remove scales from the audio.
                # To select only scales, negate the condition below.
                if 'scales' in filename:
                    continue

                # Look up the sound of THIS take (previously fid[0][0], i.e. always
                # the first take's sound, was used for every file of the note).
                temp = sound_by_id[sound_id]
                # Column positions are specific to the `sounds` table layout - TODO
                # confirm their meaning against the database schema.
                temp_params = [temp[2], temp[3], temp[4], temp[15], temp[16],
                               temp[17], temp[18], temp[19], temp[-2]]
                oct_dict[k]['list_rec_params'].append(temp_params)
                oct_dict[k]['notes_list'].append(n)
                oct_dict[k]['uid'].append(it + 1)
                oct_dict[k]['mlist'].append(sounds_dir + filename)

In [6]:
# Octave whose selection is handed to the following cells
o = 4
octave_entry = oct_dict[o]
mlist, notes_list, uid, list_rec_params = (
    octave_entry[field]
    for field in ('mlist', 'notes_list', 'uid', 'list_rec_params')
)

In [7]:
# Copy the selected recordings into a local working folder for analysis, naming
# each copy <note>_<uid>.wav.
dir_audio = './sounds_folder/'
try:
    os.makedirs(dir_audio, exist_ok = True)
    print("Directory '%s' created successfully" %dir_audio)
except OSError as error:
    # With exist_ok=True an already-existing directory is not an error, so reaching
    # this branch means the folder really could not be created.
    print("Directory '%s' could not be created: %s" % (dir_audio, error))

for i, f in enumerate(mlist):
    # Copy straight to the final name: no intermediate file under the source's
    # basename, and no dependence on '/' being the path separator.
    shutil.copy(f, dir_audio + notes_list[i] + '_' + str(uid[i]) + '.wav')

Directory './sounds_folder/' created successfully


In [8]:
# Prepare the train/test split: list the copied audio files, create the two target
# folders and set up one empty file list per note for each split.
list_afiles = glob.glob('./sounds_folder/*.wav')
train_dir = './sounds_folder/Train/'
test_dir = './sounds_folder/Test/'

for split_dir in (train_dir, test_dir):
    try:
        os.makedirs(split_dir, exist_ok = True)
        print("Directory '%s' created successfully" %split_dir)
    except OSError as error:
        # With exist_ok=True an already-existing directory is not an error, so this
        # branch means the folder really could not be created.
        print("Directory '%s' could not be created: %s" % (split_dir, error))

# Note name -> list of files assigned to that split. Each key gets its own list.
note_names = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
dict_train = {name: [] for name in note_names}
dict_test = {name: [] for name in note_names}

Directory './sounds_folder/Train/' created successfully
Directory './sounds_folder/Test/' created successfully


In [9]:
# Randomly assign every copied file to the train or test folder.
np.random.seed(1)
# Fraction of train-test split
choice_split = 0.8

# glob returns files in arbitrary, filesystem-dependent order. Iterating in sorted
# order ties each random draw to the same file on every run, so the seeded split is
# actually reproducible.
for f in sorted(list_afiles):
    file_name = os.path.basename(f)
    # Files are named <note>_<uid>.wav, so the part before the first '_' is the note
    k = file_name.split('_')[0]
    p = np.random.rand()
    if p < choice_split:
        split_dict, split_dir = dict_train, train_dir
    else:
        split_dict, split_dir = dict_test, test_dir
    # The recorded path is the file's location BEFORE the move (unchanged behaviour)
    split_dict[k].append(f)
    # An explicit destination file (instead of just the folder) lets a re-run replace
    # a copy left over from an earlier run rather than raising "already exists".
    shutil.move(f, os.path.join(split_dir, file_name))