In [60]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import time
from tqdm import tqdm_notebook

########## Function version of Avery's work, meant to return features when called from BuildDataset. ##########
def feature_extract(file):
    """Load one audio file and reduce it to a list of six averaged features.

    Parameters
    ----------
    file : path of the audio file handed to ``librosa.load``.

    Returns
    -------
    list
        ``[y_harmonic, y_percussive, chroma_cens, mfcc, mel_spec, spec_contrast]``
        where the first two entries are the overall means of the harmonic and
        percussive waveforms, and the remaining four are feature matrices
        averaged along axis 1 (one value per feature row).
    """
    # sr=None is forwarded to librosa.load; per librosa's documentation this
    # keeps the file's native sampling rate instead of resampling.
    signal, rate = librosa.load(file, sr=None)
    frame_hop = 512

    def _row_means(matrix):
        # Collapse axis 1 so each feature row becomes a single average value.
        return np.mean(matrix, axis=1)

    # Harmonic/percussive source separation yields two waveforms.
    harmonic_part, percussive_part = librosa.effects.hpss(signal)

    # Chroma Energy Normalized (CENS)
    cens_avg = _row_means(librosa.feature.chroma_cens(y=signal, sr=rate))

    # Mel spectrogram: 128 mel bands, capped at 8 kHz
    mel_avg = _row_means(
        librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=128, fmax=8000)
    )

    # 13 Mel-Frequency Cepstral Coefficients computed from the raw signal
    mfcc_avg = _row_means(
        librosa.feature.mfcc(y=signal, sr=rate, hop_length=frame_hop, n_mfcc=13)
    )

    # Spectral contrast
    contrast_avg = _row_means(librosa.feature.spectral_contrast(y=signal, sr=rate))

    return [
        np.mean(harmonic_part),
        np.mean(percussive_part),
        cens_avg,
        mfcc_avg,
        mel_avg,
        contrast_avg,
    ]

def getDataset(dataGroup, instrument, source):
    """Extract features for every example of one instrument family and source.

    Parameters
    ----------
    dataGroup : str
        Suffix of the dataset folder, i.e. ``'Dataset/nsynth-' + dataGroup``.
    instrument : str
        Value to match against the ``instrument_family_str`` column.
    source : str
        Value to match against the ``instrument_source_str`` column.

    Returns
    -------
    pandas.DataFrame
        One row per matching example (indexed by the example key from
        ``examples.json``) with the columns ``y_harmonic``, ``y_percussive``,
        ``chroma_cens``, ``mfcc``, ``mel_spec`` and ``spec_contrast``.
    """
    split_root = 'Dataset/nsynth-' + dataGroup
    audio_dir = split_root + '/audio/'

    # examples.json describes every example in the split; its keys become the index.
    examples = pd.read_json(path_or_buf=split_root + '/examples.json', orient='index')

    # Keep only the rows matching both the requested family and the requested source.
    wanted = (
        (examples['instrument_family_str'] == instrument)
        & (examples['instrument_source_str'] == source)
    )
    selected_keys = examples.loc[wanted].index.tolist()

    # tqdm_notebook wraps the iteration in a progress bar; each key maps to
    # '<audio_dir><key>.wav' on disk.
    features_by_key = {
        key: feature_extract(audio_dir + key + '.wav')
        for key in tqdm_notebook(selected_keys)
    }

    return pd.DataFrame.from_dict(
        features_by_key,
        orient='index',
        columns=['y_harmonic', 'y_percussive', 'chroma_cens',
                 'mfcc', 'mel_spec', 'spec_contrast'],
    )

def instrument_code(filename):
    """
    Return the integer code of the first instrument family named in `filename`.

    The code is the position of the family in the fixed list below. Families
    are tried in list order using a substring test, and the first hit wins.
    Returns None when no family name occurs in `filename`.
    """
    families = (
        'bass', 'brass', 'flute', 'guitar',
        'keyboard', 'mallet', 'organ', 'reed',
        'string', 'synth_lead', 'vocal',
    )

    matching_codes = (
        code for code, family in enumerate(families) if family in filename
    )
    return next(matching_codes, None)
    
def equalize_data(class1, class2):
    """Balance two class frames by oversampling the smaller one.

    The smaller frame is repeated whole (rows kept in their original order,
    index labels duplicated) until it has at least as many rows as the larger
    frame, then truncated to exactly that many rows. The larger frame is
    returned untouched, and neither input is modified.

    Sizes are measured in rows (``len``) rather than by counting non-null
    ``y_harmonic`` values, so they always agree with the row-based truncation.

    Parameters
    ----------
    class1, class2 : pandas.DataFrame
        Feature frames for the two classes.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(class1_balanced, class2_balanced)`` with equal row counts.

    Raises
    ------
    ValueError
        If exactly one of the frames is empty: there are no rows to repeat,
        so the classes cannot be balanced by oversampling.
    """
    rows1, rows2 = len(class1), len(class2)

    # Already balanced (this also covers the case where both are empty).
    if rows1 == rows2:
        return (class1, class2)

    if min(rows1, rows2) == 0:
        raise ValueError(
            'cannot balance classes by oversampling: one class has no rows'
        )

    if rows1 < rows2:
        minority, target_rows = class1, rows2
    else:
        minority, target_rows = class2, rows1

    # Ceiling division: the fewest whole copies that reach target_rows.
    copies = -(-target_rows // len(minority))
    # iloc keeps the truncation positional regardless of the index type.
    oversampled = pd.concat([minority] * copies).iloc[:target_rows]

    if rows1 < rows2:
        return (oversampled, class2)
    return (class1, oversampled)

# Progress marker printed once the helper definitions above have been executed.
print('functions declared')


functions declared


In [31]:
# Instrument families, sound sources and dataset splits that can be selected.
CLASS_NAMES = [
    'bass', 'brass', 'flute', 'guitar',
    'keyboard', 'mallet', 'organ', 'reed',
    'string', 'synth_lead', 'vocal',
]
SOURCE_NAMES = ['acoustic', 'electronic', 'synthetic']
DATA_GROUPS = ['test', 'valid', 'train']

# Selection knobs: change the index to pick another split or source.
dataGroup = DATA_GROUPS[0]  # 'test' (use 'valid' / 'train' if you have them)
source = SOURCE_NAMES[0]    # 'acoustic'

# Feature table for the string family
string_df = getDataset(dataGroup=dataGroup, instrument='string', source=source)

# Feature table for the keyboard family
keyboard_df = getDataset(dataGroup=dataGroup, instrument='keyboard', source=source)

HBox(children=(IntProgress(value=0, max=306), HTML(value='')))




HBox(children=(IntProgress(value=0, max=119), HTML(value='')))




In [73]:
# Fixed seed so the shuffle below gives the same row order on every
# Restart & Run All (the original shuffle was unseeded).
SHUFFLE_SEED = 42

# Add targets to the dataframes: each row's label is the instrument code
# derived from its index entry (the example filename).
string_targets_test = [instrument_code(name) for name in string_df.index]
keyboard_targets_test = [instrument_code(name) for name in keyboard_df.index]

# .assign builds a new frame instead of writing a column into the existing
# one, so re-running this cell never writes into a slice of an earlier frame.
string_df = string_df.assign(targets=string_targets_test)
keyboard_df = keyboard_df.assign(targets=keyboard_targets_test)

# Balance the dataset so both classes contribute the same number of rows.
(string_df, keyboard_df) = equalize_data(string_df, keyboard_df)
input_df = pd.concat([string_df, keyboard_df])

# Randomize row order reproducibly; the filename index is dropped in favour
# of a fresh 0..n-1 index.
input_df = input_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
input_df.head()

612


Unnamed: 0,y_harmonic,y_percussive,chroma_cens,mfcc,mel_spec,spec_contrast,targets
0,6.664098e-06,1.1e-05,"[0.1096170072796327, 0.07846977792513349, 0.07...","[-460.9752, 54.22047, 32.74235, 20.803627, 17....","[2.5420077, 4.8765593, 0.78300476, 4.179643, 3...","[22.113388275851623, 28.377618085149557, 29.83...",4
1,-2.448392e-07,-1e-06,"[0.10964852135780465, 0.10612814421720879, 0.0...","[-536.8973, 17.354328, 3.1125765, 2.1465187, -...","[4.213301e-05, 2.5070683e-05, 2.987804e-05, 0....","[25.347767265064782, 27.805910531505422, 25.99...",8
2,2.478584e-06,6e-06,"[0.2487313639043975, 0.1642686204498068, 0.0, ...","[-392.53815, 174.15317, 46.716755, 15.358407, ...","[0.3893757, 2.4550638, 38.302204, 86.18423, 57...","[31.963598967546925, 26.012978061844446, 26.48...",4
3,-7.241782e-07,-2e-05,"[0.23649112390870805, 0.39909175735867236, 0.3...","[-602.82965, 57.561806, 28.508324, 29.559114, ...","[0.32004213, 0.8644902, 0.24710113, 1.1290387,...","[21.05348124833135, 16.901026272116198, 17.290...",4
4,1.531883e-06,-8e-06,"[0.21195912316354568, 0.09669011132710346, 0.0...","[-291.25998, 161.35541, 36.310596, 12.888318, ...","[4.3110814, 22.95175, 771.4433, 2010.0955, 46....","[29.064563852414867, 22.879308547722488, 22.24...",8
