In [3]:
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm

In [4]:
#from sklearn.preprocessing import LabelEncoder

def feature_extract(file):
    """Extract time-averaged audio features from one audio file.

    The file is loaded with ``sr=None`` and every feature is reduced to its
    mean, so each file yields a description of the same size whatever its
    duration.

    Args:
        file: Path of the audio file to analyze (handed to ``librosa.load``).

    Returns:
        A list of six items, in this order:
            y_harmonic:    mean of the harmonic waveform (scalar).
            y_percussive:  mean of the percussive waveform (scalar).
            chroma_cens:   per-row mean of the CENS chromagram
                           (12 values with librosa's default ``n_chroma``).
            mfcc:          per-coefficient mean of the MFCCs (13 values,
                           because ``n_mfcc=13``).
            mel_spec:      per-band mean of the mel spectrogram (128 values,
                           because ``n_mels=128``).
            spec_contrast: per-row mean of the spectral contrast.
    """
    y, sr = librosa.load(file, sr=None)

    # One hop length shared by every frame-based feature so they all use the
    # same frame grid. 512 is also librosa's default for these functions.
    hop_length = 512

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Chroma Energy Normalized (CENS)
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr,
                                              hop_length=hop_length)

    # Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,
                                              fmax=8000,
                                              hop_length=hop_length)

    # Mel-Frequency Cepstral Coefficients (MFCC) features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)

    # Spectral Contrast
    spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr,
                                                      hop_length=hop_length)

    # Collapse the time axis: waveforms become a single scalar, the 2-D
    # features keep one value per row (axis=1 averages across columns).
    y_harmonic = np.mean(y_harmonic)
    y_percussive = np.mean(y_percussive)
    mel_spec = np.mean(mel_spec, axis=1)
    mfcc = np.mean(mfcc, axis=1)
    chroma_cens = np.mean(chroma_cens, axis=1)
    spec_contrast = np.mean(spec_contrast, axis=1)

    return [y_harmonic, y_percussive, chroma_cens, mfcc, mel_spec,
            spec_contrast]

def _expand_array_column(frame, column, prefix):
    """Spread a column holding one sequence per row into prefixed columns."""
    expanded = pd.DataFrame(frame[column].values.tolist(), index=frame.index)
    return expanded.add_prefix(prefix)


def get_dataset(data_group, source, class1, class2):
    """Build the feature table for one NSynth split and save it as CSV.

    Reads ``Dataset/nsynth-<data_group>/examples.json``, keeps the examples
    whose ``instrument_family_str`` is ``class1`` or ``class2`` and whose
    ``instrument_source_str`` is ``source``, runs ``feature_extract`` on the
    matching ``.wav`` file of each example, and writes the resulting table to
    ``./<data_group>.csv``.

    Args:
        data_group: Split name used in the dataset path and the CSV file name
            (the notebook uses 'train', 'valid' and 'test').
        source: Value of ``instrument_source_str`` to keep (e.g. 'acoustic').
        class1: Instrument family encoded as target ``1``.
        class2: Instrument family encoded as target ``0``.

    Returns:
        A pandas DataFrame indexed by example name containing:
            y_harmonic, y_percussive,
            Mel_Spec_<i>, MFCC_<i>, Chroma_<i>, Spec_Contrast_<i>,
            NSynth_Quality_<i>, and the binary ``target`` column.
    """
    split_dir = 'Dataset/nsynth-' + data_group
    audio_dir = split_dir + '/audio/'

    # Metadata for every example of the split, one row per example.
    metadata = pd.read_json(path_or_buf=split_dir + '/examples.json',
                            orient='index')

    # Narrow down by instrument family, then by source.
    family = metadata['instrument_family_str']
    selected = metadata.loc[(family == class2) | (family == class1)]
    selected = selected.loc[selected['instrument_source_str'] == source]

    # Binary label: class2 -> 0, class1 -> 1.
    target = selected.instrument_family_str.replace(
        to_replace=[class2, class1], value=[0, 1])

    # Extract features file by file; tqdm only draws the progress bar.
    # tqdm.notebook.tqdm replaces the deprecated tqdm.tqdm_notebook.
    extracted = {
        name: feature_extract(audio_dir + name + '.wav')
        for name in tqdm(selected.index.tolist())
    }

    feature_dataframe = pd.DataFrame.from_dict(
        extracted, orient='index',
        columns=['y_harmonic', 'y_percussive', 'chroma_cens',
                 'mfcc', 'mel_spec', 'spec_contrast'])

    # Each of these columns holds one array per file; give every array
    # element its own column. The order here fixes the output column order.
    array_columns = (
        ('mel_spec', 'Mel_Spec_'),
        ('mfcc', 'MFCC_'),
        ('chroma_cens', 'Chroma_'),
        ('spec_contrast', 'Spec_Contrast_'),
    )
    expanded = [_expand_array_column(feature_dataframe, column, prefix)
                for column, prefix in array_columns]

    # Replace the array-valued columns with their expanded versions.
    scalar_features = feature_dataframe.drop(
        labels=[column for column, _ in array_columns], axis=1)
    feature_dataframe = pd.concat([scalar_features, *expanded],
                                  axis=1, join='inner')

    qualities = _expand_array_column(selected, 'qualities', 'NSynth_Quality_')

    remaining = selected.drop(
        labels=['instrument', 'instrument_family',
                'instrument_family_str', 'instrument_source',
                'instrument_source_str', 'instrument_str',
                'note', 'note_str', 'pitch',
                'qualities_str', 'sample_rate',
                'qualities'], axis=1)
    # Also drop whichever column is now first.
    # NOTE(review): presumably 'velocity' for the stock NSynth schema; confirm.
    remaining = remaining.drop(remaining.columns[0], axis=1)

    feature_final = pd.concat([remaining, feature_dataframe, qualities],
                              axis=1, sort=False)
    feature_final['target'] = target
    feature_final.to_csv('./' + data_group + '.csv')

    return feature_final

### Build Dataset 
Builds the dataset by extracting all features that will be used for prediction.
The features are written to CSV files, one each for the train, valid and test NSynth data folders.

In [None]:
# Build the keyboard-vs-string feature table (acoustic source only) for each
# NSynth split; every call also saves its table as '<split>.csv'.
get_dataset(data_group='train', source='acoustic',
            class1='keyboard', class2='string')
get_dataset(data_group='valid', source='acoustic',
            class1='keyboard', class2='string')
get_dataset(data_group='test', source='acoustic',
            class1='keyboard', class2='string')


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=27458.0), HTML(value='')))