In [1]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import time
from tqdm import tqdm_notebook

#from FeatureExtraction import feature_extract    
#import FeatureExtraction

In [2]:
# NSynth instrument families. The list position is used as a selector below;
# it appears to line up with the dataset's numeric family id (the printed
# metadata shows keyboard -> 4 and string -> 8) -- TODO confirm for the rest.
CLASS_NAMES = [
    'bass',
    'brass',
    'flute',
    'guitar',
    'keyboard',
    'mallet',
    'organ',
    'reed',
    'string',
    'synth_lead',
    'vocal',
]

# How the sound was produced (matched against 'instrument_source_str').
SOURCE_NAMES = [
    'acoustic',
    'electronic',
    'synthetic',
]

# Dataset splits; each one maps to a 'Dataset/nsynth-<group>/' folder.
DATA_GROUPS = [
    'test',
    'valid',
    'train',
]

In [33]:


# Run configuration: pick the split, the sound source and the two instrument
# families to compare by indexing into DATA_GROUPS / SOURCE_NAMES / CLASS_NAMES.
dataGroup = DATA_GROUPS[0]    # dataset split: index 0 -> 'test' (1 -> 'valid', 2 -> 'train', if downloaded)
instrumentTarget = CLASS_NAMES[8]   # family labelled 1 in 'targetValue': index 8 -> 'string'
source = SOURCE_NAMES[0]      # sound source: index 0 -> 'acoustic' (1 -> 'electronic', 2 -> 'synthetic')
instrumentOther = CLASS_NAMES[4]   # family labelled 0 in 'targetValue': index 4 -> 'keyboard'



# NOTE: getDataset (and the feature_extract it calls) are defined in later
# cells of this notebook, so those cells must be executed before this one.
dataFrame = getDataset(dataGroup, source, instrumentTarget, instrumentOther)   # metadata rows for both families + extracted audio features + 'targetValue'
print(dataFrame)


HBox(children=(IntProgress(value=0, max=425), HTML(value='')))


                               instrument  instrument_family  \
keyboard_acoustic_004-022-050         327                  4   
keyboard_acoustic_004-025-100         327                  4   
keyboard_acoustic_004-026-050         327                  4   
keyboard_acoustic_004-026-100         327                  4   
keyboard_acoustic_004-027-050         327                  4   
...                                   ...                ...   
string_acoustic_080-049-127           675                  8   
string_acoustic_080-050-075           675                  8   
string_acoustic_080-051-127           675                  8   
string_acoustic_080-053-050           675                  8   
string_acoustic_080-054-050           675                  8   

                              instrument_family_str  instrument_source  \
keyboard_acoustic_004-022-050              keyboard                  0   
keyboard_acoustic_004-025-100              keyboard                  0   
keyboard

[425 rows x 32 columns]


In [32]:
def getDataset(dataGroup, source, target, other):
    """Build a labelled feature table for two instrument families of one NSynth split.

    Reads ``Dataset/nsynth-<dataGroup>/examples.json``, keeps the notes whose
    ``instrument_family_str`` is ``target`` or ``other`` and whose
    ``instrument_source_str`` is ``source``, runs ``feature_extract`` on the
    matching ``.wav`` file of every kept note, and joins metadata, features and
    a binary label into one DataFrame.

    Parameters
    ----------
    dataGroup : str
        Split name used to build the dataset folder name (e.g. 'test').
    source : str
        Value matched against the ``instrument_source_str`` column.
    target : str
        Instrument family labelled 1 in ``targetValue``.
    other : str
        Instrument family labelled 0 in ``targetValue``.

    Returns
    -------
    pandas.DataFrame
        Indexed by note name: the metadata columns from ``examples.json``,
        one column per value returned by ``feature_extract``, and an integer
        ``targetValue`` column (1 for ``target``, 0 for ``other``).
    """
    split_dir = 'Dataset/nsynth-' + dataGroup
    audio_dir = split_dir + '/audio/'

    # examples.json maps note name -> metadata; orient='index' turns every
    # note into one row indexed by its name.
    metadata = pd.read_json(path_or_buf=split_dir + '/examples.json', orient='index')

    # Keep only the two requested families, restricted to the requested source.
    keep = (metadata['instrument_family_str'].isin([target, other])
            & (metadata['instrument_source_str'] == source))
    dataframe_specific = metadata.loc[keep]

    # Explicit comparison instead of Series.replace([...], [0, 1]): replace on
    # an object column depends on pandas' deprecated silent downcasting to get
    # an integer result, whereas this always yields int64 labels.
    labels = dataframe_specific['instrument_family_str'].eq(target).astype('int64')

    # The row index of examples.json is the audio file name without extension.
    features_by_file = {}
    for file in tqdm_notebook(dataframe_specific.index.tolist()):  # progress bar
        features_by_file[file] = feature_extract(audio_dir + file + '.wav')

    # Column names must stay in the same order as the list returned by
    # feature_extract.
    featureDf = pd.DataFrame.from_dict(
        features_by_file, orient='index',
        columns=['y_harmonic', 'y_percussive', 'chroma_stft', 'chroma_cq', 'chroma_cens',
                 'mel_spec', 'mfcc', 'mfcc_delta', 'rms', 'spec_centroid', 'spec_bandwidth',
                 'spec_contrast', 'spec_flatness', 'spec_rolloff', 'tonnetz', 'log_power', 'pitches',
                 'magnitudes'])

    # Both frames share the note-name index, so this is a column-wise join.
    featureFinal = pd.concat([dataframe_specific, featureDf], axis=1, sort=False)
    featureFinal['targetValue'] = labels
    return featureFinal

In [17]:
# Legacy feature_extract taken from an external GitHub example; it was used for
# testing only. It is disabled by wrapping it in a string literal -- use the
# active feature_extract defined in a later cell instead.
# NOTE: the string is the cell's last expression, so Jupyter echoes it as the
# cell output.
"""def feature_extract(file):
    
    Define function that takes in a file an returns features in an array
    
    
    #get wave representation
    y, sr = librosa.load(file)
        
    #determine if instruemnt is harmonic or percussive by comparing means
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    if np.mean(y_harmonic)>np.mean(y_percussive):
        harmonic=1
    else:
        harmonic=0
        
    #Mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    #temporal averaging
    mfcc=np.mean(mfcc,axis=1)
    
    #get the mel-scaled spectrogram
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)  
    #temporally average spectrogram
    spectrogram = np.mean(spectrogram, axis = 1)
    
    #compute chroma energy
    chroma = librosa.feature.chroma_cens(y=y, sr=sr)
    #temporally average chroma
    chroma = np.mean(chroma, axis = 1)
    
    #compute spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = np.mean(contrast, axis= 1)
    
    return [harmonic, mfcc, spectrogram, chroma, contrast]"""

'def feature_extract(file):\n    \n    Define function that takes in a file an returns features in an array\n    \n    \n    #get wave representation\n    y, sr = librosa.load(file)\n        \n    #determine if instruemnt is harmonic or percussive by comparing means\n    y_harmonic, y_percussive = librosa.effects.hpss(y)\n    if np.mean(y_harmonic)>np.mean(y_percussive):\n        harmonic=1\n    else:\n        harmonic=0\n        \n    #Mel-frequency cepstral coefficients (MFCCs)\n    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)\n    #temporal averaging\n    mfcc=np.mean(mfcc,axis=1)\n    \n    #get the mel-scaled spectrogram\n    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)  \n    #temporally average spectrogram\n    spectrogram = np.mean(spectrogram, axis = 1)\n    \n    #compute chroma energy\n    chroma = librosa.feature.chroma_cens(y=y, sr=sr)\n    #temporally average chroma\n    chroma = np.mean(chroma, axis = 1)\n    \n    #compute spectra

In [4]:
def feature_extract(file):
    """Extract a fixed set of librosa features from one audio file.

    Parameters
    ----------
    file : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    list
        18 items, always in this order (``getDataset`` names its feature
        columns by position, so the order must not change):
        ``y_harmonic, y_percussive, chroma_stft, chroma_cq, chroma_cens,
        mel_spec, mfcc, mfcc_delta, rms, spec_centroid, spec_bandwidth,
        spec_contrast, spec_flatness, spec_rolloff, tonnetz, log_power,
        pitches, magnitudes``.

    Notes
    -----
    Earlier revisions also computed a duplicate dB spectrogram, polynomial
    features (orders 0-2), the zero-crossing rate and an onset envelope /
    onset frames, but never returned any of them. Those computations were
    removed: they cost time on every file, and the onset call passed ``y``
    positionally, which raises ``TypeError`` on librosa releases where that
    argument is keyword-only. To use any of them, compute it here with
    keyword arguments, append it to the returned list and add a matching
    column name in ``getDataset``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(file, sr=None)

    hop_length = 512

    # Harmonic / percussive source separation into two waveforms.
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Magnitude STFT, shared by the RMS and log-power features below.
    S = np.abs(librosa.stft(y))

    # Mel spectrogram
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,
                                              fmax=8000)

    # Mel-frequency cepstral coefficients from the raw signal, and their
    # first-order differences (delta features).
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Root mean square energy, computed from the magnitude spectrogram.
    rms = librosa.feature.rms(S=S)

    # Chroma features. All three are computed from the full signal ``y``
    # (not from y_harmonic).
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_cq = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)

    # Spectral descriptors.
    spec_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spec_flatness = librosa.feature.spectral_flatness(y=y)
    spec_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)

    # Tonal centroid features (tonnetz).
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)

    # Magnitude spectrogram in dB, relative to its maximum.
    log_power = librosa.amplitude_to_db(S, ref=np.max)

    # Pitch tracking.
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    return [y_harmonic, y_percussive, chroma_stft, chroma_cq, chroma_cens,
            mel_spec, mfcc, mfcc_delta, rms, spec_centroid, spec_bandwidth,
            spec_contrast, spec_flatness, spec_rolloff, tonnetz, log_power, pitches,
            magnitudes]