In [1]:
from librosa.core import load as ld_wav
import librosa.feature as ft_extraction
import scipy.io.wavfile as wav
import numpy as np
import os

In [2]:
# directory where we your .wav files are
directoryName = "/home/felipe/UFRJ/Telecomunicações/IRMAS-TrainingData" # put your own directory here
#instruments to evaluate
instruments = ["pia","vio"]
# directory to put our results in, you can change the name if you like
resultsDirectory = directoryName + "/MFCCresults"


In [3]:
def countTrainTracks(input_path, labels):
		""" Counts the number of tracks in the folders of the trainset
		"""
		total = 0
		for l, label in enumerate(labels):
			instrument_dir = os.path.join(input_path, label)
			total += len(os.listdir(instrument_dir))

		return total

In [4]:
if not os.path.exists(resultsDirectory):
    os.makedirs(resultsDirectory)

In [5]:
total_tracks = countTrainTracks(directoryName,instruments)
print("Total tracks: ",total_tracks)

Total tracks:  1301


In [6]:
data_dict = dict()

data_dict["rolloff"] = np.empty(130*total_tracks)
data_dict["bandwidth"] = np.empty(130*total_tracks)
data_dict["centroids"] = np.empty(130*total_tracks)
data_dict["zero_crossing_rate"] = np.empty(130*total_tracks)
data_dict["rms"] = np.empty(130*total_tracks)
data_dict["mfcc0"] = np.empty(130*total_tracks)
data_dict["mfcc1"] = np.empty(130*total_tracks)
data_dict["mfcc2"] = np.empty(130*total_tracks)
data_dict["mfcc3"] = np.empty(130*total_tracks)
data_dict["mfcc4"] = np.empty(130*total_tracks)
data_dict["mfcc5"] = np.empty(130*total_tracks)
data_dict["mfcc6"] = np.empty(130*total_tracks)
data_dict["mfcc7"] = np.empty(130*total_tracks)
data_dict["mfcc8"] = np.empty(130*total_tracks)
data_dict["mfcc9"] = np.empty(130*total_tracks)
data_dict["mfcc10"] = np.empty(130*total_tracks)
data_dict["mfcc11"] = np.empty(130*total_tracks)
data_dict["mfcc12"] = np.empty(130*total_tracks)
data_dict["mfcc13"] = np.empty(130*total_tracks)
data_dict["mfcc14"] = np.empty(130*total_tracks)
data_dict["mfcc15"] = np.empty(130*total_tracks)
data_dict["mfcc16"] = np.empty(130*total_tracks)
data_dict["mfcc17"] = np.empty(130*total_tracks)
data_dict["mfcc18"] = np.empty(130*total_tracks)
data_dict["mfcc19"] = np.empty(130*total_tracks)
data_dict["instrument"] = np.empty(130*total_tracks,dtype=int)

In [7]:
def getDeltaFeat(column):
    original_len = len(column)
    shifted_right_column = np.r_[np.zeros(2),np.asarray(column)]
    shifted_left_column = np.r_[np.asarray(column),np.zeros(2)]
    deltas = list()
    for i in range(0,original_len):
        deltas.append(shifted_left_column[i + 2] - shifted_right_column[i])
    return np.asarray(deltas)

In [8]:
def includeDeltaFeat(df):
    for i in range(0,20):
        df["delta_mfcc"+str(i)]=getDeltaFeat(df["mfcc"+str(i)])
    return df

In [9]:
def getData(directoryName,instruments,data_dict):
    instrument_index = 0
    files_read = 0
    for instrument in instruments:
        for filename in os.listdir(directoryName+"/"+instrument):
            if filename.endswith('.wav'): # only get MFCCs from .wavs
                # read in our file
                (sig,rate) = ld_wav(directoryName +"/"+instrument+"/" +filename)
                #print("Signal len = ",len(sig),"\nRate = ",rate)
                # get mfcc
                mfcc_feat = ft_extraction.mfcc(y=sig,sr=rate)
                rolloff_feat = ft_extraction.spectral_rolloff(y=sig,sr=rate)
                bandwidth_feat = ft_extraction.spectral_bandwidth(y=sig,sr=rate)
                centroid_feat = ft_extraction.spectral_centroid(y=sig,sr=rate)
                zero_crossing_rate_feat = ft_extraction.zero_crossing_rate(y=sig)
                rms_feat = ft_extraction.rms(y=sig)
                for i in range(0,20):
                    data_dict["mfcc"+str(i)][files_read*130:(files_read+1)*130] = mfcc_feat[i]
                data_dict["rolloff"][files_read*130:(files_read+1)*130] = rolloff_feat[0]
                data_dict["bandwidth"][files_read*130:(files_read+1)*130] = bandwidth_feat[0]
                data_dict["centroids"][files_read*130:(files_read+1)*130] = centroid_feat[0]
                data_dict["zero_crossing_rate"][files_read*130:(files_read+1)*130] = zero_crossing_rate_feat[0]
                data_dict["rms"][files_read*130:(files_read+1)*130] = rms_feat[0]
                data_dict["instrument"][files_read*130:(files_read+1)*130] = [instrument_index]*130
                #rolloff = ft_extraction.spectral_rolloff(y=sig,sr=rate)
                # create a file to save our results in
                files_read += 1
        instrument_index += 1
    #data_dict = includeDeltaFeat(data_dict)
    return data_dict                                                                                           

In [10]:
data_dict = getData(directoryName,instruments,data_dict)

In [11]:
np.count_nonzero(~np.isnan(data_dict["mfcc0"]))

169130

In [12]:
print(1301*130)

169130


In [13]:
import pandas as pd
df = pd.DataFrame(data_dict)
df.tail()

Unnamed: 0,rolloff,bandwidth,centroids,zero_crossing_rate,rms,mfcc0,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,instrument
169125,3929.80957,2038.286482,1726.468614,0.083984,0.062677,-227.990265,113.424232,-9.40886,43.264915,6.509571,...,4.518134,1.368757,12.995491,1.665058,11.115401,9.659019,-5.013555,0.967955,12.440106,1
169126,3789.84375,2019.264734,1730.235788,0.089355,0.066654,-226.093658,115.267258,-5.64981,45.312061,7.88273,...,10.353981,5.884489,8.536335,-0.780042,8.331919,7.718971,-7.243509,-1.161903,14.580173,1
169127,3563.745117,1931.438542,1689.990517,0.084473,0.070886,-226.310822,120.728256,-7.477173,46.441296,2.592531,...,10.906251,9.40777,11.517885,-2.472063,8.018284,4.658303,-5.756626,-3.968502,17.26865,1
169128,3176.147461,1986.839598,1587.260859,0.062012,0.07259,-218.313004,122.024727,0.392308,38.631321,6.205506,...,1.361669,11.4848,13.205139,1.215893,8.228657,9.125845,-7.830932,-4.033907,8.28366,1
169129,3940.576172,2317.915459,1623.505099,0.041016,0.075656,-208.40332,113.934624,14.96928,30.570921,12.068377,...,-1.431349,7.468777,10.532847,5.981077,6.776804,9.659662,-4.804807,0.990948,4.017467,1


In [14]:
input_variables = list(df.columns)
input_variables.remove("instrument")
output_variable = "instrument"
print(input_variables)

['rolloff', 'bandwidth', 'centroids', 'zero_crossing_rate', 'rms', 'mfcc0', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18', 'mfcc19']


In [15]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(instruments)
print(le.inverse_transform([0,1]))

['pia' 'vio']


In [16]:
np.asarray(df[input_variables]).shape

(169130, 25)

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df[input_variables], df[output_variable], test_size=0.33, random_state=42)
rfc = RandomForestClassifier(n_estimators=100, random_state=0)
rfc.fit(X_train, y_train)
y_predict = rfc.predict(X_test)

In [18]:
from sklearn import metrics
print(metrics.confusion_matrix(le.inverse_transform(y_test),le.inverse_transform(y_predict)))
print(metrics.classification_report(le.inverse_transform(y_test),le.inverse_transform(y_predict),digits=3))

[[29070  1811]
 [ 2231 22701]]
              precision    recall  f1-score   support

         pia      0.929     0.941     0.935     30881
         vio      0.926     0.911     0.918     24932

   micro avg      0.928     0.928     0.928     55813
   macro avg      0.927     0.926     0.927     55813
weighted avg      0.928     0.928     0.928     55813



In [19]:
fur_elise_data_dict = dict()
fur_elise_data_dict["rolloff"] = list()
fur_elise_data_dict["bandwidth"] = list()
fur_elise_data_dict["centroids"] = list()
fur_elise_data_dict["zero_crossing_rate"] = list()
fur_elise_data_dict["rms"] = list()
fur_elise_data_dict["mfcc0"] = list()
fur_elise_data_dict["mfcc1"] = list()
fur_elise_data_dict["mfcc2"] = list()
fur_elise_data_dict["mfcc3"] = list()
fur_elise_data_dict["mfcc4"] = list()
fur_elise_data_dict["mfcc5"] = list()
fur_elise_data_dict["mfcc6"] = list()
fur_elise_data_dict["mfcc7"] = list()
fur_elise_data_dict["mfcc8"] = list()
fur_elise_data_dict["mfcc9"] = list()
fur_elise_data_dict["mfcc10"] = list()
fur_elise_data_dict["mfcc11"] = list()
fur_elise_data_dict["mfcc12"] = list()
fur_elise_data_dict["mfcc13"] = list()
fur_elise_data_dict["mfcc14"] = list()
fur_elise_data_dict["mfcc15"] = list()
fur_elise_data_dict["mfcc16"] = list()
fur_elise_data_dict["mfcc17"] = list()
fur_elise_data_dict["mfcc18"] = list()
fur_elise_data_dict["mfcc19"] = list()
fur_elise_data_dict["instrument"] = list()
(sig,rate) = ld_wav("/home/felipe/Music/Link to Musicas/Für Elise (Piano version).wav")
mfcc_feat = ft_extraction.mfcc(y=sig,sr=rate)
rolloff_feat = ft_extraction.spectral_rolloff(y=sig,sr=rate)
print(rolloff_feat[0])
bandwidth_feat = ft_extraction.spectral_bandwidth(y=sig,sr=rate)
centroid_feat = ft_extraction.spectral_centroid(y=sig,sr=rate)
zero_crossing_rate_feat = ft_extraction.zero_crossing_rate(y=sig)
rms_feat = ft_extraction.rms(y=sig)
for i in range(0,20):
    fur_elise_data_dict["mfcc"+str(i)].extend(mfcc_feat[i])
fur_elise_data_dict["instrument"] = ["pia"]*len(fur_elise_data_dict["mfcc0"])
fur_elise_data_dict["rolloff"].extend(rolloff_feat[0])
fur_elise_data_dict["bandwidth"].extend(bandwidth_feat[0])
fur_elise_data_dict["centroids"].extend(centroid_feat[0])
fur_elise_data_dict["zero_crossing_rate"].extend(zero_crossing_rate_feat[0])
fur_elise_data_dict["rms"].extend(rms_feat[0])
#fur_elise_data_dict = includeDeltaFeat(fur_elise_data_dict)

[0. 0. 0. ... 0. 0. 0.]


In [20]:
for key in fur_elise_data_dict.keys():
    print(key,len(fur_elise_data_dict[key]))

rolloff 7545
bandwidth 7545
centroids 7545
zero_crossing_rate 7545
rms 7545
mfcc0 7545
mfcc1 7545
mfcc2 7545
mfcc3 7545
mfcc4 7545
mfcc5 7545
mfcc6 7545
mfcc7 7545
mfcc8 7545
mfcc9 7545
mfcc10 7545
mfcc11 7545
mfcc12 7545
mfcc13 7545
mfcc14 7545
mfcc15 7545
mfcc16 7545
mfcc17 7545
mfcc18 7545
mfcc19 7545
instrument 7545


In [21]:
fur_elise_df = pd.DataFrame(fur_elise_data_dict)
y_predict = rfc.predict(fur_elise_df[input_variables])

In [22]:
print(metrics.confusion_matrix(fur_elise_df[output_variable],le.inverse_transform(y_predict)))
print(metrics.classification_report(fur_elise_df[output_variable],le.inverse_transform(y_predict),digits=3))

[[7541    4]
 [   0    0]]
              precision    recall  f1-score   support

         pia      1.000     0.999     1.000      7545
         vio      0.000     0.000     0.000         0

   micro avg      0.999     0.999     0.999      7545
   macro avg      0.500     0.500     0.500      7545
weighted avg      1.000     0.999     1.000      7545



  'recall', 'true', average, warn_for)


In [23]:
y_predict

array([0, 0, 0, ..., 0, 0, 0])

In [24]:
(sig,rate) = ld_wav(directoryName +"/"+'pia'+"/" +'001__[pia][nod][cla]1389__1.wav')
feat = ft_extraction.rms(y=sig)
print(feat.shape)

(1, 130)
