In [205]:
import numpy as np
import pandas as pd
import sklearn as sk
import librosa
import glob
import os
from sklearn import svm
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn import neighbors
from sklearn import decomposition
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_recall_fscore_support
#import Preprocessing

In [197]:
# Instrument tags looked for in training file names, mapped to the numeric
# class label used for training. Label 0 is reserved for "no known tag".
_TRAIN_INSTRUMENT_CODES = {
    '[cel]': 1,
    '[flu]': 2,
    '[gac]': 3,
    '[gel]': 4,
    '[org]': 5,
    '[pia]': 6,
    '[sax]': 7,
    '[tru]': 8,
    '[vio]': 9,
    '[cla]': 10,
    '[voi]': 11,
}


def _train_instrument_code(filename):
    """Return the label of the earliest known tag in the file's base name, or 0."""
    base_name = os.path.basename(filename)
    # Pick the tag by its position in the name rather than by lookup order, so
    # a name carrying two known tags is labelled by the one that comes first.
    # NOTE(review): assumes the first recognised tag denotes the instrument;
    # confirm against the dataset's naming scheme.
    hits = [(base_name.find(tag), code)
            for tag, code in _TRAIN_INSTRUMENT_CODES.items()
            if tag in base_name]
    return min(hits)[1] if hits else 0


def preprocess_traindata():
    """Load every training .wav file and turn it into a labelled feature record.

    Files are discovered under ``Datasets/TrainingData/<subdir>/*.wav`` and
    visited in a random order (``np.random.shuffle``; no seed is set here).
    For each file the MFCC matrix returned by librosa is averaged along
    axis 1 and reshaped to a length-20 vector.

    The class label is derived from the bracketed tag in the file's base name
    (see ``_TRAIN_INSTRUMENT_CODES``). Files with no known tag get label 0 and
    are reported on stdout.

    Returns:
        list: one ``[filename, feature, instrument_code]`` entry per file,
        where ``feature`` is a numpy array of length 20 and
        ``instrument_code`` is an int in 0..11.
    """
    music_data = []

    # Build the pattern with os.path.join instead of a backslash literal so it
    # works on any OS and contains no invalid escape sequences.
    pattern = os.path.join('Datasets', 'TrainingData', '*', '*.wav')
    files = glob.glob(pattern)
    np.random.shuffle(files)

    for filename in files:
        # Preprocess the music file to get features from it.
        music, sr = librosa.load(filename)
        mfccs = librosa.feature.mfcc(y=music, sr=sr)
        # reshape(20) is a shape check as much as a reshape: it raises if the
        # averaged vector does not hold exactly 20 values.
        feature = np.mean(mfccs, axis=1).reshape(20)

        instrument_code = _train_instrument_code(filename)
        if instrument_code == 0:
            print('Unknown instrument found in the file', filename)

        music_data.append([filename, feature, instrument_code])

    return music_data
    

In [162]:
# (tag, label) pairs tried in this order against each annotation line. The
# labels are the same ones preprocess_traindata() assigns to the matching
# instrument, so predictions and test annotations share one label space.
_TEST_TAG_CODES = (
    ('cel', 1),
    ('cla', 10),
    ('flu', 2),
    ('gac', 3),
    ('gel', 4),
    ('org', 5),
    ('pia', 6),
    ('sax', 7),
    ('tru', 8),
    ('vio', 9),
    ('voi', 11),
)


def _annotation_code(line):
    """Return the label of the first tag from _TEST_TAG_CODES found in line, or 0."""
    for tag, code in _TEST_TAG_CODES:
        if tag in line:
            return code
    return 0


def preprocess_testdata():
    """Load every test .wav file together with its instrument annotations.

    Files are discovered under ``Datasets/TestingData-Part1/Part1/*.wav`` and
    visited in a random order (``np.random.shuffle``; no seed is set here).
    For each file the MFCC matrix returned by librosa is averaged along
    axis 1 and reshaped to a length-20 vector.

    The labels come from the text file that shares the .wav file's name but
    has a ``.txt`` extension: every line of that file yields one label
    (0 when the line contains no known tag).

    Returns:
        list: one ``[filename, feature, instrument]`` entry per file, where
        ``feature`` is a numpy array of length 20 and ``instrument`` is a
        list of int labels, one per annotation line.

    Raises:
        IOError/OSError: if the annotation ``.txt`` file cannot be opened.
    """
    music_data = []

    # Build the pattern with os.path.join instead of a backslash literal so it
    # works on any OS and contains no invalid escape sequences.
    pattern = os.path.join('Datasets', 'TestingData-Part1', 'Part1', '*.wav')
    files = glob.glob(pattern)
    np.random.shuffle(files)

    for filename in files:
        # Preprocess the music file to get features from it.
        music, sr = librosa.load(filename)
        mfccs = librosa.feature.mfcc(y=music, sr=sr)
        feature = np.mean(mfccs, axis=1).reshape(20)

        # The annotation file sits next to the audio file.
        # TODO (carried over from the original): check that the file is taken
        # from the right path.
        instrument_file = os.path.splitext(filename)[0] + '.txt'
        # The context manager closes the handle even if reading fails.
        with open(instrument_file, 'r') as f:
            instrument = [_annotation_code(line) for line in f]

        music_data.append([filename, feature, instrument])

    return music_data
    

In [163]:
def unpack_data(data):
    """Split a list of ``[filename, feature, instrument]`` records into arrays.

    Args:
        data: sequence of records, each indexable at positions 0, 1 and 2.

    Returns:
        tuple: ``(filename, feature, instrument)`` numpy arrays, each built
        from the corresponding position of every record, in input order.
    """
    # Use list comprehensions rather than map(): on Python 3 map() is lazy and
    # np.array(map(...)) would wrap the map object in a 0-d object array
    # instead of building an array of the values.
    filename = np.array([record[0] for record in data])
    feature = np.array([record[1] for record in data])
    instrument = np.array([record[2] for record in data])
    return filename, feature, instrument

In [189]:
def train(data):
    """Fit a linear SVM on the given records and return the fitted classifier.

    Args:
        data: list of ``[filename, feature, instrument_code]`` records, as
            accepted by ``unpack_data``.

    Returns:
        The fitted ``svm.LinearSVC`` instance.
    """
    # File names are not needed for fitting; only features and labels are used.
    _, feature_matrix, labels = unpack_data(data)

    # Earlier experiments (PCA with 10 components, KNN with 12 neighbours) are
    # not part of this pipeline; only the default-configured linear SVM is fit.
    classifier = svm.LinearSVC()
    classifier.fit(feature_matrix, labels)
    return classifier

In [165]:
def predict(model, data):
    """Run the model on the given records and print evaluation metrics.

    Args:
        model: fitted estimator exposing ``predict(features)``.
        data: list of ``[filename, feature, instrument_code]`` records, as
            accepted by ``unpack_data``.

    Returns:
        None. The metrics are printed by ``Evaluate_accuracy``.
    """
    # File names are unpacked but play no role in prediction or scoring.
    _, feature_matrix, expected = unpack_data(data)

    predicted = model.predict(feature_matrix)

    # Evaluate the performance: predictions first, ground truth second.
    Evaluate_accuracy(predicted, expected)

In [204]:
def Evaluate_accuracy(pred, true_value):
    """Print a set of evaluation metrics for predicted vs. true labels.

    Args:
        pred: numpy array of predicted labels.
        true_value: numpy array of ground-truth labels, same length as pred.

    Returns:
        None. All results are written to stdout.

    Every line is printed through the print() function with one pre-formatted
    string, so the output is identical on Python 2 and Python 3. Bare print
    statements are a syntax error on Python 3, and multi-argument print(...)
    calls emit tuple reprs on Python 2.
    """
    accuracy = accuracy_score(true_value.astype(int), pred.astype(int)) * 100
    print("Accuracy score is {}".format(accuracy))

    # NOTE(review): the labels are class codes, so the error magnitudes below
    # depend on how the codes happen to be numbered; they are kept because the
    # original report included them.
    mse = mean_squared_error(true_value, pred)
    print("Mean squared error {}".format(mse))
    print("Root Mean Squared Error: {}".format(np.sqrt(mse)))
    print("Mean absolute error: {}".format(mean_absolute_error(true_value, pred)))

    # Multi-line reports start on their own line so their layout is preserved.
    print("Classification Report:\n{}".format(classification_report(true_value, pred)))
    print("confusion matrix:\n{}".format(confusion_matrix(true_value, pred)))

    print("Micro stats:")
    print(precision_recall_fscore_support(true_value, pred, average='micro'))
    print("Macro stats:")
    print(precision_recall_fscore_support(true_value, pred, average='macro'))
    return

In [198]:
def saveModel(model, path='svmclassifier.model'):
    """Persist a trained model to disk with joblib.

    The previous body was entirely commented out, so calling this function
    saved nothing. The default path is the file that ``loadModel`` reads, so a
    train -> saveModel -> loadModel sequence round-trips the same model.

    Args:
        model: the fitted estimator to store.
        path: destination file; an existing file at this path is overwritten.

    Returns:
        None.
    """
    joblib.dump(model, path)
    return

In [199]:
def loadModel():
    """Load and return the classifier stored in 'svmclassifier.model'.

    Returns:
        The object deserialised by ``joblib.load`` from that file.
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only load model files that come from a trusted source.
    return joblib.load('svmclassifier.model')

In [200]:
def main():
    """Load the stored classifier and print its evaluation metrics.

    The training path (preprocess_traindata -> train -> saveModel) is not
    executed here; the classifier is read from disk by ``loadModel``.
    """
    classifier = loadModel()

    # NOTE: the evaluation records come from preprocess_traindata(), not from
    # preprocess_testdata(), so the reported metrics are measured on the
    # training set.
    evaluation_data = preprocess_traindata()
    predict(classifier, evaluation_data)


In [201]:
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

('Number of filessssssssssssss trained', 1001)
('Accuracy score is', 14.685314685314685)
('Mean squared error', 31.002997002997002)
Root Mean Squared Error: 5.56803349514
('Mean absolute error:', 4.615384615384615)


When using KNN:
('Accuracy score is', 62.4750499001996)
('Mean squared error', 7.3732534930139719)
Root Mean Squared Error: 2.71537354576
('Mean absolute error:', 1.4131736526946108)
When using SVM:
('Number of filessssssssssssss trained', 1001)
('Accuracy score is', 14.685314685314685)
('Mean squared error', 31.002997002997002)
Root Mean Squared Error: 5.56803349514
('Mean absolute error:', 4.615384615384615)