In [1]:
import pandas as pd
import glob
import numpy as np
from sklearn import svm
from sklearn import preprocessing
from sklearn.cross_validation import StratifiedKFold, train_test_split
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.grid_search import GridSearchCV
from sklearn import neighbors
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.externals import joblib
import random
import seaborn as sns
import matplotlib.pyplot as plt
from os import sys, path

# NOTE(review): THRESHOLD is not referenced in any of the visible cells —
# TODO confirm whether it is still needed.
THRESHOLD = 20
# Integer identifiers for the supported datasets. They are passed as the
# `dataset` argument of feature_preprocessing, saveModel and loadModel.
IRMAS = 1
RWC=2
PHILHARMONICA = 3
# Switch on seaborn's default plot styling for the figures drawn later.
sns.set()



In [14]:
def feature_preprocessing(dataset):
    """Load one dataset's feature CSVs and build the (unfitted) classifier.

    Parameters
    ----------
    dataset : int
        One of the module-level identifiers IRMAS, RWC or PHILHARMONICA.

    Returns
    -------
    model : sklearn.pipeline.Pipeline
        Unfitted pipeline: StandardScaler -> SelectKBest(mutual_info_classif)
        -> SVC with a one-vs-one decision function.
    X : numpy.ndarray
        Feature matrix (every column except ``class``) after missing values
        have been filled by ``preprocessing.Imputer`` with default settings.
    y : numpy.ndarray
        Integer-encoded values of the ``class`` column.

    Raises
    ------
    ValueError
        If ``dataset`` is not a known identifier, or if no CSV file matches
        the dataset's glob pattern.
    """
    # Paths are assembled with path.join so the same relative layout works on
    # every platform; on Windows this yields the same backslash paths as before.
    if dataset == IRMAS:
        data = _read_csv_folder(path.join('Datasets', '*.csv'))
    elif dataset == RWC:
        data = pd.read_csv(
            path.join('Datasets', 'dataset_rwc', 'essentia_rwc_features.csv'))
        # The first column is discarded before building the feature matrix
        # (presumably a saved index column — TODO confirm).
        data = data.drop(data.columns[0], axis=1)
    elif dataset == PHILHARMONICA:
        data = _read_csv_folder(path.join('Datasets', 'philharmoni', '*.csv'))
    else:
        # Previously an unknown id fell through every branch and crashed later
        # with an unbound `data` variable.
        raise ValueError("Unknown dataset identifier: {!r}".format(dataset))
    print("Done reading the music files")

    # Transform the features and target value to fit
    X = data.drop(['class'], axis=1).values
    X = preprocessing.Imputer().fit_transform(X)
    y = preprocessing.LabelEncoder().fit_transform(data['class'])

    # SelectKBest rejects k larger than the number of columns, so cap it for
    # narrow feature sets; wide datasets still keep exactly 100 features.
    n_selected = min(100, X.shape[1])
    # NOTE: the step is named 'anova_filter' but scores features with mutual
    # information; the name is kept so existing parameter keys stay valid.
    steps = [("scale", preprocessing.StandardScaler()),
             ('anova_filter', SelectKBest(mutual_info_classif, k=n_selected)),
             ('svm', svm.SVC(decision_function_shape='ovo'))]
    print(dataset)
    print(X.shape)
    print(y.shape)
    model = Pipeline(steps)
    return model, X, y


def _read_csv_folder(pattern):
    """Read every CSV matching ``pattern`` into one concatenated DataFrame."""
    # glob returns matches in arbitrary order; sorting keeps the row order
    # stable between runs.
    filenames = sorted(glob.glob(pattern))
    if not filenames:
        raise ValueError("No CSV files match {!r}".format(pattern))
    frames = [pd.read_csv(filename) for filename in filenames]
    # Concatenate all DataFrames into a single DataFrame
    return pd.concat(frames, ignore_index=True)

In [15]:
def saveModel(model, dataset):
    """Persist ``model`` with joblib under the file name tied to ``dataset``.

    Parameters
    ----------
    model : object
        The estimator (or any joblib-serialisable object) to write.
    dataset : int
        One of IRMAS, RWC or PHILHARMONICA; selects the output file name in
        the current working directory.

    Raises
    ------
    ValueError
        If ``dataset`` is not a known identifier. Previously this case
        returned silently without writing anything.
    """
    model_files = {
        IRMAS: 'irmas1.model',
        RWC: 'rwc1.model',
        PHILHARMONICA: 'Philharmonica1.model',
    }
    if dataset not in model_files:
        raise ValueError("Unknown dataset identifier: {!r}".format(dataset))
    joblib.dump(model, model_files[dataset])
    return

In [12]:
def loadModel(dataset):
    """Load the joblib-persisted model associated with ``dataset``.

    Parameters
    ----------
    dataset : int
        One of IRMAS, RWC or PHILHARMONICA; selects the file to read from the
        current working directory.

    Returns
    -------
    object
        Whatever object was stored in the corresponding ``*.model`` file.

    Raises
    ------
    ValueError
        If ``dataset`` is not a known identifier. Previously this surfaced as
        an UnboundLocalError on the return statement.
    """
    model_files = {
        IRMAS: 'irmas1.model',
        RWC: 'rwc1.model',
        PHILHARMONICA: 'Philharmonica1.model',
    }
    if dataset not in model_files:
        raise ValueError("Unknown dataset identifier: {!r}".format(dataset))
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # only load model files that come from a trusted source.
    model = joblib.load(model_files[dataset])
    return model

In [11]:
def train_test(clf, X_train, y_train, dataset, X_test=None, y_test=None):
    """Fit ``clf``, persist it, then report its classification metrics.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit`` and ``predict``.
    X_train, y_train : array-like
        Data the estimator is fitted on.
    dataset : int
        Dataset identifier forwarded to ``saveModel``.
    X_test, y_test : array-like, optional
        Held-out data to score on. When both are omitted the model is scored
        on the training data itself (the original behaviour), which measures
        fit to the training set rather than generalisation.

    Raises
    ------
    ValueError
        If only one of ``X_test`` / ``y_test`` is supplied.
    """
    if (X_test is None) != (y_test is None):
        raise ValueError("X_test and y_test must be supplied together")

    print("Fitting the data")
    clf.fit(X_train, y_train)
    print("Save the model")
    saveModel(clf, dataset)

    if X_test is None:
        # No held-out split was given: fall back to the training data and say
        # so, because these scores are optimistic.
        print("No held-out data supplied; scoring on the training data")
        X_eval, y_eval = X_train, y_train
    else:
        X_eval, y_eval = X_test, y_test

    print("Testing the model")
    y_pred = clf.predict(X_eval)
    print("Quantify the performance")
    Evaluate_accuracy(y_pred, y_eval)
    return


In [21]:
def Evaluate_accuracy(pred, true_value):
    """Print classification metrics and plot a row-normalised confusion matrix.

    Parameters
    ----------
    pred : array-like
        Predicted class labels.
    true_value : array-like
        Ground-truth class labels, aligned with ``pred``.

    Every message is written with a single-argument ``print(...)`` so the
    output is plain text under both Python 2 and Python 3 (multi-argument
    calls print a tuple under Python 2).
    """
    print("Accuracy score is {}".format(accuracy_score(true_value, pred) * 100))
    # NOTE(review): RMSE and MAE are computed directly on the label values.
    # If those are arbitrary integer class codes, the numbers depend on the
    # code ordering — treat them with care.
    rmse = np.sqrt(mean_squared_error(true_value, pred))
    print("Root Mean Squared Error: {}".format(rmse))
    print("Mean absolute error: {}".format(mean_absolute_error(true_value, pred)))
    print("Micro stats:")
    print(precision_recall_fscore_support(true_value, pred, average='micro'))
    print("Macro stats:")
    print(precision_recall_fscore_support(true_value, pred, average='macro'))
    # Printed on its own so the report's line breaks are rendered instead of
    # appearing escaped inside a tuple repr.
    print("Classification Report:")
    print(classification_report(true_value, pred))

    cm = confusion_matrix(true_value, pred).astype('float')
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    # A class that only occurs in `pred` has an all-zero row; dividing by 1
    # keeps that row at zero instead of producing NaN.
    row_totals[row_totals == 0] = 1.0
    cm = cm / row_totals
    sns.heatmap(cm, annot=True, linewidths=.5)
    plt.title('Confusion matrix: Normalized')
    plt.xlabel('Predicted instrument code')
    plt.ylabel('True value')
    plt.show()
    return

In [16]:
if __name__ == "__main__":
    # (label used in the progress message, dataset identifier), listed in
    # the order the datasets are processed.
    runs = (
        ("IRMAS", IRMAS),
        ("RWC", RWC),
        ("PHILHARMONICA", PHILHARMONICA),
    )
    for label, dataset_id in runs:
        print("Training and testing on {} dataset".format(label))
        # Build the pipeline and data for this dataset, then fit, save and
        # score it.
        classifier, X, y = feature_preprocessing(dataset_id)
        train_test(classifier, X, y, dataset_id)

    # Disabled: reload each persisted model and evaluate it again.
    # NOTE(review): `test` is not defined in the visible cells.
    #for dataset_id in (IRMAS, RWC, PHILHARMONICA):
    #    test(loadModel(dataset_id), X, y)


Training and testing on IRMAS dataset
Done reading the music files
1
(6706L, 372L)
(6706L,)
Fitting the data
Save the model
Testing the model
Quantify the performance


NameError: global name 'Evaluate_accuracy' is not defined

SVM MODEL: newsvc
testing and training with same data:
('Accuracy score is', 59.000745712155108)
('Mean squared error', 8.0609992542878448)
Root Mean Squared Error: 2.83918989402
('Mean absolute error:', 1.5372110365398957)

KNN: newknn
('Accuracy score is', 29.351230425055931)
('Mean squared error', 14.32393736017897)
Root Mean Squared Error: 3.78469778981
('Mean absolute error:', 2.7221476510067113)


SVC model with IRMAS and RWC datasets: newsvc1
('Accuracy score is', 64.301159819146847)
('Mean squared error', 18.851091016316101)
Root Mean Squared Error: 4.34178431251
('Mean absolute error:', 1.7246903872616473)

svc model with collected irmas and downloaded irmas pdf. newsvc2:
('Accuracy score is ', 45.569712946690103)
('Mean squared error ', 8.7254686584651431)
Root Mean Squared Error: 2.95389042763
('Mean absolute error:', 1.7147773872290568)

with rwc dataset alone: rwcsvc
('Accuracy score is ', 83.962829736211035)
('Mean squared error ', 15.04046762589928)
Root Mean Squared Error: 3.87820417538
('Mean absolute error:', 0.967326139088729)


DATASET: Philharmonica ('Accuracy score is ', 97.343311506080454)
Root Mean Squared Error: 0.600436386363
('Mean absolute error:', 0.070907390084190836)
Micro stats:
(0.97343311506080454, 0.97343311506080454, 0.97343311506080454, None)
Macro stats:
(0.94799850484804005, 0.93801257128445081, 0.93641155563669853, None)
