In [1]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.preprocessing import StandardScaler, normalize 
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
dataset = pd.read_csv("extractedmusicfeaturesetallfeatures.csv")

In [5]:
dataset.shape

(1000, 31)

In [6]:
dataset.head()

Unnamed: 0,Filename,Chromagram,RootMeanSquare,LowEnergyBrightness,SpectralCentroid,Flatness,Bandwidth,InHarmonicity,Rolloff,ZeroCrossingRate,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00000.wav,0.308197,0.082482,23.69837,1535.037572,0.001726,1834.035228,0.000684,3175.514131,0.070391,...,6.815101,0.110116,4.418635,-1.796708,-1.78779,0.341001,-2.629384,-0.309009,-0.734714,blues
1,blues.00001.wav,0.301338,0.064592,24.1268,1178.338976,0.000634,1719.624593,0.018852,2425.427,0.044456,...,2.652497,0.832175,2.269111,-4.967726,-1.012166,0.995347,-0.605718,1.565148,-0.685939,blues
2,blues.00002.wav,0.296208,0.121434,25.195057,1303.581375,0.000763,1579.884288,0.022247,2479.285008,0.064793,...,4.676634,-7.930208,-5.197453,-8.683246,0.549078,-7.521339,-4.570396,-3.990504,-3.933193,blues
3,blues.00003.wav,0.378564,0.117686,23.537749,765.650527,0.00024,1289.465222,0.008652,1387.583276,0.024866,...,5.115472,0.440344,-1.861215,0.198612,1.217629,-0.75634,1.820389,-0.057328,-4.009497,blues
4,blues.00004.wav,0.261476,0.060902,25.02219,1539.421609,0.001334,1584.038983,-0.050764,2954.19047,0.085597,...,-6.884608,-6.819392,-12.406286,-10.841472,-8.15515,-5.439511,-1.90889,-7.093715,-17.604401,blues


In [7]:
X = dataset.drop(['Filename','label'],axis=1) #Filename is not required
Y = dataset['label']
X

Unnamed: 0,Chromagram,RootMeanSquare,LowEnergyBrightness,SpectralCentroid,Flatness,Bandwidth,InHarmonicity,Rolloff,ZeroCrossingRate,mfcc1,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,0.308197,0.082482,23.698370,1535.037572,0.001726,1834.035228,0.000684,3175.514131,0.070391,-188.219376,...,-3.807535,6.815101,0.110116,4.418635,-1.796708,-1.787790,0.341001,-2.629384,-0.309009,-0.734714
1,0.301338,0.064592,24.126800,1178.338976,0.000634,1719.624593,0.018852,2425.427000,0.044456,-286.179047,...,-0.355949,2.652497,0.832175,2.269111,-4.967726,-1.012166,0.995347,-0.605718,1.565148,-0.685939
2,0.296208,0.121434,25.195057,1303.581375,0.000763,1579.884288,0.022247,2479.285008,0.064793,-165.737717,...,-10.915662,4.676634,-7.930208,-5.197453,-8.683246,0.549078,-7.521339,-4.570396,-3.990504,-3.933193
3,0.378564,0.117686,23.537749,765.650527,0.000240,1289.465222,0.008652,1387.583276,0.024866,-274.498962,...,2.306320,5.115472,0.440344,-1.861215,0.198612,1.217629,-0.756340,1.820389,-0.057328,-4.009497
4,0.261476,0.060902,25.022190,1539.421609,0.001334,1584.038983,-0.050764,2954.190470,0.085597,-236.696884,...,-12.310408,-6.884608,-6.819392,-12.406286,-10.841472,-8.155150,-5.439511,-1.908890,-7.093715,-17.604401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.312355,0.066177,26.828857,1585.924113,0.001471,1878.318531,-0.020311,3388.996174,0.066483,-213.199493,...,-9.252023,10.070771,-10.888953,2.198623,-8.872386,-0.953045,-12.683362,0.744433,-4.840060,-3.064359
996,0.346405,0.060725,26.802236,1685.060612,0.001741,1869.974954,-0.012931,3395.737800,0.080015,-197.364792,...,-10.789764,12.185410,-15.749372,2.540702,-1.597802,0.542841,-10.845517,-0.644788,-7.396799,-5.802427
997,0.366321,0.060540,26.163345,1807.488277,0.002249,1787.003967,0.003787,3561.578464,0.103777,-181.841599,...,-14.268676,6.846561,-16.928728,-3.545232,-11.599463,-2.044957,-12.994104,0.291607,-13.022841,-6.441196
998,0.327976,0.071790,25.974198,1047.122495,0.000313,1548.829422,0.020775,2153.220313,0.036349,-285.583069,...,-5.814487,5.102398,-8.889581,-3.410276,-4.522196,-5.790967,-3.085016,0.166745,-4.406106,-5.594728


In [8]:
# Scaling the data so that all the features become comparable 
scaler = StandardScaler() 
X_scaled = scaler.fit_transform(X) 
  
# Normalizing the data so that the data approximately  
# follows a Gaussian distribution 
X_normalized = normalize(X_scaled) 
  
# Converting the numpy array into a pandas DataFrame 
X_normalized = pd.DataFrame(X_normalized) 
X_normalized

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,-0.170842,-0.111654,-0.054815,-0.199805,-0.197653,-0.188492,-0.081569,-0.196089,-0.107627,0.121314,...,0.043919,0.195385,0.268942,0.293598,0.113267,-0.175664,0.334990,-0.226033,0.163351,0.122690
1,-0.134257,-0.181251,0.013253,-0.281215,-0.143918,-0.175439,0.361948,-0.258052,-0.289271,-0.225797,...,0.169816,-0.049856,0.197601,0.070783,-0.094712,-0.067139,0.241322,-0.021070,0.212610,0.078416
2,-0.133511,0.150613,0.111878,-0.193383,-0.121062,-0.212199,0.376061,-0.212622,-0.098081,0.123202,...,-0.228580,0.028261,-0.160408,-0.267610,-0.246323,0.013552,-0.180111,-0.216673,-0.101906,-0.089048
3,0.144771,0.115479,-0.039021,-0.351865,-0.115166,-0.301767,0.098972,-0.339554,-0.347036,-0.144051,...,0.212266,0.038640,0.136801,-0.107068,0.131516,0.038937,0.112573,0.091621,0.087801,-0.082082
4,-0.132477,-0.089648,0.048946,-0.053194,-0.056923,-0.107688,-0.534009,-0.070781,0.028192,-0.032426,...,-0.142308,-0.193043,-0.060722,-0.299233,-0.175220,-0.195887,-0.043797,-0.042662,-0.133386,-0.382283
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-0.067254,-0.131105,0.237328,-0.077079,-0.097235,-0.071457,-0.403136,-0.062708,-0.076514,-0.001402,...,-0.151579,0.194231,-0.242784,0.051407,-0.227598,-0.048982,-0.370414,0.044308,-0.129858,-0.042216
996,0.040828,-0.151212,0.225969,-0.041788,-0.089462,-0.071859,-0.264296,-0.059369,0.012007,0.034403,...,-0.192479,0.250370,-0.390417,0.062328,0.058993,0.011395,-0.284353,-0.017182,-0.237079,-0.153707
997,0.088626,-0.131459,0.150709,-0.001642,-0.070829,-0.089413,0.012784,-0.032045,0.140252,0.060037,...,-0.257883,0.077466,-0.370448,-0.144754,-0.279134,-0.077601,-0.318270,0.019975,-0.418495,-0.155690
998,-0.019281,-0.118700,0.188643,-0.296086,-0.131227,-0.230163,0.353901,-0.268582,-0.313200,-0.195427,...,-0.048776,0.043992,-0.200160,-0.192559,-0.062466,-0.280152,0.021788,0.021034,-0.125445,-0.172127


In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X_normalized, Y, test_size=0.10)

In [10]:
from sklearn.svm import SVC
#Classifying normalised data using SVM
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, Y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [11]:
y_pred = svclassifier.predict(X_test)

In [12]:
print(confusion_matrix(Y_test,y_pred))
print(classification_report(Y_test,y_pred))

[[ 1  0  1  0  0  3  4  0  0  3]
 [ 0 13  1  0  0  0  0  0  0  0]
 [ 1  0  5  0  0  0  0  1  1  1]
 [ 0  0  0  5  3  1  0  0  1  2]
 [ 0  0  0  1  6  1  0  1  3  1]
 [ 0  2  0  0  0  4  0  0  0  0]
 [ 0  0  0  1  0  0  7  0  1  0]
 [ 0  0  2  0  1  0  0 10  0  0]
 [ 0  0  0  0  2  1  0  0  3  1]
 [ 1  0  0  2  0  1  0  0  0  1]]
              precision    recall  f1-score   support

       blues       0.33      0.08      0.13        12
   classical       0.87      0.93      0.90        14
     country       0.56      0.56      0.56         9
       disco       0.56      0.42      0.48        12
      hiphop       0.50      0.46      0.48        13
        jazz       0.36      0.67      0.47         6
       metal       0.64      0.78      0.70         9
         pop       0.83      0.77      0.80        13
      reggae       0.33      0.43      0.38         7
        rock       0.11      0.20      0.14         5

    accuracy                           0.55       100
   macro avg       

In [13]:
import seaborn as sns
sns.heatmap(confusion_matrix(Y_test, y_pred), annot=True)

<matplotlib.axes._subplots.AxesSubplot at 0x23e9c7df188>