In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn


import librosa
import librosa.display
import IPython.display as ipd

In [2]:
import os
general_path = 'D:/datasets_for_final_year/Data'
print(list(os.listdir(f'{general_path}/genres_original/')))

['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']


In [3]:
y, sr = librosa.load(f'{general_path}/genres_original/blues/0_blues.00000.wav')

In [4]:
y

array([ 0.00732422,  0.01660156,  0.00762939, ..., -0.04373169,
       -0.05706787, -0.04086304], dtype=float32)

In [5]:
def Feature_extraction(filename):
    import librosa
    import numpy as np


    y, sr = librosa.load(filename)
    #fetching tempo

    onset_env = librosa.onset.onset_strength(y, sr)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)

    #fetching beats

    y_harmonic, y_percussive = librosa.effects.hpss(y)
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,sr=sr)

    #chroma_stft

    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)

    #rmse

    rmse = librosa.feature.rms(y=y)

    #fetching spectral centroid

    spec_centroid = librosa.feature.spectral_centroid(y, sr=sr)[0]

    #spectral bandwidth

    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)

    #fetching spectral rolloff

    spec_rolloff = librosa.feature.spectral_rolloff(y+0.01, sr=sr)[0]

    #zero crossing rate

    zero_crossing = librosa.feature.zero_crossing_rate(y)

    #mfcc

    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    #metadata dictionary

    metadata_dict = {'chroma_stft_mean':np.mean(chroma_stft),'rms_mean':np.mean(rmse),
                     'spectral_centroid_mean':np.mean(spec_centroid),'spectral_bandwidth_mean':np.mean(spec_bw), 
                     'rolloff_mean':np.mean(spec_rolloff), 'zero_crossing_rate_mean':np.mean(zero_crossing), 'harmony_mean':np.mean(y_harmonic)}

    for i in range(1,21):
        metadata_dict.update({'mfcc'+str(i)+'_mean':np.mean(mfcc[i-1])})
    
    metadata_dict.update({'tempo':tempo})
    return list(metadata_dict.values())

In [10]:
data=pd.read_csv('features_3_sec_.csv')

In [11]:

data

Unnamed: 0,filename,chroma_stft_mean,rms_mean,spectral_centroid_mean,spectral_bandwidth_mean,rolloff_mean,zero_crossing_rate_mean,harmonic_mean,tempo,mfcc1_mean,...,mfcc12_mean,mfcc13_mean,mfcc14_mean,mfcc15_mean,mfcc16_mean,mfcc17_mean,mfcc18_mean,mfcc19_mean,mfcc20_mean,label
0,blues.00000.0.wav,0.335406,0.130405,1773.065032,1972.744388,3714.560359,0.081851,-0.000078,129.199219,-118.627914,...,10.183875,-4.681614,8.417439,-7.233477,-2.853603,-3.241280,0.722209,-5.050335,-0.243027,blues
1,blues.00000.1.wav,0.343065,0.112699,1816.693777,2010.051501,3869.682242,0.087173,-0.000099,123.046875,-125.590706,...,8.145000,-7.717751,8.397150,-8.300493,4.074709,-6.055294,0.159015,-2.837699,5.784063,blues
2,blues.00000.2.wav,0.346815,0.132003,1788.539719,2084.565132,3997.639160,0.071383,-0.000066,123.046875,-132.441940,...,11.853963,-4.677677,6.571110,-2.424750,4.806280,-1.768610,2.378768,-1.938424,2.517375,blues
3,blues.00000.3.wav,0.363639,0.132565,1655.289045,1960.039988,3568.300218,0.069426,-0.000014,123.046875,-118.231087,...,10.389314,-4.362739,9.156193,-9.889441,-1.359111,-3.841155,1.218588,-3.580352,3.630866,blues
4,blues.00000.4.wav,0.335579,0.143289,1630.656199,1948.503884,3469.992864,0.070095,0.000041,123.046875,-105.968376,...,17.045437,-5.681399,5.705521,-7.986080,2.092937,0.664582,1.689446,-3.392489,0.536961,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9985,rock.00099.5.wav,0.349126,0.050019,1499.083005,1718.707215,3015.559458,0.072885,-0.000034,123.046875,-224.972168,...,9.496511,-16.715929,8.380793,-2.225949,5.773784,-9.094270,-4.246976,-5.625813,1.818823,rock
9986,rock.00099.6.wav,0.372564,0.057897,1847.965128,1906.468492,3746.694524,0.089111,-0.000027,83.354335,-192.806641,...,11.510695,-19.505360,2.598546,-8.003813,2.074155,-12.375726,-3.081278,-11.960546,0.428857,rock
9987,rock.00099.7.wav,0.347481,0.052403,1346.157659,1561.859087,2442.362154,0.072194,0.000015,123.046875,-287.840088,...,6.899863,-13.186050,-3.914767,-9.124884,-1.005473,-2.524483,4.809936,1.775686,-0.299545,rock
9988,rock.00099.8.wav,0.387527,0.066430,2084.515327,2018.366254,4313.266226,0.104072,0.000004,123.046875,-162.659592,...,12.855068,-14.699870,4.112486,-5.266814,4.123402,-5.363541,6.462601,2.354765,0.675824,rock


In [12]:
df=data.drop(['filename'], axis = 1)

In [14]:
from xgboost import XGBClassifier
from xgboost import plot_tree, plot_importance

from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split


In [15]:
y =df['label'] # dependent feature
X = df.loc[:, df.columns != 'label'] #select all columns but not the labels  --independent fearutures

cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)
X = pd.DataFrame(np_scaled, columns = cols)

In [16]:
y =df ['label'] # dependent feature
X = df.loc[:, df.columns != 'label'] #select all columns but not the labels  --independent

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [18]:
train_x = X_train.values
test_x = X_test.values
train_y=y_train
test_y=y_test

In [19]:
def model_assess(model, title = "Default"):
    model.fit(X_train, y_train)
    pred_train = model.predict(train_x)
    pred_test=model.predict(test_x)
   
    print(' Prediction Accuracy ', ':', round(accuracy_score(train_y, pred_train), 5), '\n')
    print('Test Accuracy', ':', round(accuracy_score(test_y, pred_test), 5), '\n')

In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


## Decision Tree 

In [22]:
tree = DecisionTreeClassifier()
model_assess(tree, "Decission trees")

 Prediction Accuracy  : 0.99928 

Test Accuracy : 0.61228 



## Random Forest 

In [23]:
rforest = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
model_assess(rforest, "Random Forest")

 Prediction Accuracy  : 0.95667 

Test Accuracy : 0.78111 



In [24]:
svm = SVC(decision_function_shape="ovo")
model_assess(svm, "Support Vector Machine")

 Prediction Accuracy  : 0.34678 

Test Accuracy : 0.33867 



In [25]:
knn = KNeighborsClassifier(n_neighbors=19)
model_assess(knn, "KNN")

 Prediction Accuracy  : 0.5025 

Test Accuracy : 0.41975 



In [28]:
xgb1 = XGBClassifier()           
xgb1.fit(train_x,train_y)
pred_train = xgb1.predict(train_x)
pred_test = xgb1.predict(test_x)

print(' Prediction Accuracy ', ':', round(accuracy_score(y_train, pred_train), 5), '\n')
print('Accuracy', ':', round(accuracy_score(test_y, pred_test), 5), '\n')


 Prediction Accuracy  : 0.99928 

Accuracy : 0.87187 



## Hyperparameter Tuning

In [32]:
params={
    'learning_rate':[.05,.1,.15,.2],
    'n_estimators':[100,200,300,400,500,600,700,800,900,1000],
    'max_depth':[3,4,5,6,8,10],
    'gamma':[0.0,.1,.2,.3,.4]
    
}

In [33]:
from sklearn.model_selection import RandomizedSearchCV

In [35]:
random_search=RandomizedSearchCV(xgb1,param_distributions=params,n_iter=5,scoring='accuracy',n_jobs=-1,cv=5,verbose=3)

In [36]:
label_encoder = preprocessing.LabelEncoder()
  


In [37]:
Y= label_encoder.fit_transform(df['label'])

array([0, 0, 0, ..., 9, 9, 9])

In [41]:
random_search.fit(X,Y)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed: 57.1min finished




RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                           colsample_bylevel=1,
                                           colsample_bynode=1,
                                           colsample_bytree=1, gamma=0,
                                           gpu_id=-1, importance_type='gain',
                                           interaction_constraints='',
                                           learning_rate=0.300000012,
                                           max_delta_step=0, max_depth=6,
                                           min_child_weight=1, missing=nan,
                                           monotone_constraints='()',
                                           n_estimators=100...
                                           validate_parameters=1,
                                           verbosity=None),
                   iid='deprecated', n_iter=5, n_jobs=

In [42]:
random_search.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0.0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.2, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=500, n_jobs=4, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method='exact', use_label_encoder=True,
              validate_parameters=1, verbosity=None)

In [43]:
random_search.best_params_

{'n_estimators': 500, 'max_depth': 4, 'learning_rate': 0.2, 'gamma': 0.0}

### Final Model 

In [46]:
xgb2 = XGBClassifier(n_estimators= 500, max_depth= 4, learning_rate= 0.2, gamma= 0.0)           
xgb2.fit(train_x,train_y)
pred_train = xgb2.predict(train_x)
pred_test = xgb2.predict(test_x)

print(' Prediction Accuracy ', ':', round(accuracy_score(y_train, pred_train), 5), '\n')
print('Accuracy', ':', round(accuracy_score(test_y, pred_test), 5), '\n')


 Prediction Accuracy  : 0.99928 

Accuracy : 0.87721 



### Music Genre Prediction 

In [20]:
filename=f'{general_path}/genres_original/blues/0_blues.00000.wav'

In [23]:
def predict_genre(filename):
    a=Feature_extraction(filename)
    d1 =np.array(a)
    data1 = min_max_scaler.transform([d1])
    genre_prediction = xgb.predict(data1)
    print(genre_prediction[0])


In [24]:
predict_genre(filename)

blues


In [25]:
xgb.save_model('model.json')