In [16]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
import pandas as pd

In [2]:
def extract_feature(file_name, mfcc, chroma, mel):
    X, sample_rate = librosa.load(os.path.join(file_name), res_type='kaiser_fast')
    if chroma:
        stft=np.abs(librosa.stft(X))
    result=np.array([])
    if mfcc:
        mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result=np.hstack((result, mfccs))
    if chroma:
        chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        result=np.hstack((result, chroma))
    if mel:
        mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
        result=np.hstack((result, mel))
    return result

In [3]:
emotions={
  '00':'blues',
  '01':'classical',
  '02':'country',
  '03':'disco',
  '04':'hiphop',
  '05':'jazz',
  '06':'metal',
  '07':'pop',
    '08':'reggae',
    '09':'rock'
}

#DataFlair - Emotions to observe
observed_emotions=emotions.values()

In [4]:
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("C:\\Users\\91967\\Desktop\\Bhavya\\Lohita\\Data\\All*\\*.wav"):
        file_name=os.path.basename(file)
#         print(file_name)
        emotion=file_name.split(".")[0]
#         print(emotion)
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, train_size= 0.75 ,random_state=9)

In [5]:
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

In [6]:
x_train.shape,x_test.shape

((749, 180), (250, 180))

In [7]:
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow import keras

In [8]:
model=MLPClassifier(alpha=0.01, batch_size=128, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=6000)

In [9]:
model.fit(x_train,y_train)

MLPClassifier(alpha=0.01, batch_size=128, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=6000)

In [10]:
y_pred=model.predict(x_test)

In [11]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 67.20%


In [12]:
pickle. dump(model, open('model.pkl', 'wb'))

In [13]:
loaded_model = pickle. load(open('model.pkl', 'rb'))
result = loaded_model. score(x_test, y_test)

In [14]:
result

0.672

In [21]:
d= pd.DataFrame(list(zip(y_pred,y_test)))

In [22]:
d.columns=['Predicted Genre','Actual Genre']

In [24]:
d.to_csv('Results.csv',index=False)

The better accuracy till now with from all the models - it is 67.24% 

###### Trying out other models

###### KNN

In [68]:
from sklearn import neighbors
clf=neighbors.KNeighborsClassifier()
clf.fit(x_train,y_train)
print("Prediction : ")
y_pred_2 = clf.predict(x_test)

Prediction : 


In [69]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred_2)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 56.80%


###### RF 

In [70]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf = clf.fit(x_train,y_train)
y_pred_4 = clf.predict(x_test)

In [71]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred_4)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 67.60%


In [73]:
from sklearn.preprocessing import StandardScaler
fit=StandardScaler()
x_train=fit.fit_transform(x_train)
x_test=fit.fit_transform(x_test)

In [75]:
x_train

array([[-0.52870233,  1.61842215,  0.67603053, ..., -0.16403393,
        -0.16465611, -0.13098915],
       [-0.1138869 ,  0.32528907,  0.80699364, ..., -0.16401412,
        -0.16465429, -0.13098912],
       [ 0.34867484,  1.10632185, -0.74120213, ..., -0.16136911,
        -0.16193331, -0.12891207],
       ...,
       [ 0.46174126,  0.07722509, -0.69267165, ..., -0.16402928,
        -0.16465465, -0.13098814],
       [ 0.41056891, -0.62808158, -1.50861969, ..., -0.1640046 ,
        -0.16465167, -0.13098698],
       [ 0.82193996, -0.1692424 , -0.21651827, ..., -0.05211038,
        -0.05038866, -0.0425319 ]])

In [76]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf = clf.fit(x_train,y_train)
y_pred_4 = clf.predict(x_test)

In [77]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred_4)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 47.60%


In [78]:
from sklearn import neighbors
clf=neighbors.KNeighborsClassifier()
clf.fit(x_train,y_train)
print("Prediction : ")
y_pred_2 = clf.predict(x_test)

Prediction : 


In [79]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred_2)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 60.80%


In [82]:
from sklearn.linear_model import LogisticRegression
clf=LogisticRegression()
clf.fit(x_train,y_train)
y_pred_log= clf.predict(x_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [83]:
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred_log)

#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 65.60%
