In [None]:
from google.colab import drive

# Attach Google Drive to the Colab VM at /content/drive. force_remount=True asks
# Colab to redo the mount even when one is already present from an earlier run.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import os
import random
import sys
import glob 
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils.multiclass import unique_labels
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

import lightgbm as lgb
import xgboost as xgb
from tqdm import tqdm

import warnings
# Install a blanket 'ignore' filter: from this cell onward every Python warning is
# suppressed for the rest of the session, including deprecation notices emitted by
# the libraries imported above. NOTE(review): consider narrowing this to specific
# categories so real problems are not hidden.
warnings.filterwarnings('ignore')

In [None]:
def extract_feature(file_name):
    """Build a fixed-length feature vector for one audio file.

    The clip is loaded with ``librosa.load`` (library-default sample rate), and
    three time-averaged descriptors are concatenated in this order:

    1. 40 MFCC coefficients,
    2. chroma computed from the magnitude STFT,
    3. mel-spectrogram band energies.

    Parameters
    ----------
    file_name : str or path-like
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D float64 vector ``[mfcc | chroma | mel]``. Its length depends on
        librosa's default chroma/mel bin counts (presumably 40 + 12 + 128 --
        TODO confirm against the installed librosa version).
    """
    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram; only the chroma feature consumes it.
    stft = np.abs(librosa.stft(X))

    # Each librosa feature is (n_bins, n_frames); average over frames (time)
    # so every clip yields the same number of values regardless of duration.
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The audio must be passed as the keyword ``y``: librosa >= 0.10 made the
    # feature-function arguments keyword-only, so a positional ``X`` raises TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)

    # Keep the float64 dtype the previous implementation produced (it stacked
    # onto an empty float64 array), so downstream artefacts stay comparable.
    return np.concatenate((mfccs, chroma, mel)).astype(np.float64)

In [None]:
# Two-digit emotion code -> emotion label. Codes run '01'..'08' in the order the
# labels are listed below.
emotions = {
    f'{code:02d}': label
    for code, label in enumerate(
        ('neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised'),
        start=1,
    )
}

def gender(g):
    """Classify a speaker from the two leading characters of ``g``.

    The first two characters are parsed as an integer: an even number maps to
    ``'female'`` and an odd number to ``'male'``. Raises ``ValueError`` if those
    characters are not a valid integer.
    """
    actor_number = int(g[:2])
    return 'male' if actor_number % 2 else 'female'

In [None]:
def load_data(test_size=0.2, pattern="drive/MyDrive/Audio_Speech_Actors_01-24/Actor_*/*.wav"):
    """Extract features and labels for every matching audio file and split them.

    File names are split on ``-``; the third field is looked up in ``emotions``
    and the last field (e.g. ``'24.wav'``) is passed to ``gender``. The label is
    ``'<emotion>_<gender>'``. Files whose emotion code is not in ``emotions``
    are skipped.

    Parameters
    ----------
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    pattern : str
        Glob pattern selecting the audio files. The default is relative to the
        current working directory.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split`` (features as arrays, labels as lists).
    """
    x, y = [], []
    # glob returns paths in filesystem-dependent order; sort them so the fixed
    # random_state below really does reproduce the same split on every run.
    for file in tqdm(sorted(glob.glob(pattern))):
        fields = os.path.basename(file).split("-")
        emotion_code = fields[2]
        if emotion_code not in emotions:
            continue
        # Build the label before the (slow) feature extraction so a malformed
        # file name fails fast.
        label = emotions[emotion_code] + '_' + gender(fields[-1])
        x.append(extract_feature(file))
        y.append(label)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [None]:
# Run the full feature extraction and split (default test_size=0.2). This walks
# every audio file, so it is the slow step of the notebook.
X_train, X_test, y_train, y_test = load_data()

100%|██████████| 1440/1440 [13:45<00:00,  1.74it/s]


In [None]:
# Sanity check: (train rows, test rows) followed by the feature-vector width.
print((len(X_train), len(X_test)))
print(f'Features extracted: {X_train.shape[1]}')

(691, 173)
Features extracted: 194


In [None]:
# Learn the scaling statistics from the training split only, then apply the same
# transform to both splits so no test-set information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
import joblib

# Persist the scaled training matrix (maximum compression) to 'X.train' in the
# current working directory.
xt = 'X.train'
joblib.dump(value=X_train, filename=xt, compress=9)

['X.train']

In [None]:
# Disabled configuration for a stand-alone MLP, kept for reference only:
# mlp_params = {'activation': 'relu',
#               'solver': 'lbfgs',
#               'hidden_layer_sizes': 1194,
#               'alpha': 0.8432377345669054,
#               'batch_size': 173,
#               'learning_rate': 'constant',
#               'max_iter':1000}

# Keyword arguments for lgb.LGBMClassifier.
lgb_params = dict(
    num_leaves=5,
    max_depth=58,
    n_estimators=14734,
    subsample_for_bin=491645,
    min_data_in_leaf=27,
    reg_alpha=1.744123586157066,
    colsample_bytree=0.6495503686746514,
    learning_rate=0.8581745963346554,
    boosting_type='dart',
)

# Keyword arguments for sklearn's MLPClassifier.
mlp2_params = dict(
    activation='relu',
    solver='lbfgs',
    hidden_layer_sizes=1283,
    alpha=0.3849485717707319,
    batch_size=163,
    learning_rate='constant',
    max_iter=1000,
)


In [None]:
# Soft-voting ensemble over the tuned MLP and LightGBM classifiers.
v_params = dict(
    estimators=[
        ('mlp2', MLPClassifier(**mlp2_params)),
        ('lgb', lgb.LGBMClassifier(**lgb_params)),
    ],
    voting='soft',
)

# Registry of estimators keyed by a short name, plus a display name per key.
models = {'v': VotingClassifier(**v_params)}
#models['mlp'] = MLPClassifier(**mlp_params)
model_abrv = {'v': 'Voting Classifier: MLP2, LGB'}#,'mlp':'MLP'}




In [None]:
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14, model='clf', save=True):
    """Draw a confusion matrix as an annotated heatmap and optionally save it.

    Parameters
    ----------
    confusion_matrix : array-like of int, shape (n_classes, n_classes)
        Counts with true labels on rows and predicted labels on columns.
    class_names : sequence
        Labels used for both axes, in the same order as the matrix rows.
    figsize : tuple, default (10, 7)
        Figure size passed to ``plt.subplots``.
    fontsize : int, default 14
        Font size of the tick labels.
    model : str, default 'clf'
        Accepted for backward compatibility; currently unused.
    save : bool, default True
        When truthy, write the figure to ``'confusion_matrix.jpg'`` in the
        current working directory (overwriting any previous file).

    Raises
    ------
    ValueError
        If the heatmap cannot be drawn with integer formatting.
    """
    df_cm = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, ax=ax, fmt="d", cmap=plt.cm.Oranges)
    except ValueError as err:
        # Do not leak the half-built figure, and keep the root cause chained.
        plt.close(fig)
        raise ValueError("Confusion matrix values must be integers.") from err

    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    # Pin the y-limits to the full grid (first row on top). The former
    # "ylim +/- 0.5" adjustment only compensated for matplotlib 3.1.1 cropping
    # the outer rows; on other versions it added blank half-row margins.
    ax.set_ylim(len(df_cm), 0)
    fig.tight_layout()
    if save:
        fig.savefig('confusion_matrix.jpg')
    plt.show()

In [None]:
def model(clf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, models=models, save=False, print_stat=True, inc_train=False, cv=False):
    """Fit one registered estimator and optionally report, cross-validate and return it.

    Parameters
    ----------
    clf : str
        Key into ``models`` (and ``model_abrv`` for the display name).
    X_train, X_test, y_train, y_test
        Data splits. The defaults are the notebook-level variables as they
        were when this function was defined.
    models : dict
        Mapping of key -> estimator.
    save : bool, default False
        When truthy, return the fitted estimator; otherwise return ``None``.
    print_stat : bool, default True
        Print the test classification report, write it to
        ``'tuned_<name>_classification_report.csv'`` and plot the test
        confusion matrix.
    inc_train : bool, default False
        Only used when ``print_stat`` is truthy: also report on the training split.
    cv : bool, default False
        Print the mean 5-fold cross-validated accuracy on the training split.

    Returns
    -------
    estimator or None
        The fitted estimator when ``save`` is truthy.
    """
    clf_model = models[clf]
    clf_model.fit(X_train, y_train)
    y_pred = clf_model.predict(X_test)
    if print_stat:
        clf_report = pd.DataFrame(classification_report(y_test,y_pred, output_dict=True)).T
        clf_report.to_csv('tuned_' + model_abrv[clf] + '_classification_report.csv')
        print(model_abrv[clf])
        print('\nTest Stats\n', classification_report(y_test,y_pred))
        print_confusion_matrix(confusion_matrix(y_test, y_pred), unique_labels(y_test, y_pred), model=clf)
        if inc_train:
            # Predict once and reuse for both the report and the matrix.
            y_train_pred = clf_model.predict(X_train)
            print(model_abrv[clf])
            print('\nTrain Stats\n', classification_report(y_train, y_train_pred))
            # Axis labels must come from the *training* labels/predictions: the
            # matrix rows follow those classes, which need not equal the set
            # seen in the test split.
            # NOTE: print_confusion_matrix saves by default, so this plot
            # overwrites the test matrix in 'confusion_matrix.jpg'.
            print_confusion_matrix(confusion_matrix(y_train, y_train_pred), unique_labels(y_train, y_train_pred), model=clf)
    if cv:
        print(model_abrv[clf] + ' CV Accuracy:',
              np.mean(cross_val_score(clf_model, X_train, y_train, cv=5, scoring='accuracy')))
    if save:
        return clf_model

In [None]:
# Fit and cross-validate every registered model. `fmodel` is rebound on each
# pass, so after the loop it refers to the last model that was fitted.
for key in models:
    fmodel = model(key, save=True, print_stat=False, cv=True)

Voting Classifier: MLP2, LGB CV Accuracy: 0.7077885517672818


In [None]:
import joblib

# Serialise the fitted model with joblib (maximum compression).
# NOTE(review): the file is a joblib dump despite the '.h5' extension, so it must
# be read back with joblib.load -- confirm the extension is intentional.
model_name = 'Emotion_Voice_Detection_Model.h5'
save_dir = '/content'
fmodel_path = os.path.join(save_dir, model_name)
joblib.dump(value=fmodel, filename=fmodel_path, compress=9)

['/content/Emotion_Voice_Detection_Model.h5']