In [None]:
import pandas as pd
import numpy as np
import os
from google.colab import drive 

import matplotlib.pyplot as plt
import seaborn as sns

import librosa
import librosa.display
# to play the audio files
from IPython.display import Audio

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder


In [None]:
# Directory that holds the .wav recordings inside the mounted Google Drive.
data_path = '/content/drive/My Drive/empathic_data/data/audio-files/'
# Mount Google Drive at /content/drive so that data_path resolves.
drive.mount('/content/drive')

In [None]:
# Parallel lists: file_name[i] is a .wav file found in data_path and
# filepath[i] is the directory it was found in (always data_path).
file_name = []
filepath = []
total = []  # not referenced by any cell visible in this notebook
for filename in os.listdir(data_path):
    # Skip anything that is not a .wav recording.
    if not filename.endswith(".wav"):
        continue
    filepath.append(data_path)
    file_name.append(filename)

In [None]:
# Full path of every recording. os.path.join is used instead of plain string
# concatenation so the result stays valid even if the directory string has no
# trailing slash.
merged_path = [os.path.join(folder, name) for folder, name in zip(filepath, file_name)]

In [None]:
# Lookup table: .wav file name -> full path of that file.
dictionary = {wav_name: wav_path for wav_name, wav_path in zip(file_name, merged_path)}

In [None]:
# Load the label table and attach the audio path of each recording.
df = pd.read_csv('/content/merged_data.csv')
# Rows whose file_name has no entry in `dictionary` get NaN as their path.
df['path'] = df['file_name'].map(dictionary)
df['Emotion'] = df['Emotion'].str.capitalize()
# Drop the 'Pleasure' class; .copy() makes the result an independent frame so
# the column assignment below does not act on a view of the original.
df = df[df['Emotion'] != 'Pleasure'].copy()
# Merge classes: Pain -> Sad, Achievement -> Happy.
# The result is assigned back instead of using
# `df['Emotion'].replace(..., inplace=True)`: that form modifies a temporary
# Series and leaves df unchanged when pandas Copy-on-Write is active.
df['Emotion'] = df['Emotion'].replace({'Pain': 'Sad', 'Achievement': 'Happy'})
df.head()

In [None]:
# Number of labelled rows currently held in df.
print("number of files is {}".format(df.shape[0]))

In [None]:
# Rename the 'Ps' label to 'Surprise'. A new frame is built with .assign()
# rather than calling `df.Emotion.replace(..., inplace=True)`, which operates
# on a temporary Series and does not update df under pandas Copy-on-Write.
# Re-running this cell is harmless.
df = df.assign(Emotion=df['Emotion'].replace('Ps', 'Surprise'))

In [None]:
# Distinct emotion labels that remain after the clean-up steps.
df['Emotion'].unique()

In [None]:
# Class balance of the dataset. A single figure is created (the earlier
# `plt.figure()` followed by `plt.figure(figsize=...)` left an extra empty
# figure behind) and the plot is drawn on its axes explicitly.
fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(x="Emotion", data=df, ax=ax)
ax.set_title("Recordings per emotion")
plt.show();

In [None]:
def extract_feature(file_name, mfcc, chroma, mel):
    """Build one feature vector for an audio file.

    Each enabled feature is computed per frame, averaged over time and
    appended to the result in the order MFCC, chroma, mel.

    Parameters
    ----------
    file_name : path of the audio file, passed to ``librosa.load``.
    mfcc : if truthy, append the time-averaged 40 MFCC coefficients.
    chroma : if truthy, append the time-averaged chromagram of the STFT magnitude.
    mel : if truthy, append the time-averaged mel spectrogram.

    Returns
    -------
    numpy.ndarray
        1-D concatenation of the selected features; empty when no flag is set.
    """
    X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    # Start from an empty float array so the stacked result keeps the same
    # dtype promotion as stacking onto np.array([]) step by step.
    parts = [np.array([])]
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        parts.append(mfccs)
    if chroma:
        # The STFT magnitude is only needed for the chroma feature.
        stft = np.abs(librosa.stft(y=X))
        chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        parts.append(chroma_mean)
    if mel:
        # The signal must be passed as y=...: recent librosa releases reject
        # a positional audio argument here.
        mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        parts.append(mel_mean)
    return np.hstack(parts)

In [None]:
#@title Input_feature
def features_extractor(file, inputfeature):
    """Return a time-averaged feature vector for one audio file.

    Parameters
    ----------
    file : path of the audio file, passed to ``librosa.load``.
    inputfeature : ``'mfcc'`` for the mean of 40 MFCC coefficients, or
        ``'mel'`` for the mean mel spectrogram.

    Returns
    -------
    numpy.ndarray
        1-D array holding the per-coefficient mean over all frames.

    Raises
    ------
    ValueError
        If ``inputfeature`` is neither ``'mfcc'`` nor ``'mel'``.
    """
    if inputfeature not in ('mfcc', 'mel'):
        # Fail loudly instead of returning None, which would only surface
        # later as a confusing error when the features are stacked.
        raise ValueError(
            "inputfeature must be 'mfcc' or 'mel', got {!r}".format(inputfeature)
        )

    # Load the file given as argument (not a module-level variable).
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')

    if inputfeature == 'mfcc':
        frames = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    else:
        frames = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    return np.mean(frames.T, axis=0)
    


In [None]:
from tqdm import tqdm
### Now we iterate through every audio file and extract features
### using Mel-Frequency Cepstral Coefficients

extracted_features = []
# iterrows() is a generator, so tqdm needs total= to show progress and an ETA.
for index_num, row in tqdm(df.iterrows(), total=len(df)):
    # NOTE: this rebinds the module-level name `file_name` (previously the
    # list of .wav names) to the path of the current row.
    file_name = row["path"]
    final_class_labels = row["Emotion"]
    # Alternative (all three feature families):
    #   extract_feature(file_name, mfcc=True, chroma=True, mel=True)
    data = features_extractor(file_name, 'mfcc')
    extracted_features.append([data, final_class_labels])

In [None]:
### Turn the list of [feature_vector, label] pairs into a two-column DataFrame
extracted_features_df = pd.DataFrame(
    data=extracted_features,
    columns=['feature', 'class'],
)
extracted_features_df.head(5)

In [None]:
### Separate the inputs (X: feature vectors) from the targets (y: emotion labels)
feature_column = extracted_features_df['feature']
label_column = extracted_features_df['class']
X = np.array(feature_column.to_list())
y = np.array(label_column.to_list())

In [None]:
### Label Encoding
### Emotion names -> integer ids (LabelEncoder) -> one-hot rows (to_categorical).
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
# Encode from the DataFrame column rather than from `y` itself: the cell then
# gives the same result every time it is run, instead of failing on a second
# run because `y` has already been replaced by its 2-D one-hot encoding.
class_ids = labelencoder.fit_transform(extracted_features_df['class'])
y = to_categorical(class_ids)

In [None]:
### Hold out 25% of the samples for testing (fixed seed for a repeatable split)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Shapes of the four resulting arrays, in the order train/test X then train/test y.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
from sklearn.neural_network import MLPClassifier

# Base estimator for the search; a fixed seed makes the search repeatable.
mlp_gs = MLPClassifier(max_iter=100, random_state=42)
# Hyper-parameter grid explored by the 5-fold cross-validated search.
parameter_space = {
    'hidden_layer_sizes': [(10,30,10),(20,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.01],
    'learning_rate': ['constant','adaptive'],
    'batch_size' : [32,64]
}

from sklearn.model_selection import GridSearchCV
clf = GridSearchCV(mlp_gs, parameter_space, n_jobs=-1, cv=5)
# Tune on the training split only: fitting on the full X, y would let the
# held-out test samples influence the chosen hyper-parameters.
clf.fit(X_train, y_train)

In [None]:
# Report the hyper-parameter combination selected by the grid search.
print('Best parameters found:\n', clf.best_params_)

In [None]:
# Initialize the Multi Layer Perceptron Classifier.
# random_state pins the weight initialisation and batch shuffling so that
# repeated runs of the notebook give the same model.
model = MLPClassifier(
    alpha=0.01,
    batch_size=32,
    epsilon=1e-08,
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',
    max_iter=500,
    random_state=42,
)
# Alternative configuration kept for reference:
#model=MLPClassifier(activation = 'relu', alpha=0.0001, batch_size=32, epsilon=1e-08, hidden_layer_sizes=(10, 30, 10), learning_rate='adaptive', max_iter=500, solver = 'adam')

In [None]:
# Train the model
# NOTE(review): y_train is the one-hot matrix produced by to_categorical, so
# scikit-learn presumably treats this as a multilabel problem — confirm that
# this is intended.
model.fit(X_train,y_train)

In [None]:
# Predict for the test set
# y_pred is what the metric cells below compare against y_test.
y_pred=model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Score the test-set predictions against the true labels.
# NOTE(review): with one-hot (2-D) targets this is presumably exact-row-match
# accuracy — confirm against the scikit-learn docs.
accuracy = accuracy_score(y_test, y_pred)
# Shown as a percentage with two decimals.
print("Accuracy: {:.2f}%".format(100 * accuracy))
     

In [None]:
from sklearn.metrics import mean_squared_error
# mean_squared_error returns the MSE; take the square root to obtain the
# value the name `rmse` promises. Neither quantity is a percentage, so the
# '%' suffix is dropped from the message.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("Loss (MSE): {:.4f} | RMSE: {:.4f}".format(mse, rmse))

In [None]:
from sklearn.metrics import classification_report
# Take the class names from the fitted LabelEncoder: its classes_ are in the
# same order as the one-hot columns, so the report rows cannot drift out of
# sync with the data the way a hand-written list can.
class_names = [str(name) for name in labelencoder.classes_]
print(classification_report(y_test, y_pred, target_names=class_names))


In [None]:
from sklearn.metrics import f1_score
# Macro average: the per-class F1 scores are averaged with equal weight.
print(f1_score(y_true=y_test, y_pred=y_pred, average='macro'))

In [None]:
from sklearn.metrics import confusion_matrix

# Collapse the one-hot rows to class indices under NEW names. Overwriting
# y_pred / y_test would make this cell fail when run twice and would break
# re-running the metric cells above, which expect the one-hot arrays.
# NOTE(review): a predicted row with no active label maps to index 0 here.
y_test_idx = np.argmax(y_test, axis=1)
y_pred_idx = np.argmax(y_pred, axis=1)

# Class names in one-hot column order, taken from the fitted encoder.
class_names = [str(name) for name in labelencoder.classes_]
# labels= keeps the matrix at n_classes x n_classes even if some class does
# not occur in the test split, so the tick labels always line up.
matrix = confusion_matrix(y_test_idx, y_pred_idx, labels=np.arange(len(class_names)))

fig, ax = plt.subplots()
# annot=True writes the count in each cell; fmt='g' avoids scientific notation.
sns.heatmap(
    matrix,
    linecolor='white',
    cmap='Blues',
    annot=True,
    fmt='g',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)

# labels and title
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix');

In [None]:
import joblib

# save the model to disk
# NOTE(review): the dump below is commented out, yet a later cell loads
# '/content/mlpClassifier_1.2.sav'; on a fresh runtime that file has to be
# supplied some other way — confirm.
#filename = 'mlpClassifier_1.2.sav'
#joblib.dump(model, filename)

In [None]:
# Print the installed joblib version.
print(joblib.__version__)

In [None]:
# load the model from disk
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files from a trusted source.
# NOTE(review): this rebinds the module-level name `file_name`, which earlier
# cells use for audio file names/paths.
file_name = '/content/mlpClassifier_1.2.sav'
loaded_model = joblib.load(file_name)

In [None]:
# Display the restored estimator.
loaded_model

In [None]:
# Class labels stored on the restored estimator.
loaded_model.classes_

In [None]:
def predict_new_audio(audio):
    """Predict and print the emotion of a single audio file.

    The file is turned into the mean of 40 MFCC coefficients, passed to the
    module-level ``loaded_model``, and the most probable class is printed as
    a one-element list such as ``['Happy']``.

    Parameters
    ----------
    audio : path of the audio file, passed to ``librosa.load``.

    Returns
    -------
    None
        The prediction is printed, not returned.
    """
    # Pre-process: waveform -> (1, 40) matrix of time-averaged MFCCs.
    waveform, sample_rate = librosa.load(audio, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=40)
    features = np.mean(mfccs_features.T, axis=0).astype(float).reshape(1, -1)

    # Rank the classes by probability. Taking argmax over predict() output is
    # unreliable: an all-zero label row, or a single returned class id, both
    # make argmax yield 0 and hence always 'Angry'.
    # NOTE(review): assumes loaded_model exposes predict_proba with columns in
    # the order of the mapping below — confirm against loaded_model.classes_.
    probabilities = loaded_model.predict_proba(features)
    max_emo = int(np.argmax(probabilities, axis=1)[0])

    di = {0:'Angry',1: 'Calm',2: 'Disgust',3: 'Fear',4:'Happy',5:'Sad',6:'Surprise'}
    # Fall back to the raw index if it is not in the mapping.
    emo = [di.get(max_emo, max_emo)]
    print(emo)

In [None]:
# Demo: classify one recording and print the predicted emotion.
predict_new_audio('/content/S01_achievement_low_02.wav')

In [None]:
# Demo: classify a second recording.
predict_new_audio('/content/S02_pain_moderate_02.wav')