In [None]:
import pandas as pd
import numpy as np

import os
import sys
import pickle

import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import Audio

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model

In [None]:
import soundfile
import numpy as np
import librosa
import glob
import os # to use operating system dependent functionality
from sklearn.model_selection import train_test_split # for splitting training and testing
from sklearn.neural_network import MLPClassifier # multi-layer perceptron model
from sklearn.metrics import accuracy_score # to measure how good we are

<h1>Dataset

In [None]:
#RAVDESS
!gdown 13NCkIfuEJ-rgEQbqPo4CI31aBUQ2Mc77
!unzip "/content/archive (1).zip" -d "/content/ravdess_dataset/"

<h1>Version 2</h1>

In [None]:
#def get_feature(file_name,mfccs,mel,chroma,contrast):

def get_feature(file_name):

        data, sample_rate = librosa.load(file_name)
        stft = np.abs(librosa.stft(data))
        mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40).T, axis=0)
        mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T,axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)



        return mfccs,mel,chroma,contrast

In [None]:
# emotions in dataset
list_emotion = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# I am using only 3 emotions to observe,feel free to add more.
classify_emotions = {
    "neutral",
    "calm",
    "happy",
    "sad",
    "angry",
    "fearful",
    "surprised"
}

In [None]:
def load_data(test_size=0.2):
    feature, y = [], []

    for file in glob.glob("/content/ravdess_dataset/Actor_*//*.wav"):
        basename = os.path.basename(file)  # get the base name of the audio file

        emotion = list_emotion[basename.split("-")[2]]   # get the emotion label

        if emotion in classify_emotions:    # we allow only classify_emotions we set
            mfccs,mel,chroma,contrast = get_feature(file)

            ext_features = np.hstack([mfccs,mel,chroma,contrast])
            feature.append(ext_features)
            y.append(emotion)

    # split the data to training and testing and return it
    return train_test_split(np.array(feature), y, test_size=test_size, random_state=9)

In [None]:
feature_train, feature_test, y_train, y_test = load_data(0.8)

In [None]:
# using get_features() function
print("Number of samples in training data:", feature_train.shape[0])

print("Number of samples in testing data:", feature_test.shape[0])

Number of samples in training data: 873
Number of samples in testing data: 375


In [None]:
print("Training the model.....")
clf=MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500).fit(feature_train, y_train)

Training the model.....


In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import joblib
joblib.dump(clf,'/content/gdrive/MyDrive/Projects/CSE 4622 ML/model\\emotion_detection_from_audio_v2.h5')

['/content/gdrive/MyDrive/Projects/CSE 4622 ML/model\\emotion_detection_from_audio_v2.h5']

In [None]:
# predict 25% of data
y_pred = clf.predict(feature_test)

# calculate the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy is: {:.2f}%".format(accuracy*100))

Accuracy is: 50.93%


In [None]:
print("Number of features:", feature_train.shape[1])

Number of features: 187


In [None]:
path_ = '/content/ravdess_dataset/Actor_03/03-01-07-02-01-02-03.wav'
feature_=[]

mfccs,mel,chroma,contrast = get_feature(path_)
ext_features_ = np.hstack([mfccs,mel,chroma,contrast])
feature_.append(ext_features_)

y_pred_ = clf.predict(feature_)
#y_pred_ = encoder.inverse_transform(pred_test_)
print(y_pred_) #emotion prediction
print(y_pred_[0]) #emotion prediction

['angry']
angry


<h2>Version 3

In [None]:
print("Total number of training sample: ",feature_train.shape[0])
print("Total number of testing example: ",feature_test.shape[0])
print("Feature extracted",feature_train.shape[1])

Total number of training sample:  249
Total number of testing example:  999
Feature extracted 187


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
# fix random seed for reproducibility
tf.random.set_seed(7)
sent_length = 5000

# truncate and pad input sequences
voc_size=10000

In [None]:
embedding_vector_features=40

model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(Dropout(0.3))
model.add(LSTM(100)) #Adding 100 lstm neurons in the layer
model.add(Dropout(0.3))
model.add(Dense(1,activation='sigmoid'))

#Compiling the model
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print(model.summary())

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 5000, 40)          400000    
                                                                 
 dropout (Dropout)           (None, 5000, 40)          0         
                                                                 
 lstm_3 (LSTM)               (None, 100)               56400     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 101       
                                                                 
Total params: 456,501
Trainable params: 456,501
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
feature_train = np.array(feature_train)
y_train = np.array(y_train)
feature_test = np.array(feature_test)
y_test = np.array(y_test)

In [None]:
model.fit(feature_train,y_train,validation_data=(feature_test,y_test),epochs=10,batch_size=64)

Epoch 1/10


ValueError: ignored