### Importing dependencies

In [53]:
import librosa
import os
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras import optimizers
from sklearn import metrics 
from sklearn.preprocessing import LabelEncoder
from keras import regularizers
from collections import Counter
from keras.models import Model
from sklearn.metrics import accuracy_score
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import cv2
from keras.optimizers import SGD,RMSprop, adam, Adagrad, Adadelta, Adamax, Nadam
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')
from keras.models import load_model
import pickle

### Loading saved models

In [2]:
visual_model = load_model('../weights/visual_network.h5')
audio_model = load_model('../weights/audio_network.h5')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.




### Extracting features from speech data

In [5]:
audio_file_paths=[]
audio_labels=[]
audio_data = '../Data/training/Speech_Data'
exp = os.listdir(audio_data)
for i in exp:
    files = os.listdir(audio_data+'/'+i)
    for j in files:
        audio_file_paths.append(audio_data+'/'+i+'/'+j)
        audio_labels.append(i)

In [7]:
def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccsscaled = np.mean(mfccs.T,axis=0)
    return mfccsscaled
features = []
# Iterate through each sound file and extract the features 
for i in range(0,len(audio_file_paths)) :
    file_name = audio_file_paths[i]
    class_label = audio_labels[i]
    data = extract_features(file_name)
    features.append([data, class_label])
    
audio_df = pd.DataFrame(features, columns=['Audio_Features','Expression_Label'])

In [13]:
le = LabelEncoder()
audio_y = le.fit_transform(audio_df.Expression_Label)
keys = le.classes_
values = le.transform(le.classes_)
dictionary = dict(zip(keys, values))
print(dictionary)

{'Angry': 0, 'Disgust': 1, 'Fearful': 2, 'Happy': 3, 'Neutral': 4, 'Sad': 5, 'Surprised': 6}


In [14]:
#Splitting audio data into train and test
audio_X = np.array(audio_df.Audio_Features.tolist()).reshape(-1,40,1)
audio_x_train, audio_x_test, audio_y_train, audio_y_test = train_test_split(audio_X, audio_y,  test_size=0.2, stratify=audio_y)

### Extracting features from visual data

In [15]:
visual_data_path = '../Data/training/Visual_Data'
Exp_list = os.listdir(visual_data_path)

In [16]:
img_data_list=[]
visual_labels = []
for dataset in Exp_list:
    img_list=os.listdir(visual_data_path+'/'+ dataset)
    #print ('Loaded ' + str(len(img_list)) +' images of Expression - '+'{}\n'.format(dataset))
    count =0 
    for img in img_list:
        input_img=cv2.imread(visual_data_path + '/'+ dataset + '/'+ img )
        #input_img_gray = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
        input_img_resize=cv2.resize(input_img,(256,256))
        img_data_list.append(input_img_resize)
        visual_labels.append(dataset)
visual_data = np.array(img_data_list)
visual_data = visual_data.astype('float32')
visual_data = visual_data/255
print('Data Shape - ',visual_data.shape)

Data Shape -  (4200, 256, 256, 3)


In [18]:
vle = LabelEncoder()
visual_y = vle.fit_transform(visual_labels)
keys = vle.classes_
values = vle.transform(vle.classes_)
dictionary = dict(zip(keys, values))
print(dictionary)

{'anger': 0, 'disgust': 1, 'fear': 2, 'joy': 3, 'neutral': 4, 'sadness': 5, 'surprise': 6}


In [19]:
#splitting visual data into train and test
visual_X_train, visual_X_test, visual_Y_train, visual_Y_test = train_test_split(visual_data, visual_y, test_size=0.2, stratify=visual_y)

In [23]:
#checking value counts for each expression
print(Counter(audio_y_train))
print(Counter(visual_Y_train))
print(Counter(audio_y_test))
print(Counter(visual_Y_test))

Counter({6: 480, 4: 480, 3: 480, 5: 480, 2: 480, 1: 480, 0: 480})
Counter({0: 480, 6: 480, 5: 480, 4: 480, 2: 480, 3: 480, 1: 480})
Counter({4: 120, 6: 120, 3: 120, 5: 120, 2: 120, 1: 120, 0: 120})
Counter({0: 120, 1: 120, 4: 120, 5: 120, 2: 120, 6: 120, 3: 120})


In [24]:
print('Visual train data Shape - ',visual_X_train.shape)
print('Visual test data Shape - ',visual_X_test.shape)
print('Audio train data Shape - ',audio_x_train.shape)
print('Audio test data Shape - ',audio_x_test.shape)

visual_Y_traincat= to_categorical(visual_Y_train)
visual_Y_testcat= to_categorical(visual_Y_test)

Visual train data Shape -  (3360, 256, 256, 3)
Visual test data Shape -  (840, 256, 256, 3)
Audio train data Shape -  (3360, 40, 1)
Audio test data Shape -  (840, 40, 1)
visual data labels Shape -  (3360, 7)
visual data labels Shape -  (840, 7)


In [25]:
visualpreds_train = visual_model.predict(visual_X_train)
visualpreds_test = visual_model.predict(visual_X_test)

In [26]:
audiopreds_train = audio_model.predict(audio_x_train)
audiopreds_test = audio_model.predict(audio_x_test)

### Creating new data from results of two models

In [27]:
Audiotraindf = pd.DataFrame(audiopreds_train,columns=['angerAN','disgustAN','fearAN', 'joyAN', 'neutralAN', 'sadnessAN', 'surpriseAN'])
Audiotraindf['labelA'] = audio_y_train
Audiotraindf = Audiotraindf.sort_values('labelA')
Audiotraindf.reset_index(drop=True, inplace=True)
Audiotestdf = pd.DataFrame(audiopreds_test,columns=['angerAN','disgustAN','fearAN', 'joyAN', 'neutralAN', 'sadnessAN', 'surpriseAN'])
Audiotestdf['labelA'] = audio_y_test
Audiotestdf = Audiotestdf.sort_values('labelA')
Audiotestdf.reset_index(drop=True, inplace=True)

In [28]:
Visualtraindf = pd.DataFrame(visualpreds_train,columns=['angerVN','disgustVN','fearVN', 'joyVN', 'neutralVN', 'sadnessVN', 'surpriseVN'])
Visualtraindf['labelV'] = visual_Y_train
Visualtraindf = Visualtraindf.sort_values('labelV')
Visualtraindf.reset_index(drop=True, inplace=True)
Visualtestdf = pd.DataFrame(visualpreds_test,columns=['angerVN','disgustVN','fearVN', 'joyVN', 'neutralVN', 'sadnessVN', 'surpriseVN'])
Visualtestdf['labelV'] = visual_Y_test
Visualtestdf = Visualtestdf.sort_values('labelV')
Visualtestdf.reset_index(drop=True, inplace=True)

In [29]:
frames = [Audiotestdf,Visualtestdf]
testdata = pd.concat(frames,axis=1)
testdata = testdata.sample(frac=1).reset_index(drop=True)

In [30]:
frames = [Audiotraindf,Visualtraindf]
traindata = pd.concat(frames,axis=1)
traindata =traindata.sample(frac=1).reset_index(drop=True)

In [31]:
x_train = traindata.drop(['labelA','labelV'],axis=1)
y_train =traindata['labelA']

In [32]:
x_test = testdata.drop(['labelA','labelV'],axis=1)
y_test =testdata['labelA']

### Random Forest

In [126]:
clf = RandomForestClassifier(criterion='entropy',max_depth=2)

In [127]:
clf.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
                       max_depth=2, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [128]:
score = clf.score(x_test,y_test)
print(score)

0.9821428571428571


In [131]:
pkl_filename = "../weights/ensemble.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(clf, file)