In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import cv2
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.linear_model import Lasso
# from scikeras.wrappers import KerasClassifier, KerasRegressor
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Read datasets

In [26]:
# Root folder for all notebook inputs; the landmark feature tables are read
# from its 'features/' sub-folder.
readDataPath = './resources/'

# Load the landmark feature table of each dataset and print its
# (rows, columns) shape right after reading it.
landmarksJaffe = pd.read_csv(f"{readDataPath}features/landmarksJaffe.csv")
print(f"landmarksJaffe shape: {landmarksJaffe.shape}")

landmarksKdef = pd.read_csv(f"{readDataPath}features/landmarksKDEF.csv")
print(f"landmarksKdef.shape: {landmarksKdef.shape}")

landmarksCKplus = pd.read_csv(f"{readDataPath}features/landmarksCKPLUS.csv")
print(f"landmarksCKplus.shape: {landmarksCKplus.shape}")

landmarksCustom = pd.read_csv(f"{readDataPath}features/landmarksCustom.csv")
print(f"landmarksCustom.shape: {landmarksCustom.shape}")

landmarksJaffe shape: (213, 207)
landmarksKdef.shape: (2917, 207)
landmarksCKplus.shape: (840, 207)
landmarksCustom.shape: (118, 207)


# Concatenate 3D landmark datasets

In [27]:
# Stack the four landmark tables row-wise into one dataset.
# ignore_index=True gives the result a fresh 0..N-1 index. Each table was read
# with its own default 0..n-1 row labels, so without it the stacked frame (and
# the label Series derived from it) would carry duplicate index labels, making
# any label-based lookup or alignment ambiguous.
emotionLandmarks = pd.concat(
    [landmarksJaffe, landmarksKdef, landmarksCKplus, landmarksCustom],
    axis=0,
    ignore_index=True,
)
print(f"emotionLandmarks shape: {emotionLandmarks.shape}")

emotionLandmarks shape: (4088, 207)


# Split 3D landmark Training data & Labels

In [28]:
# Target vector: the 'emotion' column, stored as int8 class ids.
y = emotionLandmarks['emotion'].astype('int8')

# Feature matrix: every column except the target.
X_landmarks = emotionLandmarks.drop(columns='emotion')
print(f"X_landmarks shape: {X_landmarks.shape}")

X_landmarks shape: (4088, 206)


# Scaling 3D landmarks datasets

In [29]:
# Min-max scaler for the landmark features (scikit-learn default range).
scaler = MinMaxScaler()

# Learn the per-feature min/max and rescale the landmark features.
# NOTE(review): the scaler is fitted on ALL rows, i.e. also on the samples that
# the later train/test split and the cross-validation folds hold out, so those
# evaluation scores are not computed on fully unseen data.
X_landmarks_scaled = scaler.fit(X_landmarks).transform(X_landmarks)
print(f"X_3D_scaled Shape = {X_landmarks_scaled.shape}")
# print(f"scaler min = {scaler.data_min_}, scaler max = {scaler.data_max_}")

# Optional (disabled): persist the per-feature min/max values so the same
# scaling can be re-applied outside this notebook. The `with` block already
# closes the file, so no explicit close() is needed.
# with open('./resources/combiModels/scalerParams_svm_mvp1.pkl', 'wb') as file:
#     params = {'minValues': scaler.data_min_, 'maxValues': scaler.data_max_}
#     pickle.dump(params, file)

X_3D_scaled Shape = (4088, 206)


# Dimensionality Reduction of 3D landmark dataset

In [30]:
# Number of principal components kept after the projection.
n_components = 120

# Fit PCA on the scaled landmarks and project them onto the kept components.
# NOTE(review): like the scaler, PCA is fitted on ALL rows, including the
# samples later held out by the train/test split and cross-validation.
pca = PCA(n_components=n_components)
X_landmarks_scaled_pca = pca.fit_transform(X_landmarks_scaled)
print(f"X_3D_scaled_pca shape: {X_landmarks_scaled_pca.shape}, type: {type(X_landmarks_scaled_pca)}")

# Optional (disabled): save the fitted PCA model to a file. The `with` block
# already closes the file, so no explicit close() is needed.
# with open(readDataPath+'combiModels/pca_svm_mvp1.pkl', 'wb') as file:
#     pickle.dump(pca, file)

# Variance retained by the kept components: last entry of the running sum of
# the per-component explained-variance ratios, expressed in percent.
cumulativeVariancePct = np.cumsum(pca.explained_variance_ratio_ * 100)
print(f"Explained variance: {round(cumulativeVariancePct[-1],2)}%")

X_3D_scaled_pca shape: (4088, 120), type: <class 'numpy.ndarray'>
Explained variance: 99.3%


# Shuffle datasets

In [31]:
# Shapes/types before shuffling, for comparison with the shuffled copies below.
print(f"X_landmarks_scaled_pca shape: {X_landmarks_scaled_pca.shape}, type: {type(X_landmarks_scaled_pca)}")
print(f"y shape: {y.shape}, type: {type(y)}")

# Shuffle features and labels together so that rows and labels stay paired;
# the fixed random_state makes the resulting order repeatable.
X_landmarks_scaled_pca_shuffled, y_shuffled = shuffle(X_landmarks_scaled_pca, y, random_state=512)

# Each label names the variable that is actually printed.
print(f"X_landmarks_scaled_pca_shuffled shape: {X_landmarks_scaled_pca_shuffled.shape}, type: {type(X_landmarks_scaled_pca_shuffled)}")
print(f"y_shuffled shape: {y_shuffled.shape}, type: {type(y_shuffled)}")

X_landmarks_scaled_pca shape: (4088, 120), type: <class 'numpy.ndarray'>
y shape: (4088,), type: <class 'pandas.core.series.Series'>
X_landmarks_gabor_scaled_pca_shuffled shape: (4088, 120), type: <class 'numpy.ndarray'>
y shape: (4088,), type: <class 'pandas.core.series.Series'>


# Check label count

In [32]:
# Human-readable class names; the position in the list is used as the
# integer class id when indexing below.
emo = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprised', 'neutral']

# Distinct class ids present in the shuffled labels and how often each occurs.
emotions, counts = np.unique(y_shuffled, return_counts=True)

# Report one line per class: name followed by its number of samples.
for idx in range(len(emotions)):
    print(f"{emo[emotions[idx]]} count = {counts[idx]}")

angry count = 569
disgust count = 602
fear count = 538
happy count = 654
sad count = 518
surprised count = 664
neutral count = 543


# Build and Validate a Single-Run SVM-RBF Model

In [33]:
# Hold out 10% of the shuffled samples for validation; the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_landmarks_scaled_pca_shuffled,
    y_shuffled,
    test_size=0.1,
    shuffle=True,
    random_state=190,
)

# RBF-kernel SVM with fixed C and gamma, trained on the training split.
svmClf = SVC(kernel='rbf', C=7, gamma=0.6)
hist = svmClf.fit(X_train, y_train)

# Accuracy on the samples the model was trained on.
y_train_pred = svmClf.predict(X_train)
train_acc = accuracy_score(y_train, y_train_pred)

# Accuracy on the held-out samples.
y_val_pred = svmClf.predict(X_test)
val_acc = accuracy_score(y_test, y_val_pred)

print(f"Training accuracy: {round(train_acc*100,2)}%, Validation accuracy: {round(val_acc*100,2)}%")

Training accuracy: 99.97%, Validation accuracy: 84.11%


# Confusion Matrix for a Typical Case (Single Train/Test Split)

In [34]:
# Confusion matrix of the single-run classifier on the held-out split.
y_pred = svmClf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# Correct predictions sit on the main diagonal; the sum of all cells is the
# number of evaluated instances.
TP = np.trace(cm)
totalIns = cm.sum()

print(f"Number of Test Instances= {totalIns}")
print(f"Accuracy= {round((TP/totalIns)*100,2)}%")
print('Confusion Matrix')
print(cm)

Number of Test Instances= 409
Accuracy= 84.11%
Confusion Matrix
[[42  3  3  0  2  0  1]
 [ 5 38  1  0  5  0  0]
 [ 2  1 43  2  0  8  0]
 [ 0  0  0 63  0  0  1]
 [ 2  5  2  0 42  1  7]
 [ 0  1  5  1  0 62  0]
 [ 2  1  1  0  3  0 54]]


# Grid Search Best Estimator for SVM-RBF

In [38]:
# Single-step pipeline around the RBF SVM; its hyperparameters are addressed
# through the 'svm__' prefix in the grid below.
pipeline = Pipeline([
    ('svm', SVC(kernel='rbf', probability=True, random_state=42)),
])

# Hyperparameter values to search over.
param_grid = {
    'svm__C': [7],
    'svm__gamma': [0.6, 0.65],
}

# Exhaustive search over the grid, scoring every candidate with 10-fold
# cross-validation on the shuffled, PCA-reduced landmarks.
grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=10, verbose=2)
hist = grid_search.fit(X_landmarks_scaled_pca_shuffled, y_shuffled)

# Print the best hyperparameters and the corresponding mean CV score.
print(f"Best hyperparameters: {grid_search.best_params_}")
# Convert to percent BEFORE rounding: multiplying an already rounded value by
# 100 can re-introduce floating-point noise (e.g. 55.120000000000005). This
# also matches the round(x*100, 2) form used by the other cells.
print(f"Best score: {round(grid_search.best_score_*100,2)}%")

Fitting 10 folds for each of 2 candidates, totalling 20 fits
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.1s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.1s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.1s
[CV] END ...........................svm__C=7, svm__gamma=0.6; total time=   5.0s
[CV] END ..........................svm__C=7, svm__gamma=0.65; total time=   5.5s
[CV] END ..........................svm__C=7, svm

# K-Fold Cross Validation

In [39]:
# Number of cross-validation folds.
k = 10

# Fresh RBF SVM with the same C and gamma as the single-run model, scored
# with k-fold cross-validation on the shuffled, PCA-reduced landmarks.
svmClfCV = SVC(kernel='rbf', C=7, gamma=0.6)
scores = cross_val_score(
    svmClfCV,
    X_landmarks_scaled_pca_shuffled,
    y_shuffled,
    cv=k,
)

# Average of the per-fold scores, reported in percent.
print(f"Mean Cross-Validation Score: {round(scores.mean()*100,2)}%")

Mean Cross-Validation Score: 83.15%


# Save Serialized Model

In [37]:
# Optional (disabled): serialize the best estimator found by the grid search.
# NOTE(review): this cell's execution count (37) is lower than the grid-search
# cell's (38), so it was last run before the most recent grid search; re-run
# the grid search before enabling this cell so `grid_search` is up to date.
# NOTE(review): only unpickle this file from a trusted location, since
# pickle.load can execute arbitrary code.
# bestModel = grid_search.best_estimator_
# with open(readDataPath+'combiModels/modelSvmRbf_mvp1.pkl', 'wb') as file:
#     pickle.dump(bestModel,file)

# (The `with` block closes the file; no explicit file.close() is required.)