## 1 - Importing related libraries and functions
In this section we import necessary libraries and functions:


In [1]:
from delaunay import extract_features
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

## 2 - Load and preprocess the dataset
In this section we first import our designated dataset which is `CK+` dataset, read ,and load it in an array


In [2]:
dataset_dir = "dataset"

label_map = {
    "anger": 0,
    "contempt": 1,
    "disgust": 2,
    "fear": 3,
    "happy": 4,
    "sadness": 5,
    "surprise": 6
}
# Load and preprocess the dataset
def load_ckplus_dataset(dataset_dir):
    images = []
    labels = []
    for subdir in os.listdir(dataset_dir):
        if subdir in label_map:  # Check if directory name is a valid label
            label = label_map[subdir]  # Get integer label from the mapping
            for filename in os.listdir(os.path.join(dataset_dir, subdir)):
                if filename.endswith(".png"):  # Load only image files (assuming images are in PNG format)
                    img_path = os.path.join(dataset_dir, subdir, filename)
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
                    img = cv2.resize(img, (100, 100))  # Resize image
                    images.append(img)
                    labels.append(label)

    return images, labels

images, labels = load_ckplus_dataset(dataset_dir)


## 3 - Compare the results from raw image classification and delaunay triangulation method
### 3.1 - Train the raw dataset
As we know the dataset is in grayscale, we need to get the brightness level of each pixel and train our models on each image   
then we will report the results:
> Note: We have used standard scalar normalization that leads to better results


#### Pre-Process data:

In [9]:
# Initialize the scalar
scalar = StandardScaler()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

X_train = np.array([img.flatten() for img in X_train])
X_test = np.array([img.flatten() for img in X_test])

X_norm = scalar.fit_transform(X_train)
X_norm_test = scalar.fit_transform(X_test)


array([[152, 128,  76, ..., 160, 161, 161],
       [ 16,  17,  21, ...,  91,  90,  90],
       [ 73,  74,  75, ...,  86,  86,  86],
       ...,
       [132, 132, 132, ..., 232, 222, 218],
       [203, 203, 202, ..., 188, 187, 187],
       [ 56,  56,  56, ...,  56,  56,  56]], dtype=uint8)

#### Training phase: 
1. SVM Model:

In [22]:
# Train the SVM classifier
svm_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = svm_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8934010152284264
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85        23
           1       0.64      0.78      0.70         9
           2       0.86      0.88      0.87        43
           3       0.82      0.93      0.88        15
           4       1.00      0.95      0.98        43
           5       1.00      0.68      0.81        19
           6       0.91      0.96      0.93        45

    accuracy                           0.89       197
   macro avg       0.87      0.87      0.86       197
weighted avg       0.90      0.89      0.89       197


2. K-Nearest-Neighbors model:

In [23]:
# Train the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = knn_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.3604060913705584
Classification Report:
               precision    recall  f1-score   support

           0       0.21      0.43      0.28        23
           1       0.50      0.44      0.47         9
           2       0.37      0.40      0.38        43
           3       0.29      0.13      0.18        15
           4       0.34      0.33      0.33        43
           5       0.24      0.21      0.22        19
           6       0.67      0.44      0.53        45

    accuracy                           0.36       197
   macro avg       0.37      0.34      0.34       197
weighted avg       0.40      0.36      0.37       197


3. Naive Bayesian Classification:

In [24]:
# Train the GNB classifier
gnb_classifier = GaussianNB()
gnb_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = gnb_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.467005076142132
Classification Report:
               precision    recall  f1-score   support

           0       0.10      0.17      0.13        23
           1       0.21      0.56      0.30         9
           2       0.43      0.47      0.45        43
           3       0.00      0.00      0.00        15
           4       0.66      0.63      0.64        43
           5       0.40      0.11      0.17        19
           6       0.89      0.76      0.82        45

    accuracy                           0.47       197
   macro avg       0.39      0.38      0.36       197
weighted avg       0.50      0.47      0.47       197


4. Decision Tree:

In [25]:
# Train the Tree classifier
tree_classifier = DecisionTreeClassifier() 
tree_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = tree_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7918781725888325
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.61      0.58        23
           1       0.56      0.56      0.56         9
           2       0.92      0.79      0.85        43
           3       0.50      0.80      0.62        15
           4       0.86      0.88      0.87        43
           5       0.94      0.79      0.86        19
           6       0.90      0.84      0.87        45

    accuracy                           0.79       197
   macro avg       0.75      0.75      0.74       197
weighted avg       0.82      0.79      0.80       197


5. Quadratic Discriminant Analysis:

In [26]:
# Train the QDA classifier
qda_classifier = QuadraticDiscriminantAnalysis()
qda_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = qda_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)



Accuracy: 0.48223350253807107
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.43      0.43        23
           1       0.30      0.67      0.41         9
           2       0.51      0.51      0.51        43
           3       0.75      0.20      0.32        15
           4       0.53      0.49      0.51        43
           5       0.35      0.37      0.36        19
           6       0.55      0.58      0.57        45

    accuracy                           0.48       197
   macro avg       0.49      0.46      0.44       197
weighted avg       0.51      0.48      0.48       197


6. Random Forest:

In [27]:
# Train the rf classifier
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = rf_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.9898477157360406
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.96      0.96        23
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        43
           3       1.00      1.00      1.00        15
           4       1.00      1.00      1.00        43
           5       1.00      0.95      0.97        19
           6       0.98      1.00      0.99        45

    accuracy                           0.99       197
   macro avg       0.99      0.99      0.99       197
weighted avg       0.99      0.99      0.99       197


### 3.2 - Train the on dataset with triangulation
As implemented in the corresponding class, we need to detect landmarks of each image and use the detected points to train our model and give the results: 
> Note: We have used standard scalar normalization that leads to better results


We first build a function to process landmarks for rach image and save it to save time:

In [50]:
def extract_landmark_features(images):
    landmarks_list = []
    feature_lengths = []
    
    for image in images:
        features = extract_features(image)[0].tolist()
        landmarks_list.append(features)
        feature_lengths.append(len(features))

    max_features = max(feature_lengths)

    for i in range(len(images)):
        current_length = len(landmarks_list[i])
        if current_length < max_features:
            padding = [0] * (max_features - current_length)
            landmarks_list[i].extend(padding)

    head = np.arange(max_features)
    data = pd.DataFrame(landmarks_list, columns=head)
    data.to_csv('features.csv', index=False)
    
    return np.array(landmarks_list)

landmark_features = extract_landmark_features(images)

### Pre-Process data:
we use previously stored data for training:

In [6]:
df = pd.read_csv('features.csv')
landmark_features = df.values

# Initialize the scalar
scalar = StandardScaler()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(landmark_features, labels, test_size=0.1, random_state=42)

X_train = np.array([img.flatten() for img in X_train])
X_test = np.array([img.flatten() for img in X_test])

X_norm = scalar.fit_transform(X_train)
X_norm_test = scalar.transform(X_test)

landmark_features

array([[ 9., 55., 10., ...,  0.,  0.,  0.],
       [ 9., 55., 10., ...,  0.,  0.,  0.],
       [17., 36.,  0., ...,  0.,  0.,  0.],
       ...,
       [10., 54., 11., ...,  0.,  0.,  0.],
       [15., 13., 14., ...,  0.,  0.,  0.],
       [35., 15., 14., ...,  0.,  0.,  0.]])

#### Training phase: 
1. SVM Model:

In [7]:
# Train the SVM classifier
svm_classifier_tri = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = svm_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.41414141414141414
Classification Report:
               precision    recall  f1-score   support

           0       0.36      0.38      0.37        13
           1       0.00      0.00      0.00         4
           2       0.50      0.30      0.38        23
           3       0.00      0.00      0.00         3
           4       0.40      0.60      0.48        20
           5       0.00      0.00      0.00         9
           6       0.41      0.63      0.50        27

    accuracy                           0.41        99
   macro avg       0.24      0.27      0.25        99
weighted avg       0.36      0.41      0.37        99


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


2. K-Nearest-Neighbors model:

In [8]:
# Train the KNN classifier
knn_classifier_tri = KNeighborsClassifier(n_neighbors=5)
knn_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = knn_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.2828282828282828
Classification Report:
               precision    recall  f1-score   support

           0       0.13      0.31      0.19        13
           1       0.00      0.00      0.00         4
           2       0.33      0.61      0.43        23
           3       0.00      0.00      0.00         3
           4       0.67      0.20      0.31        20
           5       0.00      0.00      0.00         9
           6       0.60      0.22      0.32        27

    accuracy                           0.28        99
   macro avg       0.25      0.19      0.18        99
weighted avg       0.39      0.28      0.28        99


3. Naive Bayesian Classification:

In [55]:
# Train the GNB classifier
gnb_classifier_tri = GaussianNB()
gnb_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = gnb_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.050505050505050504
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        13
           1       0.07      1.00      0.14         4
           2       0.00      0.00      0.00        23
           3       0.00      0.00      0.00         3
           4       0.25      0.05      0.08        20
           5       0.00      0.00      0.00         9
           6       0.00      0.00      0.00        27

    accuracy                           0.05        99
   macro avg       0.05      0.15      0.03        99
weighted avg       0.05      0.05      0.02        99


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


4. Decision Tree:

In [36]:
# Train the Tree classifier
tree_classifier_tri = DecisionTreeClassifier() 
tree_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = tree_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.29441624365482233
Classification Report:
               precision    recall  f1-score   support

           0       0.09      0.13      0.10        23
           1       0.08      0.11      0.09         9
           2       0.35      0.26      0.30        43
           3       0.25      0.20      0.22        15
           4       0.39      0.33      0.35        43
           5       0.04      0.05      0.05        19
           6       0.54      0.56      0.55        45

    accuracy                           0.29       197
   macro avg       0.25      0.23      0.24       197
weighted avg       0.32      0.29      0.31       197


5. Quadratic Discriminant Analysis:

In [74]:
# Train the QDA classifier
qda_classifier_tri = QuadraticDiscriminantAnalysis()
qda_classifier_tri.fit(X_norm, y_train)

y_pred = qda_classifier_tri.predict(X_norm)
accuracy = accuracy_score(y_train, y_pred)
print("train Accuracy:", accuracy)

# Evaluate the classifier
y_pred = qda_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)



train Accuracy: 1.0
Accuracy: 0.16161616161616163
Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.46      0.39        13
           1       0.00      0.00      0.00         4
           2       0.15      0.13      0.14        23
           3       0.00      0.00      0.00         3
           4       0.17      0.15      0.16        20
           5       0.11      0.22      0.15         9
           6       0.22      0.07      0.11        27

    accuracy                           0.16        99
   macro avg       0.14      0.15      0.13        99
weighted avg       0.18      0.16      0.16        99


6. Random Forest:

In [73]:
# Train the rf classifier
rf_classifier_tri = RandomForestClassifier()
rf_classifier_tri.fit(X_norm, y_train)

y_pred = rf_classifier_tri.predict(X_norm)
accuracy = accuracy_score(y_train, y_pred)
print("train Accuracy:", accuracy)

# Evaluate the classifier
y_pred = rf_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("test Accuracy:", accuracy)
print("Classification Report:\n", report)

train Accuracy: 1.0
test Accuracy: 0.46464646464646464
Classification Report:
               precision    recall  f1-score   support

           0       0.25      0.23      0.24        13
           1       0.00      0.00      0.00         4
           2       0.50      0.26      0.34        23
           3       0.00      0.00      0.00         3
           4       0.40      0.60      0.48        20
           5       0.25      0.11      0.15         9
           6       0.62      0.89      0.73        27

    accuracy                           0.46        99
   macro avg       0.29      0.30      0.28        99
weighted avg       0.42      0.46      0.42        99


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


So based on the results:
- some models
- some other


Now we introduce our own Neural Network model to outperform others:

### Neural Network:

In [79]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential([
    Dense(256, activation='relu', kernel_regularizer=l2(0.1), input_shape=(X_norm.shape[1],)),
    Dropout(0.6),
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.6),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(7, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping monitor
early_stopping_monitor = EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True
)

# Train the model with early stopping
model.fit(X_norm, y_train, epochs=50, validation_split=0.1, callbacks=[early_stopping_monitor])

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_norm_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Test Accuracy: 41.41%
