## 1 - Importing related libraries and functions
In this section we import necessary libraries and functions:


In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler


from delaunay import extract_features

## 2 - Load and preprocess the dataset
In this section we first import our designated dataset which is `CK+` dataset, read ,and load it in an array


In [2]:
dataset_dir = "dataset"

label_map = {
    "anger": 0,
    "contempt": 1,
    "disgust": 2,
    "fear": 3,
    "happy": 4,
    "sadness": 5,
    "surprise": 6
}
# Load and preprocess the dataset
def load_ckplus_dataset(dataset_dir):
    images = []
    labels = []
    for subdir in os.listdir(dataset_dir):
        if subdir in label_map:  # Check if directory name is a valid label
            label = label_map[subdir]  # Get integer label from the mapping
            for filename in os.listdir(os.path.join(dataset_dir, subdir)):
                if filename.endswith(".png"):  # Load only image files (assuming images are in PNG format)
                    img_path = os.path.join(dataset_dir, subdir, filename)
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
                    img = cv2.resize(img, (100, 100))  # Resize image
                    images.append(img)
                    labels.append(label)

    return images, labels

images, labels = load_ckplus_dataset(dataset_dir)

## 3 - Compare the results from raw image classification and delaunay triangulation method
### 3.1 - Train the raw dataset
As we know the dataset is in grayscale, we need to get the brightness level of each pixel and train our models on each image   
then we will report the results:
> Note: We have used standard scalar normalization that leads to better results


#### Pre-Process data:

In [3]:
# Initialize the scalar
scalar = StandardScaler()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

X_train = np.array([img.flatten() for img in X_train])
X_test = np.array([img.flatten() for img in X_test])

X_norm = scalar.fit_transform(X_train)
X_norm_test = scalar.fit_transform(X_test)




#### Training phase: 
1. SVM Model:

In [4]:
# Train the SVM classifier
svm_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = svm_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.868020304568528
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.74      0.74        27
           1       0.55      0.67      0.60         9
           2       0.85      0.91      0.88        32
           3       0.94      0.70      0.80        23
           4       0.89      1.00      0.94        34
           5       0.78      0.70      0.74        20
           6       1.00      1.00      1.00        52

    accuracy                           0.87       197
   macro avg       0.82      0.82      0.81       197
weighted avg       0.87      0.87      0.87       197



2. K-Nearest-Neighbors model:

In [5]:
# Train the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = knn_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.36548223350253806
Classification Report:
               precision    recall  f1-score   support

           0       0.26      0.44      0.32        27
           1       0.46      0.67      0.55         9
           2       0.20      0.28      0.23        32
           3       0.40      0.17      0.24        23
           4       0.31      0.32      0.31        34
           5       0.17      0.10      0.12        20
           6       0.82      0.54      0.65        52

    accuracy                           0.37       197
   macro avg       0.37      0.36      0.35       197
weighted avg       0.42      0.37      0.37       197



3. Naive Bayesian Classification:

In [6]:
# Train the GNB classifier
gnb_classifier = GaussianNB()
gnb_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = gnb_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.49238578680203043
Classification Report:
               precision    recall  f1-score   support

           0       0.24      0.30      0.27        27
           1       0.32      0.67      0.43         9
           2       0.45      0.44      0.44        32
           3       0.22      0.09      0.12        23
           4       0.46      0.68      0.55        34
           5       0.31      0.20      0.24        20
           6       0.95      0.77      0.85        52

    accuracy                           0.49       197
   macro avg       0.42      0.45      0.42       197
weighted avg       0.51      0.49      0.49       197



4. Decision Tree:

In [7]:
# Train the Tree classifier
tree_classifier = DecisionTreeClassifier() 
tree_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = tree_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7461928934010152
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.70      0.62        27
           1       0.50      0.56      0.53         9
           2       0.71      0.69      0.70        32
           3       0.83      0.43      0.57        23
           4       0.78      0.94      0.85        34
           5       0.85      0.55      0.67        20
           6       0.86      0.92      0.89        52

    accuracy                           0.75       197
   macro avg       0.73      0.69      0.69       197
weighted avg       0.76      0.75      0.74       197



5. Quadratic Discriminant Analysis:

In [8]:
# Train the QDA classifier
qda_classifier = QuadraticDiscriminantAnalysis()
qda_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = qda_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)



Accuracy: 0.467005076142132
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.33      0.38        27
           1       0.50      0.33      0.40         9
           2       0.50      0.53      0.52        32
           3       0.38      0.48      0.42        23
           4       0.45      0.50      0.47        34
           5       0.27      0.50      0.35        20
           6       0.78      0.48      0.60        52

    accuracy                           0.47       197
   macro avg       0.47      0.45      0.45       197
weighted avg       0.52      0.47      0.48       197



6. Random Forest:

In [9]:
# Train the rf classifier
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = rf_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.9593908629441624
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.93      0.94        27
           1       1.00      0.78      0.88         9
           2       0.94      1.00      0.97        32
           3       1.00      0.83      0.90        23
           4       0.92      1.00      0.96        34
           5       0.91      1.00      0.95        20
           6       1.00      1.00      1.00        52

    accuracy                           0.96       197
   macro avg       0.96      0.93      0.94       197
weighted avg       0.96      0.96      0.96       197



## 3.2 - Train the on dataset with triangulation
As implemented in the corresponding class, we need to detect landmarks of each image and use the detected points to train our model and give the results: 
> Note: We have used standard scalar normalization that leads to better results


We first build a function to process landmarks for rach image and save it to save time:

981

In [None]:
def extract_landmark_features(images):
    landmarks_list = []
    feature_lengths = []
    landmarks_vector = []
    maxX = len(images)
    x = 0
    for image in images:
        features, lands  = extract_features(image)
        x +=1 
        print(x/maxX)
        landmarks_list.append(features.tolist())
        landmarks_vector.append(np.array(lands).flatten())
        feature_lengths.append(len(features))

    max_features = max(feature_lengths)

    
    for i in range(len(images)):
        current_length = len(landmarks_list[i])
        if current_length < max_features:
            padding = [0] * (max_features - current_length)
            landmarks_list[i].extend(padding)

    head = np.arange(max_features)
    data_combined = pd.DataFrame(landmarks_list, columns=head)
    data_combined.to_csv('features.csv', index=False)
    data_landmarks = pd.DataFrame(landmarks_vector)
    data_landmarks.to_csv('landmarks.csv',index=False)
    
    return np.array(landmarks_list)

landmark_features = extract_landmark_features()

### Pre-Process data:
we use previously stored data for training:

In [45]:
df = pd.read_csv('features.csv')
drop_list = [f"{i}" for i in range(776,904)]
df = df.drop(drop_list,axis=1)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,766,767,768,769,770,771,772,773,774,775
0,19.0,40.0,3.0,41.0,10.0,32.0,10.666667,37.666668,3.0,41.0,...,59.666668,88.000000,43.0,85.0,33.0,75.0,43.0,77.0,39.666668,79.000000
1,50.0,60.0,57.0,65.0,53.0,66.0,53.333332,63.666668,57.0,65.0,...,63.333332,77.666664,62.0,85.0,66.0,97.0,54.0,90.0,60.666668,90.666664
2,61.0,43.0,59.0,64.0,51.0,54.0,57.000000,53.666668,59.0,64.0,...,58.666668,70.666664,64.0,70.0,70.0,73.0,68.0,73.0,67.333336,72.000000
3,25.0,41.0,9.0,41.0,16.0,35.0,16.666666,39.000000,9.0,41.0,...,61.333332,88.333336,66.0,80.0,58.0,85.0,58.0,78.0,60.666668,81.000000
4,27.0,43.0,23.0,32.0,31.0,41.0,27.000000,38.666668,23.0,32.0,...,46.666668,76.000000,57.0,86.0,50.0,87.0,50.0,81.0,52.333332,84.666664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
976,22.0,39.0,5.0,42.0,12.0,32.0,13.000000,37.666668,5.0,42.0,...,62.333332,79.666664,54.0,77.0,54.0,78.0,49.0,77.0,52.333332,77.333336
977,15.0,33.0,8.0,47.0,7.0,35.0,10.000000,38.333332,8.0,47.0,...,62.333332,76.000000,61.0,73.0,66.0,75.0,64.0,75.0,63.666668,74.333336
978,27.0,39.0,32.0,42.0,27.0,42.0,28.666666,41.000000,32.0,42.0,...,59.333332,88.666664,41.0,75.0,45.0,77.0,40.0,79.0,42.000000,77.000000
979,38.0,41.0,33.0,41.0,33.0,39.0,34.666668,40.333332,33.0,41.0,...,65.333336,74.333336,61.0,79.0,61.0,72.0,66.0,76.0,62.666668,75.666664


In [44]:
landmark_features = df.values

# Initialize the scalar
scalar = StandardScaler()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(landmark_features, labels, test_size=0.1, random_state=42)

X_train = np.array([img.flatten() for img in X_train])
X_test = np.array([img.flatten() for img in X_test])

X_norm = scalar.fit_transform(X_train)
X_norm_test = scalar.transform(X_test)

landmark_features

ValueError: Found input variables with inconsistent numbers of samples: [776, 981]

#### Training phase: 
1. SVM Model:

In [None]:
# Train the SVM classifier
svm_classifier_tri = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = svm_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.42424242424242425
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.20      0.32        15
           1       0.00      0.00      0.00         5
           2       0.62      0.72      0.67        18
           3       0.50      0.08      0.13        13
           4       0.38      0.59      0.47        17
           5       0.00      0.00      0.00        12
           6       0.33      0.79      0.46        19

    accuracy                           0.42        99
   macro avg       0.37      0.34      0.29        99
weighted avg       0.42      0.42      0.36        99



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


2. K-Nearest-Neighbors model:

In [None]:
# Train the KNN classifier
knn_classifier_tri = KNeighborsClassifier(n_neighbors=5)
knn_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = knn_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.37373737373737376
Classification Report:
               precision    recall  f1-score   support

           0       0.25      0.27      0.26        15
           1       0.33      0.20      0.25         5
           2       0.52      0.72      0.60        18
           3       0.36      0.31      0.33        13
           4       0.36      0.47      0.41        17
           5       0.00      0.00      0.00        12
           6       0.33      0.37      0.35        19

    accuracy                           0.37        99
   macro avg       0.31      0.33      0.32        99
weighted avg       0.32      0.37      0.34        99



3. Naive Bayesian Classification:

In [None]:
# Train the GNB classifier
gnb_classifier_tri = GaussianNB()
gnb_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = gnb_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.2727272727272727
Classification Report:
               precision    recall  f1-score   support

           0       0.26      0.67      0.37        15
           1       0.29      0.40      0.33         5
           2       0.00      0.00      0.00        18
           3       0.38      0.62      0.47        13
           4       0.00      0.00      0.00        17
           5       0.50      0.08      0.14        12
           6       0.20      0.32      0.24        19

    accuracy                           0.27        99
   macro avg       0.23      0.30      0.22        99
weighted avg       0.20      0.27      0.20        99



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


4. Decision Tree:

In [None]:
# Train the Tree classifier
tree_classifier_tri = DecisionTreeClassifier() 
tree_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = tree_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.3838383838383838
Classification Report:
               precision    recall  f1-score   support

           0       0.36      0.33      0.34        15
           1       0.17      0.20      0.18         5
           2       0.54      0.72      0.62        18
           3       0.29      0.15      0.20        13
           4       0.53      0.47      0.50        17
           5       0.20      0.17      0.18        12
           6       0.30      0.37      0.33        19

    accuracy                           0.38        99
   macro avg       0.34      0.35      0.34        99
weighted avg       0.37      0.38      0.37        99



5. Quadratic Discriminant Analysis:

In [None]:
# Train the QDA classifier
qda_classifier_tri = QuadraticDiscriminantAnalysis()
qda_classifier_tri.fit(X_norm, y_train)

y_pred = qda_classifier_tri.predict(X_norm)
accuracy = accuracy_score(y_train, y_pred)
print("train Accuracy:", accuracy)

# Evaluate the classifier
y_pred = qda_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)



train Accuracy: 1.0
Accuracy: 0.21212121212121213
Classification Report:
               precision    recall  f1-score   support

           0       0.11      0.07      0.08        15
           1       0.00      0.00      0.00         5
           2       0.30      0.50      0.38        18
           3       0.00      0.00      0.00        13
           4       0.36      0.29      0.32        17
           5       0.21      0.25      0.23        12
           6       0.19      0.16      0.17        19

    accuracy                           0.21        99
   macro avg       0.17      0.18      0.17        99
weighted avg       0.19      0.21      0.20        99



6. Random Forest:

In [None]:
# Train the rf classifier
rf_classifier_tri = RandomForestClassifier()
rf_classifier_tri.fit(X_norm, y_train)

y_pred = rf_classifier_tri.predict(X_norm)
accuracy = accuracy_score(y_train, y_pred)
print("train Accuracy:", accuracy)

# Evaluate the classifier
y_pred = rf_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("test Accuracy:", accuracy)
print("Classification Report:\n", report)

train Accuracy: 1.0
test Accuracy: 0.47474747474747475
Classification Report:
               precision    recall  f1-score   support

           0       0.29      0.13      0.18        15
           1       0.00      0.00      0.00         5
           2       0.65      0.72      0.68        18
           3       0.67      0.15      0.25        13
           4       0.44      0.88      0.59        17
           5       1.00      0.08      0.15        12
           6       0.41      0.74      0.53        19

    accuracy                           0.47        99
   macro avg       0.49      0.39      0.34        99
weighted avg       0.53      0.47      0.41        99



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


So based on the results:
- some models
- some other


Now we introduce our own Neural Network model to outperform others:

### Neural Network:

In [None]:
X_train, X_test, y_train, y_test = train_test_split(landmark_features, labels, test_size=0.1, random_state=42)

X_train.shape

(882, 904)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping


X_train, X_test, y_train, y_test = train_test_split(landmark_features, labels, test_size=0.1, random_state=42)


X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.1), input_shape=(982, 136, 1)),
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.1)),
    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.4),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(7, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping monitor
early_stopping_monitor = EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True
)

# Train the model with early stopping
model.fit(X_train, y_train, epochs=50, validation_split=0.1, callbacks=[early_stopping_monitor])

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(10000,256)
        self.fc2 = nn.Linear(256,128)
        self.fc3 = nn.Linear(128,64)
        self.fc4 = nn.Linear(64,32)
        self.fc5 = nn.Linear(32,1)
        self.fc6 = nn.Softmax(0)

    def forward(self,x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = F.dropout(x,0.7)
        x = self.fc4(x)
        x = F.relu(x)
        x = self.fc5(x)
        x = F.relu(x)
        return self.fc6(x)
        

