## 1 - Importing related libraries and functions
In this section we import necessary libraries and functions:


In [140]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler


from delaunay import extract_features

## 2 - Load and preprocess the dataset
In this section we first import our designated dataset which is `CK+` dataset, read ,and load it in an array


In [141]:
dataset_dir = "dataset"

label_map = {
    "anger": 0,
    "contempt": 1,
    "disgust": 2,
    "fear": 3,
    "happy": 4,
    "sadness": 5,
    "surprise": 6
}
# Load and preprocess the dataset
def load_ckplus_dataset(dataset_dir):
    images = []
    labels = []
    for subdir in os.listdir(dataset_dir):
        if subdir in label_map:  # Check if directory name is a valid label
            label = label_map[subdir]  # Get integer label from the mapping
            for filename in os.listdir(os.path.join(dataset_dir, subdir)):
                if filename.endswith(".png"):  # Load only image files (assuming images are in PNG format)
                    img_path = os.path.join(dataset_dir, subdir, filename)
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
                    img = cv2.resize(img, (100, 100))  # Resize image
                    images.append(img)
                    labels.append(label)

    return images, labels

images, labels = load_ckplus_dataset(dataset_dir)

## 3 - Compare the results from raw image classification and delaunay triangulation method
### 3.1 - Train the raw dataset
As we know the dataset is in grayscale, we need to get the brightness level of each pixel and train our models on each image   
then we will report the results:
> Note: We have used standard scalar normalization that leads to better results


#### Pre-Process data:

In [142]:
# Initialize the scalar
scalar = StandardScaler()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

X_train = np.array([img.flatten() for img in X_train])
X_test = np.array([img.flatten() for img in X_test])

X_norm = scalar.fit_transform(X_train)
X_norm_test = scalar.fit_transform(X_test)




#### Training phase: 
1. SVM Model:

In [143]:
# Train the SVM classifier
svm_classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = svm_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.868020304568528
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.74      0.74        27
           1       0.55      0.67      0.60         9
           2       0.85      0.91      0.88        32
           3       0.94      0.70      0.80        23
           4       0.89      1.00      0.94        34
           5       0.78      0.70      0.74        20
           6       1.00      1.00      1.00        52

    accuracy                           0.87       197
   macro avg       0.82      0.82      0.81       197
weighted avg       0.87      0.87      0.87       197



2. K-Nearest-Neighbors model:

In [144]:
# Train the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = knn_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.36548223350253806
Classification Report:
               precision    recall  f1-score   support

           0       0.26      0.44      0.32        27
           1       0.46      0.67      0.55         9
           2       0.20      0.28      0.23        32
           3       0.40      0.17      0.24        23
           4       0.31      0.32      0.31        34
           5       0.17      0.10      0.12        20
           6       0.82      0.54      0.65        52

    accuracy                           0.37       197
   macro avg       0.37      0.36      0.35       197
weighted avg       0.42      0.37      0.37       197



3. Naive Bayesian Classification:

In [145]:
# Train the GNB classifier
gnb_classifier = GaussianNB()
gnb_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = gnb_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.49238578680203043
Classification Report:
               precision    recall  f1-score   support

           0       0.24      0.30      0.27        27
           1       0.32      0.67      0.43         9
           2       0.45      0.44      0.44        32
           3       0.22      0.09      0.12        23
           4       0.46      0.68      0.55        34
           5       0.31      0.20      0.24        20
           6       0.95      0.77      0.85        52

    accuracy                           0.49       197
   macro avg       0.42      0.45      0.42       197
weighted avg       0.51      0.49      0.49       197



4. Decision Tree:

In [146]:
# Train the Tree classifier
tree_classifier = DecisionTreeClassifier() 
tree_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = tree_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8121827411167513
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.81      0.75        27
           1       0.50      0.44      0.47         9
           2       0.81      0.81      0.81        32
           3       0.88      0.65      0.75        23
           4       0.78      0.94      0.85        34
           5       0.93      0.65      0.76        20
           6       0.91      0.92      0.91        52

    accuracy                           0.81       197
   macro avg       0.79      0.75      0.76       197
weighted avg       0.82      0.81      0.81       197



5. Quadratic Discriminant Analysis:

In [147]:
# Train the QDA classifier
qda_classifier = QuadraticDiscriminantAnalysis()
qda_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = qda_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)



Accuracy: 0.467005076142132
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.33      0.38        27
           1       0.50      0.33      0.40         9
           2       0.50      0.53      0.52        32
           3       0.38      0.48      0.42        23
           4       0.45      0.50      0.47        34
           5       0.27      0.50      0.35        20
           6       0.78      0.48      0.60        52

    accuracy                           0.47       197
   macro avg       0.47      0.45      0.45       197
weighted avg       0.52      0.47      0.48       197



6. Random Forest:

In [148]:
# Train the rf classifier
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = rf_classifier.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.9593908629441624
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.89      0.92        27
           1       1.00      0.78      0.88         9
           2       0.91      1.00      0.96        32
           3       1.00      0.87      0.93        23
           4       0.94      1.00      0.97        34
           5       0.91      1.00      0.95        20
           6       1.00      1.00      1.00        52

    accuracy                           0.96       197
   macro avg       0.96      0.93      0.94       197
weighted avg       0.96      0.96      0.96       197



## 3.2 - Train the on dataset with triangulation
As implemented in the corresponding class, we need to detect landmarks of each image and use the detected points to train our model and give the results: 
> Note: We have used standard scalar normalization that leads to better results


We first build a function to process landmarks for rach image and save it to save time:

In [149]:
# def extract_landmark_features(images):
#     landmarks_list = []
#     feature_lengths = []
#     landmarks_vector = []
#     maxX = len(images)
#     x = 0
#     for image in images:
#         features, lands  = extract_features(image)
#         x +=1 
#         print(x/maxX)
#         landmarks_list.append(features.tolist())
#         landmarks_vector.append(np.array(lands).flatten())
#         feature_lengths.append(len(features))

#     max_features = max(feature_lengths)

    
#     for i in range(len(images)):
#         current_length = len(landmarks_list[i])
#         if current_length < max_features:
#             padding = [0] * (max_features - current_length)
#             landmarks_list[i].extend(padding)

#     head = np.arange(max_features)
#     data_combined = pd.DataFrame(landmarks_list, columns=head)
#     data_combined.to_csv('features.csv', index=False)
#     data_landmarks = pd.DataFrame(landmarks_vector)
#     data_landmarks.to_csv('landmarks.csv',index=False)
    
#     return np.array(landmarks_list)

# landmark_features = extract_landmark_features()

### Pre-Process data:
we use previously stored data for training:

In [150]:
df = pd.read_csv('landmarks.csv')
# mask = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 22, 23, 25, 27, 29, 30, 31, 35, 36, 39, 42, 45, 48, 51, 54, 57]
# landmarks = [landmarks[i] for i in mask]

In [179]:
landmark_features = df.values

# Initialize the scalar
scalar = StandardScaler()

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(landmark_features, labels, test_size=0.1, random_state=42)


X_norm = scalar.fit_transform(X_train)
X_norm_test = scalar.transform(X_test)

X_norm 

array([[12, 44, 14, ..., 79, 49, 79],
       [ 9, 43,  9, ..., 88, 44, 88],
       [ 6, 42,  6, ..., 79, 45, 78],
       ...,
       [ 6, 39,  7, ..., 76, 47, 75],
       [ 9, 47, 10, ..., 91, 46, 90],
       [ 7, 46,  8, ..., 83, 43, 83]])

#### Training phase: 
1. SVM Model:

In [183]:
# Train the SVM classifier
svm_classifier_tri = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = svm_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)


y_train2 = svm_classifier_tri.predict(X_norm)
print("Acccccc ", accuracy_score(y_train,y_train2))
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Acccccc  0.6417233560090703
Accuracy: 0.5353535353535354
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.00      0.00      0.00         5
           2       0.40      1.00      0.57        18
           3       0.00      0.00      0.00        13
           4       0.64      0.94      0.76        17
           5       0.00      0.00      0.00        12
           6       0.83      1.00      0.90        19

    accuracy                           0.54        99
   macro avg       0.27      0.42      0.32        99
weighted avg       0.34      0.54      0.41        99



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


2. K-Nearest-Neighbors model:

In [164]:
# Train the KNN classifier
knn_classifier_tri = KNeighborsClassifier(n_neighbors=5)
knn_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = knn_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.6666666666666666
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.60      0.55        15
           1       0.60      0.60      0.60         5
           2       0.53      0.56      0.54        18
           3       0.78      0.54      0.64        13
           4       0.75      0.88      0.81        17
           5       0.50      0.33      0.40        12
           6       0.90      0.95      0.92        19

    accuracy                           0.67        99
   macro avg       0.65      0.64      0.64        99
weighted avg       0.67      0.67      0.66        99



3. Naive Bayesian Classification:

In [154]:
# Train the GNB classifier
gnb_classifier_tri = GaussianNB()
gnb_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = gnb_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7474747474747475
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.53      0.62        15
           1       0.36      0.80      0.50         5
           2       0.64      0.89      0.74        18
           3       0.73      0.62      0.67        13
           4       0.88      0.82      0.85        17
           5       0.86      0.50      0.63        12
           6       1.00      0.95      0.97        19

    accuracy                           0.75        99
   macro avg       0.74      0.73      0.71        99
weighted avg       0.79      0.75      0.75        99



4. Decision Tree:

In [155]:
# Train the Tree classifier
tree_classifier_tri = DecisionTreeClassifier() 
tree_classifier_tri.fit(X_norm, y_train)

# Evaluate the classifier
y_pred = tree_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8181818181818182
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.60      0.67        15
           1       0.50      0.80      0.62         5
           2       0.86      1.00      0.92        18
           3       1.00      0.77      0.87        13
           4       0.93      0.82      0.88        17
           5       0.64      0.58      0.61        12
           6       0.86      1.00      0.93        19

    accuracy                           0.82        99
   macro avg       0.79      0.80      0.78        99
weighted avg       0.83      0.82      0.82        99



5. Quadratic Discriminant Analysis:

In [160]:
# Train the QDA classifier
qda_classifier_tri = QuadraticDiscriminantAnalysis(reg_param=sklearn.metrics)
qda_classifier_tri.fit(X_norm, y_train)

y_pred = qda_classifier_tri.predict(X_norm)
accuracy = accuracy_score(y_train, y_pred)
print("train Accuracy:", accuracy)

# Evaluate the classifier
y_pred = qda_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

train Accuracy: 0.9943310657596371
Accuracy: 0.3434343434343434
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.00      0.00      0.00         5
           2       1.00      0.06      0.11        18
           3       0.00      0.00      0.00        13
           4       0.78      0.82      0.80        17
           5       0.00      0.00      0.00        12
           6       0.24      1.00      0.38        19

    accuracy                           0.34        99
   macro avg       0.29      0.27      0.18        99
weighted avg       0.36      0.34      0.23        99



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


6. Random Forest:

In [157]:
# Train the rf classifier
rf_classifier_tri = RandomForestClassifier()
rf_classifier_tri.fit(X_norm, y_train)

y_pred = rf_classifier_tri.predict(X_norm)
accuracy = accuracy_score(y_train, y_pred)
print("train Accuracy:", accuracy)

# Evaluate the classifier
y_pred = rf_classifier_tri.predict(X_norm_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("test Accuracy:", accuracy)
print("Classification Report:\n", report)

train Accuracy: 1.0
test Accuracy: 0.9494949494949495
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.73      0.85        15
           1       1.00      0.80      0.89         5
           2       0.86      1.00      0.92        18
           3       1.00      1.00      1.00        13
           4       1.00      1.00      1.00        17
           5       0.86      1.00      0.92        12
           6       1.00      1.00      1.00        19

    accuracy                           0.95        99
   macro avg       0.96      0.93      0.94        99
weighted avg       0.96      0.95      0.95        99



So based on the results:
- some models
- some other


Now we introduce our own Neural Network model to outperform others:

### Neural Network:

In [158]:
X_train, X_test, y_train, y_test = train_test_split(landmark_features, labels, test_size=0.1, random_state=42)

X_train.shape

(882, 136)