# **Importing libraries**

In [1]:
!pip install opencv-python



In [20]:
from sklearn.decomposition import KernelPCA
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
import cv2
import os
from PIL import Image
from collections import defaultdict
import math
import random
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# LOAD DATASET

In [6]:
def load_data(data_dir):
    faces = defaultdict(list)
    for emotion in os.listdir(data_dir):
        emotion_dir = os.path.join(data_dir, emotion)
        if os.path.isdir(emotion_dir):
            for image_file in os.listdir(emotion_dir):
                image_path = os.path.join(emotion_dir, image_file)
                image = Image.open(image_path).convert("L")
                image = np.array(image).flatten()
                faces[emotion].append(image)
    return faces

In [17]:
faces = load_data("/content/drive/MyDrive/Project  Face recognition/Demo/ATM images")

In [9]:
def show_label_images(faces, label):
    images = faces[label]
    num_images = len(images)
    num_cols = math.ceil(math.sqrt(num_images))
    num_rows = math.ceil(num_images / num_cols)

    fig, axes = plt.subplots(num_rows, num_cols, figsize=(num_cols * 2, num_rows * 2))
    axes = axes.flatten() if num_images > 1 else [axes]

    for ax, img in zip(axes, images):
        ax.imshow(img.reshape((112, 92)), cmap='gray')
        ax.axis('off')

    for ax in axes[num_images:]:
        ax.axis('off')

    plt.tight_layout()

In [10]:
def get_person_name(image_path):
    parts = image_path.replace("\\", "/").split("/")
    person_label = parts[-2]
    img_num = int(parts[-1].split('.')[0])
    return person_label, img_num

In [11]:
def split_train_test(faces, train_ratio):
    training_set = defaultdict(list)
    testing_set = defaultdict(list)

    for label, images in faces.items():
        random.shuffle(images)
        num_train = int(len(images) * train_ratio)

        for img_index, image in enumerate(images):
            person_label, img_num = get_person_name(label + '/' + str(img_index+1) + '.jpg')
            if img_index < num_train:
                training_set[person_label].append(image)
            else:
                testing_set[person_label].append(image)

    return training_set, testing_set

In [18]:
training_set, testing_set = split_train_test(faces, 0.9)

In [12]:
def prepare_data(dataset):
    X = []
    y = []
    for person_label, images in dataset.items():
        for image in images:
            X.append(image)
            y.append(person_label)
    X = np.array(X)
    y = np.array(y)
    return X, y

In [19]:
X_train, y_train = prepare_data(training_set)
X_test, y_test = prepare_data(training_set)

In [21]:
pipeline = Pipeline([
    ('kpca', KernelPCA(fit_inverse_transform=True)),
    ('svc', SVC())
])

param_grid = {
    'kpca__kernel': ['rbf', 'sigmoid', 'poly', 'linear'],
    'kpca__gamma': np.logspace(-2, 2, 5),
    'kpca__n_components': [10, 50, 60, 100, 150],
    'svc__C': [0.1, 1, 10, 100, 1000],
    'svc__gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)


250 fits failed out of a total of 5000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
250 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 401, in fit
    Xt = self._fit(X, y, **fit_params_steps)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 359, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "/usr/local/lib/python3.10/dist-packages/joblib/memory.py", line 353, in __call__
    return self.func(*args, **kwargs)
  File "/usr/l

Best parameters: {'kpca__gamma': 0.01, 'kpca__kernel': 'linear', 'kpca__n_components': 100, 'svc__C': 10, 'svc__gamma': 'scale'}
Best cross-validation score: 0.9777777777777779


In [22]:
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [23]:
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy on test set: 1.0
              precision    recall  f1-score   support

          s1       1.00      1.00      1.00         9
         s10       1.00      1.00      1.00         9
         s11       1.00      1.00      1.00         9
         s12       1.00      1.00      1.00         9
         s13       1.00      1.00      1.00         9
         s14       1.00      1.00      1.00         9
         s15       1.00      1.00      1.00         9
         s16       1.00      1.00      1.00         9
         s17       1.00      1.00      1.00         9
         s18       1.00      1.00      1.00         9
         s19       1.00      1.00      1.00         9
          s2       1.00      1.00      1.00         9
         s20       1.00      1.00      1.00         9
         s21       1.00      1.00      1.00         9
         s22       1.00      1.00      1.00         9
         s23       1.00      1.00      1.00         9
         s24       1.00      1.00      1.00         9
 

In [24]:
misclassified_indices = np.where(y_test != y_pred)[0]

misclassified_images = X_test[misclassified_indices]
misclassified_true_labels = y_test[misclassified_indices]
misclassified_pred_labels = y_pred[misclassified_indices]

num_misclassified = len(misclassified_indices)
num_misclassified

0

In [25]:
num_images_to_show = 10

for i, (image, true_label, pred_label) in enumerate(zip(misclassified_images, misclassified_true_labels, misclassified_pred_labels)):
    if i >= num_images_to_show:
        break

    plt.subplot(2, num_images_to_show // 2, i + 1)
    plt.imshow(image.reshape(112, 92), cmap='gray')
    plt.title(f'True: {true_label}\nPredicted: {pred_label}')
    plt.axis('off')

plt.tight_layout()
plt.show()


<Figure size 640x480 with 0 Axes>