# Pattern Recognition Course
## Lab 2: Face Recognition

---

## Import Libraries

In [None]:
import kagglehub
import os
from PIL import Image
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy.stats import mode
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from math import log2

# Seed value shared by the notebook; NumPy's global generator is seeded from
# this constant (rather than a repeated literal) so the two cannot drift apart.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

## Prepare Dataset

- ### Read Images

In [None]:
# Download latest version; `path` is the local directory returned by kagglehub
path = kagglehub.dataset_download("kasikrit/att-database-of-faces")
images = []
labels = []

# Loop through each subject folder (s1 to s40)
for subject in sorted(os.listdir(path)):
    subject_path = os.path.join(path, subject)

    # Skip if not a directory
    if not os.path.isdir(subject_path):
        continue

    # Get the subject number from folder name (s1 -> 0, s2 -> 1, etc.).
    # A folder whose name does not end in a number is not a subject folder;
    # skip it instead of letting int() abort the whole load.
    try:
        subject_num = int(subject[1:]) - 1
    except ValueError:
        continue

    # Loop through each image in the subject folder
    for image_file in sorted(os.listdir(subject_path)):
        if not image_file.endswith('.pgm'):  # AT&T uses PGM format
            continue

        image_path = os.path.join(subject_path, image_file)

        # Read the pixels into a numpy array while the file is open; the
        # context manager guarantees the file handle is released afterwards.
        with Image.open(image_path) as img:
            img_array = np.array(img)

        images.append(img_array)
        labels.append(subject_num)

# Fail with a clear message instead of an IndexError on images[0] below
if not images:
    raise FileNotFoundError(f"No .pgm images found in subject folders under {path!r}")

images, labels = np.array(images), np.array(labels)

print(f"Dataset loaded with {len(images)} images")
print(f"Image shape: {images[0].shape}")
print(f"Number of classes: {len(np.unique(labels))}")

- ### Generate the Data Matrix

In [None]:
# Calculating number of pixels (features) for each image
row_pixels, col_pixels = images[0].shape
pixels_number = row_pixels * col_pixels

# Build the data matrix: one row per image, one column per pixel.
# A single reshape flattens every image in row-major order (the same order
# img.flatten() uses); astype(np.float64) returns a float copy, so X does not
# share memory with `images`.
X = images.reshape(len(images), pixels_number).astype(np.float64)

# Target vector: one class label per image
y = np.array(labels)

print(f'Data Matrix Shape{X.shape}')
print(f'Target Vector Length {y.shape}')

- ### Generate Training and Test Sets

In [None]:
# Alternate rows between the two sets: rows at even indices (0, 2, 4, ...)
# form the test set, rows at odd indices (1, 3, 5, ...) form the training set.
test_rows = slice(0, None, 2)
train_rows = slice(1, None, 2)

X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

print(f'Training Set Matrix Shape {X_train.shape}, Training Labels Length {y_train.shape}')
print(f'Test Set Matrix Shape {X_test.shape}, Test Labels Length {y_test.shape}')

## PCA Implementation 