In [31]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import warnings
from sklearn.exceptions import ConvergenceWarning

def load_face_database(path, target_size=(100, 100)):
    images = []
    labels = []
    label_names = sorted(os.listdir(path))
    print(f"Found labels: {label_names}")
    for label in label_names:
        label_path = os.path.join(path, label)
        if not os.path.isdir(label_path):
            continue
        for filename in os.listdir(label_path):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.pgm')):
                img_path = os.path.join(label_path, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    # Resize to fixed size (important!)
                    img_resized = cv2.resize(img, target_size)
                    images.append(img_resized)
                    labels.append(label)
                else:
                    print(f"Warning: failed to load image {img_path}")
    # Check all images have the same shape before converting to np.array
    unique_shapes = set(img.shape for img in images)
    print(f"Unique image shapes loaded: {unique_shapes}")
    images = np.array(images)
    labels = np.array(labels)
    print(f"Loaded {len(images)} images with labels")
    return images, labels

def display_sample_faces(images, labels, num_samples=10):
    print(f"\nDisplaying {num_samples} sample face images...")
    for i in range(min(num_samples, len(images))):
        img = images[i]
        label = labels[i]
        cv2.imshow(f"Face - {label}", img)
        cv2.waitKey(1000)  # Display each face for 1000 ms (1 second)
        cv2.destroyAllWindows()

def evaluate_model_with_different_k(face_db, labels, k_values, test_size=0.4, random_state=42):
    # Flatten images for PCA input
    X = np.array([img.flatten() for img in face_db])
    # Encode labels as integers
    le = LabelEncoder()
    y = le.fit_transform(labels)
    
    # Split dataset into train/test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y)
    
    print(f"\nTraining set size: {X_train.shape[0]} samples")
    print(f"Testing set size: {X_test.shape[0]} samples")
    
    accuracies = []
    
    # Suppress convergence warnings for cleaner output
    warnings.filterwarnings("ignore", category=ConvergenceWarning)
    
    for k in k_values:
        print(f"\nEvaluating with PCA components: {k}")
        
        # Apply PCA with k components
        pca = PCA(n_components=k, whiten=True, random_state=random_state)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)
        
        # Train a simple ANN (MLPClassifier) on PCA features
        ann = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, early_stopping=True, random_state=random_state)
        ann.fit(X_train_pca, y_train)
        
        # Predict on test set
        y_pred = ann.predict(X_test_pca)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {acc:.4f}")
        accuracies.append(acc)
    
    # Return all results and models of last run
    return k_values, accuracies, pca, ann, X_test, y_test, le

# ---- Main Execution ----

dataset_path = 'dataset/att_faces'  

face_db, labels = load_face_database(dataset_path)

if len(face_db) == 0:
    print("No images loaded! Please check your dataset path and contents.")
else:
    # Show some sample faces with labels
    display_sample_faces(face_db, labels, num_samples=10)
    
    # Evaluate model with different PCA component values
    k_values = list(range(20, 81, 10))  # 20, 30, ..., 80
    k_values, accuracies, pca_model, ann_model, X_test, y_test, label_encoder = evaluate_model_with_different_k(face_db, labels, k_values)
    
    print("\nSummary of accuracies for different PCA component values:")
    for k, acc in zip(k_values, accuracies):
        print(f"PCA components: {k} - Accuracy: {acc:.4f}")


Found labels: ['.DS_Store', 'Aamir', 'Ajay', 'Akshay', 'Alia', 'Amitabh', 'Deepika', 'Disha', 'Farhan', 'Ileana']
Unique image shapes loaded: {(100, 100)}
Loaded 450 images with labels

Displaying 10 sample face images...


2025-05-23 16:00:20.522 python3.10[45348:1145419] not in fullscreen state



Training set size: 270 samples
Testing set size: 180 samples

Evaluating with PCA components: 20
Accuracy: 0.4111

Evaluating with PCA components: 30
Accuracy: 0.4000

Evaluating with PCA components: 40
Accuracy: 0.1722

Evaluating with PCA components: 50
Accuracy: 0.4611

Evaluating with PCA components: 60
Accuracy: 0.6222

Evaluating with PCA components: 70
Accuracy: 0.2889

Evaluating with PCA components: 80
Accuracy: 0.4889

Summary of accuracies for different PCA component values:
PCA components: 20 - Accuracy: 0.4111
PCA components: 30 - Accuracy: 0.4000
PCA components: 40 - Accuracy: 0.1722
PCA components: 50 - Accuracy: 0.4611
PCA components: 60 - Accuracy: 0.6222
PCA components: 70 - Accuracy: 0.2889
PCA components: 80 - Accuracy: 0.4889
