# Simple LFW Dataset Analysis

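This notebook demonstrates basic principal component analysis (PCA) on a subset of the Labeled Faces in the Wild (LFW) dataset: it loads the face images, fits a PCA model, plots the cumulative explained variance, and displays the leading eigenfaces.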

In [None]:
import sys
sys.path.append('../src')

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from utils import load_dataset, plot_eigenfaces

%matplotlib inline

In [None]:
# Settings
IMAGE_SIZE = (50, 50)  # passed to load_dataset (image_size) and to plot_eigenfaces
N_COMPONENTS = 50      # number of principal components requested from PCA
MAX_PEOPLE = 10        # passed to load_dataset as max_people

# Load dataset
# NOTE(review): the first path is presumably the image directory and the second
# the list of people to load — confirm against utils.load_dataset.
print("Loading dataset...")
face_data, face_names = load_dataset(
    '../lfw-deepfunneled',
    '../peopleDevTrain',
    max_people=MAX_PEOPLE,
    image_size=IMAGE_SIZE
)
print(f"Loaded {len(face_data)} images")

# Fail fast with an actionable message: an empty result (e.g. wrong relative
# paths) would otherwise only surface later as an unrelated error inside PCA.
if len(face_data) == 0:
    raise RuntimeError(
        "load_dataset returned no images; check that '../lfw-deepfunneled' and "
        "'../peopleDevTrain' exist relative to this notebook."
    )

In [None]:
# Perform PCA
# PCA cannot extract more components than min(n_samples, n_features), so clamp
# the requested count; with a small subset of people, N_COMPONENTS may exceed
# the number of loaded images and the fit would raise a ValueError.
face_matrix = np.asarray(face_data)
n_components = min(N_COMPONENTS, *face_matrix.shape)
if n_components < N_COMPONENTS:
    print(f"Requested {N_COMPONENTS} components but data has shape "
          f"{face_matrix.shape}; using {n_components} instead")

# A fixed random_state keeps the fit reproducible across Restart & Run All in
# case scikit-learn selects its randomized SVD solver for this input size.
pca = PCA(n_components=n_components, random_state=0)
face_pca = pca.fit_transform(face_matrix)

# Plot explained variance
# The x-axis is the component count (1..k), not the 0-based array index, so the
# curve lines up with the 'Number of Components' label.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
component_counts = np.arange(1, len(cumulative_variance) + 1)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(component_counts, cumulative_variance)
ax.set_xlabel('Number of Components')
ax.set_ylabel('Cumulative Explained Variance')
ax.set_title('Explained Variance vs Components')
ax.grid(True)
plt.show()

# Show eigenfaces
# plot_eigenfaces receives the principal axes and the image size and returns an
# image that is displayed below.
# NOTE(review): the title assumes the helper tiles the first four components —
# confirm against utils.plot_eigenfaces.
eigenfaces_grid = plot_eigenfaces(pca.components_, IMAGE_SIZE)
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(eigenfaces_grid, cmap='gray')
ax.axis('off')
ax.set_title('First 4 Eigenfaces')
plt.show()