In [3]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
import pickle

def load_image(image_path):
    """Read an image file and derive its grayscale version.

    Progress and failures are reported on stdout.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        ``(image, gray)`` where ``image`` is what ``cv2.imread`` returned and
        ``gray`` is its ``cv2.COLOR_BGR2GRAY`` conversion, or ``(None, None)``
        when ``cv2.imread`` could not load the file.
    """
    print(f"Attempting to load image: {image_path}")
    loaded = cv2.imread(image_path)
    if loaded is not None:
        print(f"Successfully loaded image with shape: {loaded.shape}")
        return loaded, cv2.cvtColor(loaded, cv2.COLOR_BGR2GRAY)
    # cv2.imread signals an unreadable/missing file by returning None.
    print(f'Error: Could not load image at {image_path}')
    return None, None

# Load dataset
# Every directory under ``dataset_dir`` that contains files is treated as one
# person: the directory's base name becomes the label of each image in it.
# NOTE: image files placed directly in ``dataset_dir`` are therefore labelled
# with the base name of ``dataset_dir`` itself.
dataset_dir = 'content/images'  # Changed to relative path
print(f"Looking for images in directory: {os.path.abspath(dataset_dir)}")

if not os.path.exists(dataset_dir):
    raise FileNotFoundError(f"Dataset directory not found at: {dataset_dir}")

images = []
labels = []

print("Loading images from dataset...")
for root, dirs, files in os.walk(dataset_dir):
    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Sorting ``dirs`` in place (honoured by top-down walks) and ``files``
    # keeps the sample order stable, so the seeded train/test split that
    # follows is reproducible across runs and machines.
    dirs.sort()
    files.sort()
    print(f"\nProcessing directory: {root}")
    print(f"Found {len(files)} files")
    if not files:
        print("Skipping empty directory")
        continue
    person_name = os.path.basename(root)
    print(f"Processing images for {person_name}...")
    for f in files:
        if not f.lower().endswith(('.png', '.jpg', '.jpeg')):
            print(f"Skipping non-image file: {f}")
            continue
        image_path = os.path.join(root, f)
        print(f"Processing image: {f}")
        image, gray = load_image(image_path)
        if image is None:
            # load_image already reported the failure; skip the file.
            continue
        images.append(gray)  # Store grayscale images directly
        labels.append(person_name)
        print(f"Added image for {person_name}")

if not images:
    raise ValueError("No images found in the dataset directory. Please check your image paths and formats.")

print(f"\nSuccessfully loaded {len(images)} images")
print(f"Unique labels: {set(labels)}")

# Initialize face detector
print("\nInitializing face detector...")
# Frontal-face Haar cascade XML, located via the cv2.data.haarcascades prefix.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
# An empty classifier means the XML could not be loaded.
if face_cascade.empty():
    raise ValueError("Could not load face cascade classifier. Please check your OpenCV installation.")
print("Face detector initialized successfully")

def detect_faces(image_gray, scale_factor=1.05, min_neighbors=3, min_size=(20, 20)):
    """Run the module-level ``face_cascade`` on an image.

    Args:
        image_gray: Image passed to ``detectMultiScale``.
        scale_factor: Forwarded as ``scaleFactor``.
        min_neighbors: Forwarded as ``minNeighbors``.
        min_size: Forwarded as ``minSize``.

    Returns:
        Whatever ``face_cascade.detectMultiScale`` returns; callers in this
        file unpack each entry as ``(x, y, w, h)``.
    """
    # More lenient parameters for face detection
    detector_options = {
        'scaleFactor': scale_factor,
        'minNeighbors': min_neighbors,
        'minSize': min_size,
    }
    return face_cascade.detectMultiScale(image_gray, **detector_options)

def crop_faces(image_gray, faces, return_all=False):
    """Cut detected face rectangles out of an image.

    Args:
        image_gray: 2-D indexable image (sliced as ``[y:y+h, x:x+w]``).
        faces: Sized collection of ``(x, y, w, h)`` rectangles.
        return_all: When true, crop every rectangle; otherwise crop only the
            rectangle with the largest ``w * h`` (first one wins on ties).

    Returns:
        ``(cropped_faces, selected_faces)``: the image slices and the matching
        ``(x, y, w, h)`` tuples. Both lists are empty when ``faces`` is empty.
    """
    if len(faces) == 0:
        return [], []

    if return_all:
        chosen = faces
    else:
        chosen = [max(faces, key=lambda rect: rect[2] * rect[3])]

    # Normalise every rectangle to a plain 4-tuple before slicing.
    selected_faces = [(x, y, w, h) for x, y, w, h in chosen]
    cropped_faces = [image_gray[y:y+h, x:x+w] for x, y, w, h in selected_faces]
    return cropped_faces, selected_faces

# Preprocess faces
# Target size passed to cv2.resize for every face crop.
face_size = (128, 128)
def resize_and_flatten(face):
    """Resize a face crop to ``face_size`` and return it as a 1-D vector."""
    return cv2.resize(face, face_size).flatten()

# Prepare training data
# For every loaded image keep the face crop returned by crop_faces (its default
# mode selects the largest detection), flattened to a fixed-length vector.
print("\nPreprocessing faces...")
X = []
y = []
for i, (image, label) in enumerate(zip(images, labels)):
    print(f"\nProcessing image {i+1}/{len(images)} for {label}")
    faces = detect_faces(image)
    print(f"Found {len(faces)} faces in image")
    cropped_faces, _ = crop_faces(image, faces)
    if not cropped_faces:
        # Images without a detection contribute no sample.
        print(f"No face detected in image {i+1} for {label}")
        continue
    X.append(resize_and_flatten(cropped_faces[0]))
    y.append(label)
    print("Successfully processed face")

if not X:
    raise ValueError("No faces were detected in any of the images. Please check your images and face detection parameters.")

# Stack the samples into arrays: one row per face, one label per row.
X = np.array(X)
y = np.array(y)

print(f"\nPreprocessed {len(X)} faces")
print(f"Final dataset shape: {X.shape}")

# Verify we have enough samples for each class
min_samples_per_class = 2
class_counts = {}
for label in set(y):
    # Boolean mask of the rows carrying this label, summed to a count.
    class_counts[label] = (y == label).sum()
for label, count in class_counts.items():
    if count >= min_samples_per_class:
        continue
    raise ValueError(f"Class {label} has only {count} samples. Need at least {min_samples_per_class} samples per class.")

# Split data
# Stratified on the labels, with a fixed seed for the shuffle.
print("\nSplitting data into train and test sets...")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=177,
    stratify=y,
)
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

# Custom transformer for mean centering
class MeanCentering(BaseEstimator, TransformerMixin):
    """Transformer that subtracts the per-feature mean learned in ``fit``."""

    def fit(self, X, y=None):
        """Store the column-wise mean of ``X`` as ``mean_face``.

        ``y`` is accepted but not used. Returns ``self``.
        """
        column_means = np.mean(X, axis=0)
        self.mean_face = column_means
        return self

    def transform(self, X):
        """Return ``X`` with the stored ``mean_face`` subtracted."""
        centered = X - self.mean_face
        return centered

# Create pipeline
# Steps run in order: mean centering -> whitened PCA -> linear SVM.
print("\nCreating pipeline...")
pipe = Pipeline(steps=[
    ('centering', MeanCentering()),
    ('pca', PCA(svd_solver='randomized', whiten=True, random_state=177)),
    ('svc', SVC(kernel='linear', random_state=177)),
])

# Train and evaluate
# Fit on the training split only; the report is computed on the held-out split.
print("\nTraining model...")
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Visualize eigenfaces
# Each PCA component is reshaped to ``face_size`` and shown in a 4-column grid.
print("\nVisualizing eigenfaces...")
n_components = len(pipe[1].components_)
ncol = 4
nrow = (n_components + ncol - 1) // ncol  # ceil(n_components / ncol)
fig, axes = plt.subplots(nrow, ncol, figsize=(10, 2.5*nrow), subplot_kw={'xticks':[], 'yticks':[]})
eigenfaces = pipe[1].components_.reshape((n_components, X_train.shape[1]))
for i, ax in enumerate(axes.flat):
    if i >= n_components:
        # The grid holds nrow*ncol axes, which exceeds n_components whenever
        # n_components is not a multiple of ncol. Hide the surplus axes
        # instead of indexing past the end of ``eigenfaces``.
        ax.axis('off')
        continue
    ax.imshow(eigenfaces[i].reshape(face_size), cmap='gray')
    ax.set_title(f'Eigenface {i+1}')
plt.tight_layout()
plt.show()

# Save pipeline
# NOTE: pickle stores classes by reference, so MeanCentering must be importable
# under the same module path wherever this file is later unpickled.
print("\nSaving model...")
with open('eigenface_pipeline.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)
print("Model saved successfully!")

def get_eigenface_score(X):
    """Return one confidence score per row of ``X`` from the fitted pipeline.

    The rows are passed through the first two pipeline steps and then scored
    with the final step's ``decision_function``.

    Args:
        X: Array of flattened faces, one per row, as accepted by ``pipe``.

    Returns:
        1-D array with one score per sample: the largest per-class decision
        value, or the absolute decision value when the classifier yields a
        single signed value per sample.
    """
    X_pca = pipe[:2].transform(X)
    decision = np.asarray(pipe[2].decision_function(X_pca))
    if decision.ndim == 1:
        # With only two classes the SVC returns one signed margin per sample
        # instead of an (n_samples, n_classes) matrix, so reducing over axis 1
        # would raise. The margin's magnitude is the score of the winning
        # class.
        return np.abs(decision)
    eigenface_scores = np.max(decision, axis=1)
    return eigenface_scores

def eigenface_prediction(image_gray):
    """Detect a face in ``image_gray`` and classify it with ``pipe``.

    Only the crop(s) returned by ``crop_faces`` in its default mode are
    classified.

    Returns:
        The string ``'No face detected.'`` when nothing was detected;
        otherwise a tuple ``(scores, labels, selected_faces)``. Callers must
        check for the string case before unpacking.
    """
    detections = detect_faces(image_gray)
    cropped_faces, selected_faces = crop_faces(image_gray, detections)
    if not cropped_faces:
        return 'No face detected.'

    # One flattened, fixed-size vector per cropped face.
    X_face = np.array([resize_and_flatten(crop) for crop in cropped_faces])
    predicted_labels = pipe.predict(X_face)
    scores = get_eigenface_score(X_face)
    return scores, predicted_labels, selected_faces

def draw_text(image, label, score, font=cv2.FONT_HERSHEY_SIMPLEX, pos=(0, 0),
             font_scale=0.6, font_thickness=2, text_color=(0, 0, 0),
             text_color_bg=(0, 255, 0)):
    """Draw a filled box containing the score and the label onto ``image``.

    The box extends upward from ``pos``; the score is written on the upper
    line and the label on the lower line. ``image`` is modified in place.

    Args:
        image: Image to draw on.
        label: Text for the lower line.
        score: Number rendered as ``Score: <score with 2 decimals>``.
        font: OpenCV font passed to ``getTextSize`` / ``putText``.
        pos: ``(x, y)`` of the box's bottom-left corner. If the box would
            start above row 0, it is shifted down so it stays visible.
        font_scale: Font scale for both lines.
        font_thickness: Stroke thickness for both lines.
        text_color: Colour of the text.
        text_color_bg: Fill colour of the box.
    """
    x, y = pos
    score_text = f'Score: {score:.2f}'
    (w1, h1), _ = cv2.getTextSize(score_text, font, font_scale, font_thickness)
    (w2, h2), _ = cv2.getTextSize(label, font, font_scale, font_thickness)
    box_height = h1 + h2 + 25
    # The box is drawn upward from y. Without this clamp, the default
    # pos=(0, 0) or a face touching the top edge would put the box and both
    # text lines above row 0, where nothing is visible.
    y = max(y, box_height)
    cv2.rectangle(image, (x, y - box_height), (x + max(w1, w2) + 20, y), text_color_bg, -1)
    cv2.putText(image, label, (x+10, y-10), font, font_scale, text_color, font_thickness)
    cv2.putText(image, score_text, (x+10, y-h2-15), font, font_scale, text_color, font_thickness)

def draw_result(image, scores, labels, coords):
    """Return a copy of ``image`` annotated with one box and caption per face.

    Args:
        image: Source image; it is copied, not modified.
        scores: Per-face scores, in the same order as ``coords``.
        labels: Per-face labels, in the same order as ``coords``.
        coords: Per-face ``(x, y, w, h)`` rectangles.

    Returns:
        The annotated copy.
    """
    annotated = image.copy()
    box_color = (0, 255, 0)
    for rect, label, score in zip(coords, labels, scores):
        x, y, w, h = rect
        top_left = (x, y)
        bottom_right = (x + w, y + h)
        cv2.rectangle(annotated, top_left, bottom_right, box_color, 2)
        # The caption is anchored at the rectangle's top-left corner.
        draw_text(annotated, label, score, pos=top_left)
    return annotated

ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.