In [None]:
# Import necessary libraries
import os

import kagglehub
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array

In [None]:
# Fetch the LFW People dataset via kagglehub and show where the files landed
dataset_handle = "atulanandjha/lfwpeople"
path = kagglehub.dataset_download(dataset_handle)
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/atulanandjha/lfwpeople?dataset_version_number=3...


100%|██████████| 232M/232M [00:02<00:00, 116MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/atulanandjha/lfwpeople/versions/3


In [None]:
import os

# Show each top-level entry of the download, tagged as file or directory
print("Contents of main dataset path:")
for entry in os.listdir(path):
    if not os.path.isdir(os.path.join(path, entry)):
        print(f"File: {entry}")
        continue
    print(f"Directory: {entry}")


Contents of main dataset path:
File: pairsDevTest.txt
Directory: lfw_funneled
File: pairsDevTrain.txt
File: pairs.txt
File: lfw-funneled.tgz


In [None]:
# Verify the structure within the lfw_funneled directory
# NOTE(review): this helper was called but never defined in the notebook, so a
# fresh kernel raised NameError. It is reconstructed here from the recorded
# output format; confirm it matches the original intent.
def verify_directory_structure(base_folder):
    """Print each label directory under ``base_folder`` and its image count.

    Args:
        base_folder: Directory expected to hold one sub-directory per label.

    Entries of ``base_folder`` that are not directories are skipped. A file is
    counted as an image when its name ends in a common image extension.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    # Sorted so the report is stable across runs and filesystems.
    for label in sorted(os.listdir(base_folder)):
        label_dir = os.path.join(base_folder, label)
        if not os.path.isdir(label_dir):
            continue
        print(f"Found label directory: {label}")
        image_count = sum(
            name.lower().endswith(image_suffixes) for name in os.listdir(label_dir)
        )
        print(f"  Images found in {label}: {image_count}")


verify_directory_structure(os.path.join(path, "lfw_funneled"))


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Found label directory: Harrison_Ford
  Images found in Harrison_Ford: 12
Found label directory: Lazaro_Castro
  Images found in Lazaro_Castro: 1
Found label directory: Daniel_Chin
  Images found in Daniel_Chin: 1
Found label directory: Tex_Ritter
  Images found in Tex_Ritter: 1
Found label directory: Paul-Henri_Mathieu
  Images found in Paul-Henri_Mathieu: 3
Found label directory: Brandon_Robinson
  Images found in Brandon_Robinson: 1
Found label directory: Bing_Crosby
  Images found in Bing_Crosby: 1
Found label directory: Sandra_Banning
  Images found in Sandra_Banning: 1
Found label directory: Baz_Luhrmann
  Images found in Baz_Luhrmann: 1
Found label directory: Shania_Twain
  Images found in Shania_Twain: 1
Found label directory: Nicole_Kidman
  Images found in Nicole_Kidman: 19
Found label directory: Leszek_Miller
  Images found in Leszek_Miller: 3
Found label directory: William_Hochul
  Images found in William_Hochu

In [None]:
# Notebook-wide settings: where the images live and the size they are resized to
base_path = os.path.join(path, "lfw_funneled")
image_size = (64, 64)  # target size handed to cv2.resize by the loader

In [None]:
# Load images and labels
def load_images_and_labels(base_folder, target_size=None):
    """Load every readable image below ``base_folder`` together with its label.

    ``base_folder`` is expected to contain one sub-directory per label; each
    file inside a sub-directory is read with OpenCV, resized, converted to an
    array and divided by 255.

    Args:
        base_folder: Directory whose sub-directories are named after labels.
        target_size: Size passed to ``cv2.resize`` (OpenCV interprets it as
            ``(width, height)``). Defaults to the notebook-level ``image_size``.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a NumPy array stacking
        the processed images and ``labels`` is a list holding the sub-directory
        name of each image, in the same order.
    """
    if target_size is None:
        target_size = image_size  # fall back to the notebook-wide setting

    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split downstream) reproducible.
    for subdir in sorted(os.listdir(base_folder)):
        subpath = os.path.join(base_folder, subdir)
        if not os.path.isdir(subpath):
            continue  # top-level files are not label directories
        for filename in sorted(os.listdir(subpath)):
            img = cv2.imread(os.path.join(subpath, filename))
            if img is None:
                continue  # cv2.imread yields None for files it cannot decode
            img = cv2.resize(img, target_size)
            # Channels stay in OpenCV's BGR order; values are scaled by 1/255.
            images.append(img_to_array(img) / 255.0)
            labels.append(subdir)  # directory name is the label
    return np.array(images), labels

In [None]:
# Load dataset
images, labels = load_images_and_labels(base_path)

# Fail fast with a clear message if the dataset path is wrong or nothing was
# readable, instead of hitting an obscure error during encoding or splitting.
assert len(labels) > 0, f"No images could be loaded from {base_path}"

In [None]:
# Encode labels: names -> integer class ids -> one-hot rows
le = LabelEncoder()
labels_encoded = to_categorical(le.fit_transform(labels))

In [None]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Build CNN model
# The input shape is declared with an explicit Input layer (Keras warns when
# `input_shape` is passed to the first layer of a Sequential model) and is read
# from the training data so it always matches what the loader produced.
model = Sequential([
    Input(shape=X_train.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per distinct label seen by the encoder
    Dense(len(le.classes_), activation='softmax')
])

# categorical_crossentropy matches the one-hot targets built with to_categorical
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Fit the CNN on the training split, scoring the validation split after each epoch
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=2,
    validation_data=(X_val, y_val),
)


Epoch 1/2
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 271ms/step - accuracy: 0.0391 - loss: 7.8164 - val_accuracy: 0.0416 - val_loss: 8.0626
Epoch 2/2
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 257ms/step - accuracy: 0.0375 - loss: 7.7554 - val_accuracy: 0.0416 - val_loss: 8.1039


In [None]:
# Score the trained CNN on the validation split
val_scores = model.evaluate(X_val, y_val)
# Index 1 is the accuracy metric requested when the model was compiled
print("Validation accuracy:", val_scores[1])


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 84ms/step - accuracy: 0.0491 - loss: 8.0430
Validation accuracy: 0.041556477546691895


In [None]:
# Apply Dimensionality Reduction using PCA
from sklearn.decomposition import PCA

# PCA works on 2-D (samples, features) input, so collapse each image into one row
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)

In [None]:
# Apply PCA to reduce dimensions to 100 components
# random_state pins the randomized SVD solver that PCA may select for inputs of
# this size, so the projection is repeatable across runs.
pca = PCA(n_components=100, random_state=42)
X_train_pca = pca.fit_transform(X_train_flat)  # fit on the training split only
X_val_pca = pca.transform(X_val_flat)  # reuse the projection learned on training data

In [None]:
# Arrange the 100 PCA components of each sample as a 10x10 single-channel grid
# so they can be fed to the Conv2D-based model below (10 * 10 == n_components).
# NOTE(review): PCA components carry no spatial layout, so convolutions over
# this grid may add little - consider a Dense-only model on the flat features.
pca_grid_shape = (-1, 10, 10, 1)
X_train_pca = X_train_pca.reshape(pca_grid_shape)
X_val_pca = X_val_pca.reshape(pca_grid_shape)

In [None]:
# Re-train model on reduced data
# The input shape is declared with an explicit Input layer (Keras warns when
# `input_shape` is passed to the first layer of a Sequential model) and is read
# from the reshaped PCA features so it tracks the grid chosen for them.
model_reduced = Sequential([
    Input(shape=X_train_pca.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per distinct label seen by the encoder
    Dense(len(le.classes_), activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Compile and fit the reduced-input model with the same settings as the full CNN
model_reduced.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history_reduced = model_reduced.fit(
    X_train_pca,
    y_train,
    batch_size=32,
    epochs=2,
    validation_data=(X_val_pca, y_val),
)


Epoch 1/2
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.0318 - loss: 8.2968 - val_accuracy: 0.0416 - val_loss: 8.0135
Epoch 2/2
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 23ms/step - accuracy: 0.0324 - loss: 7.8246 - val_accuracy: 0.0416 - val_loss: 8.0926


In [None]:
# Score the PCA-based model on the validation split for comparison with the full CNN
reduced_scores = model_reduced.evaluate(X_val_pca, y_val)
# Index 1 is the accuracy metric requested when the model was compiled
print("Validation accuracy after dimensionality reduction:", reduced_scores[1])

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.0491 - loss: 8.0183
Validation accuracy after dimensionality reduction: 0.041556477546691895
