<a href="https://colab.research.google.com/github/Sabastain-Wakoyi/Wakoyi-Tolulope/blob/main/Sabastain_Tolulope.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torchvision



In [None]:
!pip install scikit-learn



In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import normalize
from sklearn.svm import SVC
import numpy as np
from collections import Counter

In [None]:
# Load an ImageNet-pretrained ResNet-50 and use it as a frozen feature extractor.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# `pretrained=True` is deprecated (torchvision >= 0.13). IMAGENET1K_V1 is the
# checkpoint that flag selected, so the extracted embeddings stay the same.
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
resnet.fc = nn.Identity()  # Replace the classification head so the model outputs embeddings
resnet = resnet.eval().to(device)  # eval() keeps BatchNorm in inference mode

# Prepare the LFW dataset with sufficient samples per class
min_faces_per_person = 20  # Keep only people with at least 20 images

# Deterministic preprocessing for feature extraction.
# Random augmentation (flip / rotation / colour jitter) is intentionally NOT
# applied here: this transform is run exactly once per image, on the test
# images as well as the training images, so random perturbations would only add
# unseeded noise to the embeddings and make the reported metrics irreproducible.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Input size the ImageNet ResNet was trained on
    transforms.ToTensor(),
    # Replicate a single greyscale channel to 3 channels; leave multi-channel
    # tensors untouched so the pipeline does not produce 9-channel inputs.
    transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.shape[0] == 1 else x),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])

# fetch_lfw_people is used (rather than LFWPeople) because it can filter
# identities through its min_faces_per_person argument.
from sklearn.datasets import fetch_lfw_people

# Download (if not already cached under ./) and load the filtered face set.
lfw_people = fetch_lfw_people(
    data_home='./',
    min_faces_per_person=min_faces_per_person,
    resize=0.4,
    download_if_missing=True,
)

# Created a custom dataset using the data and target from fetch_lfw_people
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset over in-memory images and their labels.

    Args:
        images: Indexable collection of images. NumPy arrays are converted to
            PIL images before ``transform`` is applied; other types are passed
            to ``transform`` unchanged.
        targets: Indexable collection of labels aligned with ``images``.
        transform: Optional callable applied to each image on access.

    Raises:
        ValueError: If ``images`` and ``targets`` differ in length.
    """

    def __init__(self, images, targets, transform=None):
        if len(images) != len(targets):
            raise ValueError(
                f"images and targets must have the same length "
                f"(got {len(images)} and {len(targets)})"
            )
        self.images = images
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    @staticmethod
    def _as_uint8(image):
        """Return ``image`` as a uint8 array in [0, 255].

        Floating-point arrays are rescaled explicitly instead of relying on
        ``ToPILImage``, whose treatment of float NumPy input is not uniform
        across torchvision releases. Arrays whose maximum is <= 1.0 are assumed
        to be in [0, 1] and are multiplied by 255; other float arrays are
        assumed to already be in [0, 255]. Non-float arrays are returned as-is.
        """
        if np.issubdtype(image.dtype, np.floating):
            # NOTE(review): the range heuristic is per image; an almost-black
            # image stored on a 0-255 scale would be misread as [0, 1].
            scale = 255.0 if image.size and image.max() <= 1.0 else 1.0
            image = np.clip(np.rint(image * scale), 0, 255).astype(np.uint8)
        return image

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx`` with the transform applied."""
        image = self.images[idx]
        target = self.targets[idx]

        # NumPy images go through PIL so that PIL-based transforms can be used.
        if isinstance(image, np.ndarray):
            image = transforms.ToPILImage()(self._as_uint8(image))

        # Compare against None: a valid transform object could be falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image, target

lfw_dataset = CustomDataset(
    lfw_people.images,
    lfw_people.target,
    transform=transform,
)

# Summarise how many images each identity contributes
label_counts = Counter(lfw_dataset.targets)
faces_per_person = list(label_counts.values())
print(f"Number of people: {len(label_counts)}")
print(f"Min faces per person: {min(faces_per_person)}")
print(f"Max faces per person: {max(faces_per_person)}")
print(f"Average faces per person: {np.mean(faces_per_person):.2f}")


# Materialise every (transformed image, label) pair once, in index order
samples = [lfw_dataset[index] for index in range(len(lfw_dataset))]

X = torch.stack([image for image, _ in samples])
y = np.array([label for _, label in samples])
del samples  # the stacked tensor holds its own copy; release the per-image list

# Stratified 75/25 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

#Extracted embeddings using ResNet
def extract_embeddings(images, batch_size=64, model=None):
    """Run images through a feature extractor and return one embedding per image.

    Images are processed in mini-batches rather than one forward pass per
    image. The model is used as-is (its train/eval mode is not changed), so it
    should already be in eval mode; batched results can differ from
    single-image results in the last floating-point bits.

    Args:
        images: A tensor whose first dimension indexes images, or a sequence
            of equally shaped image tensors.
        batch_size: Number of images per forward pass (must be >= 1).
        model: ``torch.nn.Module`` to use; defaults to the module-level
            ``resnet``. Inputs are moved to the device of its parameters
            (CPU when the model has no parameters).

    Returns:
        ``np.ndarray`` of shape ``(n_images, n_features)``; an empty array when
        ``images`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 (got {batch_size})")

    model = resnet if model is None else model
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    chunks = []
    with torch.no_grad():
        for start in range(0, len(images), batch_size):
            batch = images[start:start + batch_size]
            if not torch.is_tensor(batch):
                # A plain sequence of image tensors: stack into one batch.
                batch = torch.stack(list(batch))
            features = model(batch.to(run_device))
            # Flatten everything except the batch dimension, one row per image.
            chunks.append(features.reshape(features.shape[0], -1).cpu().numpy())

    return np.concatenate(chunks) if chunks else np.array([])

# Embed each split with the ResNet feature extractor, then rescale every
# embedding row with sklearn's normalize (default settings).
X_train_embeddings = normalize(extract_embeddings(X_train))
X_test_embeddings = normalize(extract_embeddings(X_test))

# Hyperparameter search for an RBF-kernel SVM trained on the embeddings
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale', 'auto', 0.01],
    kernel=['rbf'],
)

# class_weight='balanced' reweights classes by inverse frequency;
# probability=True additionally fits probability estimates for each SVM.
base_svm = SVC(class_weight='balanced', probability=True, random_state=42)

grid = GridSearchCV(
    estimator=base_svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid.fit(X_train_embeddings, y_train)

# Best configuration, refit on the full training split by GridSearchCV
clf = grid.best_estimator_

# Evaluating the model using cross-validation.
# The grid search already scored the winning configuration on the same 5
# folds (cv=5 on the same training data), so its per-fold scores are reused
# instead of refitting the SVM five more times with cross_val_score.
# Note: these folds were also used to pick the hyperparameters, so this figure
# is optimistic; the held-out test accuracy below is the unbiased estimate.
cv_scores = np.array([
    grid.cv_results_[f"split{fold}_test_score"][grid.best_index_]
    for fold in range(grid.n_splits_)
])
print(f"Cross-validated accuracy: {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}")

# Make predictions on the test set
y_pred = clf.predict(X_test_embeddings)

# Calculate and display metrics
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

print("\nDetailed classification report:")
# labels pins each row to its class id so the names stay aligned even if a
# class is absent from y_test; zero_division=0 reports 0.0 for classes that
# were never predicted instead of emitting UndefinedMetricWarning.
print(classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(lfw_people.target_names)),
    target_names=lfw_people.target_names,
    zero_division=0,
))



Number of people: 62
Min faces per person: 20
Max faces per person: 530
Average faces per person: 48.76
Cross-validated accuracy: 0.4879 ± 0.0205
Accuracy: 0.4934

Detailed classification report:
                           precision    recall  f1-score   support

         Alejandro Toledo       0.42      0.50      0.45        10
             Alvaro Uribe       0.62      0.56      0.59         9
          Amelie Mauresmo       0.50      0.60      0.55         5
             Andre Agassi       1.00      0.56      0.71         9
           Angelina Jolie       0.00      0.00      0.00         5
             Ariel Sharon       0.56      0.79      0.65        19
    Arnold Schwarzenegger       0.33      0.27      0.30        11
     Atal Bihari Vajpayee       0.33      0.33      0.33         6
             Bill Clinton       0.00      0.00      0.00         7
             Carlos Menem       0.00      0.00      0.00         5
             Colin Powell       0.56      0.71      0.63        59

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Calculate and display overall metrics.
# 'weighted' averages the per-class scores weighted by class support.
# zero_division=0 scores classes that were never predicted as 0.0 without
# emitting UndefinedMetricWarning.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

print("Overall Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


Overall Metrics:
Accuracy: 0.49
Precision: 0.48
Recall: 0.49
F1 Score: 0.46


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
