# Setup

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torchvision.models as models
from PIL import Image
import os
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Data manipulation

In [None]:
class FaceDataset(Dataset):
    """Map-style dataset that reads face images from disk on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, indexed in parallel with ``image_paths``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # Image.open() is lazy and keeps the underlying file open. convert()
        # loads the pixel data and returns a new image, so the original
        # handle can be released right away instead of waiting for the GC.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert('RGB')
        label = self.labels[idx]

        # Compare against None explicitly so a transform object that happens
        # to be falsy (e.g. an empty container) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
def get_transforms():
    """Build the image preprocessing pipelines for training and validation.

    Both pipelines resize to 224x224, convert to a tensor and normalize each
    channel. The training pipeline additionally applies a random horizontal
    flip, a random rotation and color jitter between the resize and the
    tensor conversion.

    Returns:
        tuple: ``(train_transform, val_transform)``.
    """
    def _resize():
        return transforms.Resize((224, 224))

    def _tensor_and_normalize():
        # Built fresh on every call so the two pipelines share no objects.
        return [
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]

    # Random augmentations used for training only.
    augmentations = [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    ]

    train_transform = transforms.Compose(
        [_resize(), *augmentations, *_tensor_and_normalize()]
    )
    val_transform = transforms.Compose(
        [_resize(), *_tensor_and_normalize()]
    )

    return train_transform, val_transform


In [None]:
def load_face_dataset(data_dir):
    """Collect image paths and integer labels from a folder-per-person tree.

    Assumes directory structure:
    data_dir/
    ├── person1/
    │   ├── img1.jpg
    │   ├── img2.jpg
    │   └── ...
    ├── person2/
    │   ├── img1.jpg
    │   └── ...

    Entries of ``data_dir`` that are not directories are ignored, and only
    files ending in .png, .jpg or .jpeg (case-insensitive) are collected.

    Args:
        data_dir: Root directory containing one sub-directory per person.

    Returns:
        tuple: ``(image_paths, labels, class_names)`` where ``labels[i]`` is
        the index into ``class_names`` of the folder holding
        ``image_paths[i]``. A folder without any matching image still gets an
        entry in ``class_names``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []
    labels = []
    class_names = []

    for person_folder in sorted(os.listdir(data_dir)):
        person_path = os.path.join(data_dir, person_folder)
        if not os.path.isdir(person_path):
            continue

        # Derive the label from the number of classes seen so far rather than
        # from the position in the directory listing: stray files next to the
        # person folders would otherwise leave gaps in the label range and
        # push labels past len(class_names) - 1.
        label = len(class_names)
        class_names.append(person_folder)

        # Sort so the sample order (and any seeded split built on top of it)
        # does not depend on the order the OS happens to list files in.
        for img_file in sorted(os.listdir(person_path)):
            if img_file.lower().endswith(image_extensions):
                image_paths.append(os.path.join(person_path, img_file))
                labels.append(label)

    return image_paths, labels, class_names


# Model helper functions

In [None]:
class VGGFace2Model(nn.Module):
    """ResNet-50 feature extractor followed by a small classification head.

    The backbone is torchvision's ``resnet50`` with its last child module
    (the original classification layer) removed. The head flattens the
    backbone output and maps 2048 -> 512 -> ``num_classes`` with dropout
    before each linear layer.

    Args:
        num_classes: Number of output classes of the final linear layer.
        pretrained: Forwarded to ``models.resnet50(pretrained=...)``.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # Keep every ResNet-50 child except the last one.
        resnet = models.resnet50(pretrained=pretrained)
        trunk_layers = list(resnet.children())[:-1]
        self.backbone = nn.Sequential(*trunk_layers)

        # Custom head; the layer order fixes the state-dict key names.
        head_layers = [
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        return self.classifier(self.backbone(x))

    def get_embeddings(self, x):
        """Return the head's activations just before the final linear layer."""
        features = self.backbone(x)
        head_without_logits = self.classifier[:-1]
        return head_without_logits(features)

In [None]:
def train_model(model, train_loader, val_loader, num_epochs=30, lr=0.001):
    """Train ``model`` with Adam + cross-entropy and validate after each epoch.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    The learning rate is multiplied by 0.1 every 10 epochs (``StepLR``).

    Args:
        model: Module mapping a batch of images to class logits.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable yielding ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        lr: Initial learning rate for Adam.

    Returns:
        tuple: ``(train_losses, val_accuracies)`` with one entry per epoch.
        The loss is the mean of the per-batch losses; the accuracy is a
        percentage. An epoch with no training batches reports a loss of 0.0
        and an epoch with no validation samples reports an accuracy of 0.0.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    train_losses = []
    val_accuracies = []

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        # Count batches while iterating instead of calling len(), so loaders
        # without __len__ work and an empty loader cannot divide by zero.
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Validation phase
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                # argmax over the class dimension; no need for the legacy
                # `.data` accessor inside no_grad().
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard both averages against empty loaders.
        val_accuracy = 100 * correct / total if total else 0.0
        epoch_loss = running_loss / num_batches if num_batches else 0.0

        train_losses.append(epoch_loss)
        val_accuracies.append(val_accuracy)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

        scheduler.step()

    return train_losses, val_accuracies

# Running it all

In [None]:
def verify_faces(model, img1_path, img2_path, threshold=0.6):
    """
    Verify if two face images belong to the same person.

    Both images are preprocessed with the validation transform, embedded with
    ``model.get_embeddings`` and compared by cosine similarity.

    Args:
        model: Model exposing ``get_embeddings``; it is switched to eval mode.
        img1_path: Path to the first image.
        img2_path: Path to the second image.
        threshold: Similarity strictly above this value counts as a match.

    Returns:
        tuple: ``(is_same_person, similarity)`` as ``(bool, float)``.
    """
    model.eval()

    # Run on the device the model already lives on. Choosing CUDA purely from
    # availability breaks a freshly loaded (CPU) model on a GPU machine,
    # because the inputs would end up on a different device than the weights.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    _, val_transform = get_transforms()

    def _load_batch(path):
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(path) as raw_image:
            rgb_image = raw_image.convert('RGB')
        return val_transform(rgb_image).unsqueeze(0).to(device)

    img1_tensor = _load_batch(img1_path)
    img2_tensor = _load_batch(img2_path)

    with torch.no_grad():
        # Get embeddings
        emb1 = model.get_embeddings(img1_tensor)
        emb2 = model.get_embeddings(img2_tensor)

        # Calculate cosine similarity (one value, since the batch size is 1)
        similarity = torch.nn.functional.cosine_similarity(emb1, emb2).item()

    is_same_person = similarity > threshold

    return is_same_person, similarity

In [None]:
def main():
    """Run the full pipeline: load data, train, save weights, plot curves.

    Reads images from ``DATA_DIR``, makes a stratified 80/20 train/validation
    split, trains a ``VGGFace2Model``, writes the weights to
    'vggface2_finetuned.pth' and the training curves to
    'training_progress.png'.

    Raises:
        ValueError: If no images are found under ``DATA_DIR``.
    """
    DATA_DIR = "path/to/your/face/dataset"  # Update this path
    BATCH_SIZE = 16
    NUM_EPOCHS = 30
    LEARNING_RATE = 0.001

    print("Loading dataset...")
    image_paths, labels, class_names = load_face_dataset(DATA_DIR)
    print(f"Found {len(image_paths)} images across {len(class_names)} people")
    print(f"Classes: {class_names}")

    # Fail fast with an actionable message; otherwise the split below raises
    # a far less obvious ValueError about an empty sample set.
    if not image_paths:
        raise ValueError(
            f"No images found under {DATA_DIR!r}; expected one sub-folder "
            "per person containing .png/.jpg/.jpeg files"
        )

    # Stratify so every person keeps the same share in both splits.
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        image_paths, labels, test_size=0.2, stratify=labels, random_state=42
    )

    train_transform, val_transform = get_transforms()
    train_dataset = FaceDataset(train_paths, train_labels, train_transform)
    val_dataset = FaceDataset(val_paths, val_labels, val_transform)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    num_classes = len(class_names)
    model = VGGFace2Model(num_classes=num_classes)

    print(f"Training model with {num_classes} classes...")

    train_losses, val_accuracies = train_model(
        model, train_loader, val_loader,
        num_epochs=NUM_EPOCHS, lr=LEARNING_RATE
    )

    torch.save(model.state_dict(), 'vggface2_finetuned.pth')
    print("Model saved as 'vggface2_finetuned.pth'")

    # Side-by-side curves: training loss (left), validation accuracy (right).
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(train_losses)
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')

    plt.subplot(1, 2, 2)
    plt.plot(val_accuracies)
    plt.title('Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')

    plt.tight_layout()
    # Save before show(): some backends clear the figure once it is shown.
    plt.savefig('training_progress.png')
    plt.show()

# Example usage for face verification

In [None]:
"""
# Load trained model
model = VGGFace2Model(num_classes=2)  # Must equal the number of classes the saved weights were trained with
model.load_state_dict(torch.load('vggface2_finetuned.pth'))

# Verify two faces
is_same, similarity = verify_faces(model, 'path/to/img1.jpg', 'path/to/img2.jpg')
print(f"Same person: {is_same}, Similarity: {similarity:.3f}")
"""