In [1]:
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
drive_dir = '/content/drive/MyDrive'

Mounted at /content/drive/


In [2]:
!pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.9 MB[0m [31m11.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!ls /content/drive/MyDrive/
!mkdir /content/data

In [4]:
!!unrar x /content/drive/MyDrive/all_images.rar /content/data -idq

[]

In [5]:
%matplotlib inline

import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import pickle
import random
import json
import time
import os

SEED = 42

import torch
import torchvision.transforms.functional as F
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.optim.lr_scheduler import OneCycleLR
import torchvision.transforms as transforms
from torchvision.models import resnet50, ResNet50_Weights

# Set random seed for PyTorch
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)

# Set random seed for NumPy
np.random.seed(SEED)
random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

base_dir = "/content/data/all_images"

In [6]:
from torch.utils.data import Dataset
from PIL import Image

class AgeDataset(Dataset):
    # image_paths: list of image paths as strings
    # labels: list of labels as integers
    # resize: (channels, width, height) new image shape
    def __init__(self, image_paths, labels, resize=None, augmentations=None):
        self.num_samples = len(image_paths)
        self.image_paths = image_paths
        self.transform = augmentations
        self.labels = labels

        self.resize = resize
        if resize is not None:
            resize = list(resize)
            self.resize = [resize[x] for x in (1,2,0)]


    def __len__(self):
        return self.num_samples


    def __getitem__(self, item):
        image = Image.open(self.image_paths[item])
        image = image.convert("RGB")
        label = self.labels[item]

        if self.resize is not None:
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )

        if self.transform:
            image = self.transform(image)
        else:
            image = np.array(image)

            # transpose from 32x32x3 to 3x32x32
            image = np.transpose(image, (2, 0, 1)).astype(np.float32)
            # image = image.permute()

        return image, label

In [7]:
with open('/content/drive/MyDrive/age_intervals_binary.json') as f:
  current_config = json.load(f)

classes = [str(x) for x in current_config.values()]

classes

['[6, 17]', '[18, 120]']

In [8]:
with open('/content/drive/MyDrive/initial_splits_binary.json') as f:
    initial_splits = json.load(f)

with open('/content/drive/MyDrive/dataframe.pkl', 'rb') as f:
    df = pickle.load(f)

df = df.drop(columns=['hog_features'])
print(df)

                              name  age
0                       000002.jpg   80
1                       000003.jpg   50
2                       000004.jpg   17
3                       000005.jpg   27
4                       000006.jpg   24
...                            ...  ...
26846  9_1_0_20170110224621441.jpg    9
26847  9_1_0_20170117172655681.jpg    9
26848  9_1_0_20170117180006484.jpg    9
26849  9_1_1_20170109201837354.jpg    9
26850  9_1_1_20170117105556810.jpg    9

[26851 rows x 2 columns]


In [9]:
from sklearn.model_selection import train_test_split

def create_label_dict(config):
    label_dict = {}
    for label, age_interval in config.items():
        age_interval = list(age_interval)
        age_interval[1] += 1
        for age in range(*age_interval):
            label_dict[age] = int(label)

    return label_dict

age_to_label = create_label_dict(current_config)
print(age_to_label)

{6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0, 16: 0, 17: 0, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 1, 62: 1, 63: 1, 64: 1, 65: 1, 66: 1, 67: 1, 68: 1, 69: 1, 70: 1, 71: 1, 72: 1, 73: 1, 74: 1, 75: 1, 76: 1, 77: 1, 78: 1, 79: 1, 80: 1, 81: 1, 82: 1, 83: 1, 84: 1, 85: 1, 86: 1, 87: 1, 88: 1, 89: 1, 90: 1, 91: 1, 92: 1, 93: 1, 94: 1, 95: 1, 96: 1, 97: 1, 98: 1, 99: 1, 100: 1, 101: 1, 102: 1, 103: 1, 104: 1, 105: 1, 106: 1, 107: 1, 108: 1, 109: 1, 110: 1, 111: 1, 112: 1, 113: 1, 114: 1, 115: 1, 116: 1, 117: 1, 118: 1, 119: 1, 120: 1}


In [10]:
train_indices = initial_splits['train']
test_indices = initial_splits['test']

X_train = [os.path.join(base_dir, df['name'][idx]) for idx in train_indices]
y_train = [age_to_label[df['age'][idx]] for idx in train_indices]
X_test = [os.path.join(base_dir, df['name'][idx]) for idx in test_indices]
y_test = [age_to_label[df['age'][idx]] for idx in test_indices]

In [11]:
class RandomBoundingBoxJitter:
    def __init__(self, magnitude=0.45):
        self.magnitude = magnitude

    def __call__(self, img):
        width, height = img.size
        x_min = int(random.uniform(-self.magnitude, self.magnitude) * width)
        y_min = int(random.uniform(-self.magnitude, self.magnitude) * height)
        x_max = width + int(random.uniform(-self.magnitude, self.magnitude) * width)
        y_max = height + int(random.uniform(-self.magnitude, self.magnitude) * height)

        # Ensure the bounding box doesn't go out of bounds
        x_min = max(0, x_min)
        y_min = max(0, y_min)
        x_max = min(width, x_max)
        y_max = min(height, y_max)

        # Apply the bounding box jitter
        img = F.crop(img, y_min, x_min, y_max - y_min, x_max - x_min)
        img = F.resize(img, (height, width))  # Resize back to original size

        return img

In [12]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.delta = delta
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model, epoch):
        if self.best_score is None:
            self.best_score = val_loss
            self.save_checkpoint(val_loss, model, epoch)
        elif val_loss > self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_loss
            self.save_checkpoint(val_loss, model, epoch)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, epoch):
        state = {
            'net': model.state_dict(),
            'epoch': epoch,
        }
        torch.save(state, self.path)
        self.best_score = val_loss

In [13]:
root_path = os.path.join(drive_dir, "architectures", 'FaceNet_binary')
os.makedirs(root_path, exist_ok=True)
os.makedirs(os.path.join(root_path, 'intermediate_checkpoints'), exist_ok=True)
print(root_path)
IMAGE_SHAPE = (3, 160, 160)

/content/drive/MyDrive/architectures/FaceNet_binary


In [14]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
batch_size = 128
max_learning_rate = 0.001
weight_decay = 1e-4
epochs = 50

transform_train = transforms.Compose([
    transforms.RandAugment(2, 22),
    RandomBoundingBoxJitter(0.45),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Normalize the test set same as training set without augmentation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

trainset = AgeDataset(
    image_paths=X_train,
    labels=y_train,
    resize=IMAGE_SHAPE,
    augmentations=transform_train,
)

testset = AgeDataset(
    image_paths=X_test,
    labels=y_test,
    resize=IMAGE_SHAPE,
    augmentations=transform_test,
)

# dataloaders - creating batches and shuffling the data
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# device - cpu or gpu?
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# loss criterion
criterion = nn.CrossEntropyLoss()

# model
model = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(classes)).to(device)

# optimizer and scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=max_learning_rate)
scheduler = OneCycleLR(optimizer, max_lr=max_learning_rate, epochs=epochs, steps_per_epoch=len(trainloader))


  0%|          | 0.00/107M [00:00<?, ?B/s]

In [15]:
def calculate_accuracy(model, dataloader, device):
    model.eval() # put in evaluation mode
    total_correct = 0
    total_images = 0
    confusion_matrix = np.zeros([len(classes),len(classes)], int)
    with torch.no_grad():
        for data in dataloader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            # import pdb; pdb.set_trace()
            total_images += labels.size(0)
            total_correct += (predicted == labels).sum().item()
            for i, l in enumerate(labels):
                confusion_matrix[l.item(), predicted[i].item()] += 1

    model_accuracy = total_correct / total_images * 100
    return model_accuracy, confusion_matrix

In [None]:
best_acc = 0
counter = 0

In [None]:
# training loop
loss_list = []
train_acc_list = []
test_acc_list = []

for epoch in range(1, epochs + 1):
    model.train()  # put in training mode
    running_loss = 0.0
    epoch_time = time.time()
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # send them to device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # forward + backward + optimize
        outputs = model(inputs)  # forward pass
        loss = criterion(outputs, labels)  # calculate the loss
        # always the same 3 steps
        optimizer.zero_grad()  # zero the parameter gradients
        loss.backward()  # backpropagation
        optimizer.step()  # update parameters

        # print statistics
        running_loss += loss.data.item()

    # Normalizing the loss by the total number of train batches
    running_loss /= len(trainloader)

    # Calculate training/test set accuracy of the existing model
    train_accuracy, _ = calculate_accuracy(model, trainloader, device)
    test_accuracy, _ = calculate_accuracy(model, testloader, device)

    # append the losses
    loss_list.append(running_loss)
    train_acc_list.append(train_accuracy)
    test_acc_list.append(test_accuracy)

    # modify learning rate
    scheduler.step()

    # save every 5 models
    # we comment because the model stopped before 20 epochs, so need to train for full 50
    #if epoch % 5 == 0:
     # print('==> Saving model ...')
      #state = {
       #   'net': model.state_dict(),
        #  'epoch': epoch,
      #}

      #torch.save(state, os.path.join(root_path, 'intermediate_checkpoints', f'model_epoch_{epoch}.pth'))

    # compute validation loss to check if we should stop
    model.eval()  # switch to evaluation mode
    test_loss = 0.0
    total_test_samples = 0

    with torch.no_grad():  # No need to compute gradients during validation
      for data in testloader:
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Update validation loss and total samples
        test_loss += loss.item() * inputs.size(0)
        total_test_samples += inputs.size(0)

    # Calculate average validation loss
    test_loss /= total_test_samples

    log = "Epoch: {} | Training Loss: {:.4f} | Test Loss: {:.4f} | Training accuracy: {:.3f}% | Test accuracy: {:.3f}% | ".format(epoch, running_loss, test_loss, train_accuracy, test_accuracy)
    epoch_time = time.time() - epoch_time
    log += "Epoch Time: {:.2f} secs".format(epoch_time)
    print(log)

    state = {
        'net': model.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, os.path.join(root_path, f'model_{epoch}.pth'))

    if test_accuracy > best_acc:
      counter = 0
      best_acc = test_accuracy
      torch.save(state, os.path.join(root_path, f'best_model.pth'))
    else:
      counter += 1

    if counter > 5:
      break

print('==> Finished Training ...')

Epoch: 1 | Training Loss: 0.6318 | Test Loss: 0.5477 | Training accuracy: 87.020% | Test accuracy: 90.896% | Epoch Time: 67.39 secs
Epoch: 2 | Training Loss: 0.5307 | Test Loss: 0.5783 | Training accuracy: 90.759% | Test accuracy: 93.018% | Epoch Time: 66.65 secs
Epoch: 3 | Training Loss: 0.4881 | Test Loss: 0.5206 | Training accuracy: 87.821% | Test accuracy: 90.039% | Epoch Time: 66.78 secs
Epoch: 4 | Training Loss: 0.4266 | Test Loss: 0.2200 | Training accuracy: 91.257% | Test accuracy: 94.005% | Epoch Time: 67.67 secs
Epoch: 5 | Training Loss: 0.4219 | Test Loss: 0.2604 | Training accuracy: 90.857% | Test accuracy: 94.042% | Epoch Time: 67.10 secs
Epoch: 6 | Training Loss: 0.3697 | Test Loss: 0.4036 | Training accuracy: 91.876% | Test accuracy: 93.484% | Epoch Time: 66.62 secs
Epoch: 7 | Training Loss: 0.3402 | Test Loss: 0.1650 | Training accuracy: 91.974% | Test accuracy: 94.433% | Epoch Time: 66.74 secs
Epoch: 8 | Training Loss: 0.2940 | Test Loss: 0.1818 | Training accuracy: 91

In [16]:
best_model = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(classes)).to(device)

# Load the saved model state dictionary
checkpoint = torch.load(os.path.join(root_path, 'best_model.pth'))

# Load the model state dictionary
best_model.load_state_dict(checkpoint['net'])
best_model.to(device)

transform_testing_purposes = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

trainset = AgeDataset(
    image_paths=X_train,
    labels=y_train,
    resize=IMAGE_SHAPE,
    augmentations=transform_testing_purposes,
)

testset = AgeDataset(
    image_paths=X_test,
    labels=y_test,
    resize=IMAGE_SHAPE,
    augmentations=transform_testing_purposes,
)

# Create a DataLoader for the training set without data augmentation
trainloader_no_aug = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=False, num_workers=2)
testloader_for_testing = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

no_aug_train_acc, confusion_matrix_train = calculate_accuracy(best_model, trainloader_no_aug, device)
test_acc, confusion_matrix_test = calculate_accuracy(best_model, testloader_for_testing, device)

print(f"Accuracy on Training Set: {no_aug_train_acc}%")
print(f"Accuracy on Test Set: {test_acc}%")

Accuracy on Training Set: 97.38826815642459%
Accuracy on Test Set: 96.03425805250419%


In [18]:
from sklearn.metrics import classification_report

# Create a DataLoader for the training set without data augmentation
trainloader_no_aug = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=False, num_workers=2)
testloader_for_testing = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Function to get predictions from dataloader
def get_predictions(model, dataloader, device):
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return predictions, true_labels

# Get predictions for the training and test sets
train_predictions, train_true_labels = get_predictions(best_model, trainloader_no_aug, device)
test_predictions, test_true_labels = get_predictions(best_model, testloader_for_testing, device)

# Generate classification report
print("Classification Report for Training Set:")
print(classification_report(train_true_labels, train_predictions, digits=4))

print("Classification Report for Test Set:")
print(classification_report(test_true_labels, test_predictions, digits=4))

Classification Report for Training Set:
              precision    recall  f1-score   support

           0     0.9461    0.7959    0.8645      2249
           1     0.9766    0.9947    0.9855     19231

    accuracy                         0.9739     21480
   macro avg     0.9613    0.8953    0.9250     21480
weighted avg     0.9734    0.9739    0.9729     21480

Classification Report for Test Set:
              precision    recall  f1-score   support

           0     0.8975    0.7011    0.7872       562
           1     0.9659    0.9906    0.9781      4809

    accuracy                         0.9603      5371
   macro avg     0.9317    0.8459    0.8827      5371
weighted avg     0.9588    0.9603    0.9582      5371

