# Face detection and recognition training pipeline

The following example illustrates how to fine-tune an InceptionResnetV1 model on your own dataset. This will mostly follow standard pytorch training patterns.

In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os

  from .autonotebook import tqdm as notebook_tqdm


#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Modify `data_dir` to the location of the dataset on wish to finetune on.

In [2]:
data_dir = '../hard_images_copy'

batch_size = 32
epochs = 8
workers = 0 if os.name == 'nt' else 4 # 8

#### Determine if an nvidia GPU is available

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [8]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

#### Perfom MTCNN facial detection

Iterate through the DataLoader object and obtain cropped faces.

In [4]:
%load_ext autoreload
%autoreload 2

In [4]:
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
dataset.idx_to_class

{0: 'Ayumi_Hamasaki',
 1: 'HK_Source',
 2: 'Haruka_Imai',
 3: 'Haruna_Kawaguchi',
 4: 'Other_Source',
 5: 'Yui_Aragaki'}

In [7]:
dataset.samples = [
    (p, p.replace(data_dir, data_dir + '_cropped'))
        for p, _ in dataset.samples
]

# loader = DataLoader(
#     dataset,
#     num_workers=workers,
#     batch_size=batch_size,
#     collate_fn=training.collate_pil
# )
loader = DataLoader(dataset, collate_fn=training.collate_pil, num_workers=workers)

# x: image object
# y: save path of corresponding object
for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')
    
# Remove mtcnn to reduce GPU memory usage
del mtcnn

Batch 1475 of 1475

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [7]:
# set random seed for reproducibility
np.random.seed(0)
resnet = InceptionResnetV1(
    classify=True,
    pretrained='casia-webface',
    num_classes=len(dataset.class_to_idx)
).to(device)

#### Define optimizer, scheduler, dataset, and dataloader

In [8]:
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])
train_data_dir = '../base_images'
val_data_dir = '../test_images'
train_dataset = datasets.ImageFolder(train_data_dir, transform=trans)
val_dataset = datasets.ImageFolder(val_data_dir, transform=trans)
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
)

# freeze all layers except the final classification layer
for name, param in resnet.named_parameters():
    if name not in ['logits.weight', 'logits.bias']:
        param.requires_grad = False
        


#### Define loss and evaluation functions

In [11]:
import torch
from torchmetrics import F1Score
from torchmetrics.functional.classification import binary_f1_score

def fscore(preds, target):
    """
    Custom F-score metric function
    """
    fscore = F1Score(num_classes=2)  # Replace num_classes with the actual number of classes
    return fscore(preds, target)

loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy,
    'fscore': binary_f1_score
}

#### Train model

In [12]:
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
resnet.eval()
training.pass_epoch(
    resnet, loss_fn, val_loader,
    batch_metrics=metrics, show_running=True, device=device,
    writer=writer
)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)

    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

writer.close()



Initial
----------


RuntimeError: Predictions and targets are expected to have the same shape, but got torch.Size([32, 6]) and torch.Size([32]).