# Face detection and recognition training pipeline

The following example illustrates how to fine-tune an InceptionResnetV1 model on your own dataset. This will mostly follow standard PyTorch training patterns.

In [2]:
# Install the notebook's dependencies into the runtime via shell escapes (`!`).
# facenet_pytorch provides the MTCNN, InceptionResnetV1,
# fixed_image_standardization and training names imported in the next code cell.
!pip install facenet_pytorch
# NOTE(review): this installs a separate distribution (the log shows mtcnn-0.1.1);
# the MTCNN class used in this notebook is imported from facenet_pytorch instead.
# Confirm this install is actually needed.
!pip install MTCNN
!pip install torch
# NOTE(review): no visible cell imports openmim or mmcv — confirm they are needed.
!pip install -U openmim
!mim install mmcv

Collecting facenet_pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet_pytorch
Successfully installed facenet_pytorch-2.5.3
Collecting MTCNN
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: MTCNN
Successfully installed MTCNN-0.1.1
Collecting openmim
  Downloading openmim-0.3.9-py2.py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting colorama (from openmim)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting model-index (from openmim)
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting opendatalab (from openmim)
  Downloading open

In [3]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Modify `data_dir` to the location of the dataset you wish to fine-tune on.

In [4]:
# Clone the facenet-pytorch repository into ./facenet_pytorch. The `data_dir`
# set in the following cell points at data/test_images inside a checkout at
# /content/facenet_pytorch (presumably the working directory is /content — confirm).
# NOTE(review): if ./facenet_pytorch already exists (e.g. on a re-run), git will
# presumably refuse to clone again — confirm this is acceptable for re-runs.
!git clone https://github.com/timesler/facenet-pytorch.git facenet_pytorch

Cloning into 'facenet_pytorch'...
remote: Enumerating objects: 1306, done.[K
remote: Counting objects: 100% (57/57), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 1306 (delta 25), reused 49 (delta 23), pack-reused 1249[K
Receiving objects: 100% (1306/1306), 22.89 MiB | 30.01 MiB/s, done.
Resolving deltas: 100% (635/635), done.


In [6]:
# Run configuration for the fine-tuning pipeline.

# Root directory of the dataset that the later cells read with ImageFolder.
data_dir = '/content/facenet_pytorch/data/test_images'

# Mini-batch size and number of training epochs.
batch_size, epochs = 32, 8

# DataLoader worker count: none on Windows (os.name == 'nt'), eight elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 8

#### Determine if an NVIDIA GPU is available

In [7]:
# Use the first CUDA device when PyTorch reports one is available; otherwise
# run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cpu


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [8]:
# Build the MTCNN detector on the selected device. The keyword values below are
# this run's detection/cropping settings; see `help(MTCNN)` for what each means.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

#### Perform MTCNN facial detection

Iterate through the DataLoader object and obtain cropped faces.

In [9]:
# Read the raw images from `data_dir`, resizing each one to 512x512.
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))

# Replace the second element of every sample with an output path: the source
# path with `data_dir` swapped for `data_dir + '_cropped'`. The loop below
# hands these paths to MTCNN as `save_path`.
dataset.samples = list(
    (src_path, src_path.replace(data_dir, data_dir + '_cropped'))
    for src_path, _class_idx in dataset.samples
)

# Batches are assembled with facenet_pytorch's `training.collate_pil`.
loader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=workers,
    collate_fn=training.collate_pil
)

# Run detection batch by batch; the carriage return keeps the progress
# message on a single console line.
for batch_no, (images, save_paths) in enumerate(loader, start=1):
    mtcnn(images, save_path=save_paths)
    print('\rBatch {} of {}'.format(batch_no, len(loader)), end='')

# Drop the detector once cropping is finished to reduce GPU memory usage.
# Re-running this cell afterwards requires re-creating `mtcnn` first.
del mtcnn



Batch 1 of 1

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [10]:
# Classification-mode InceptionResnetV1 using the 'vggface2' pretrained option,
# with `num_classes` set to the number of entries in `dataset.class_to_idx`.
resnet = InceptionResnetV1(
    pretrained='vggface2',
    classify=True,
    num_classes=len(dataset.class_to_idx),
)
# Move the model to the device selected earlier in the notebook.
resnet = resnet.to(device)

  0%|          | 0.00/107M [00:00<?, ?B/s]

#### Define optimizer, scheduler, dataset, and dataloader

In [11]:
# Adam over all model parameters, with a MultiStepLR schedule at milestones 5 and 10.
# NOTE(review): the run-parameters cell sets `epochs = 8`; if the scheduler is
# stepped once per epoch, milestone 10 is never reached — confirm this is intended.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

# Per-image preprocessing: cast to float32, convert to a tensor, then apply
# facenet_pytorch's `fixed_image_standardization`.
trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

# From here on `dataset` refers to the cropped faces written by the MTCNN pass.
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# 80/20 train/validation split over a shuffled index array. A dedicated seeded
# generator is used (instead of the unseeded global NumPy RNG) so the same
# images land in the same split on every run and validation numbers are
# comparable between runs. Change `split_seed` to draw a different split.
split_seed = 0
img_inds = np.arange(len(dataset))
np.random.default_rng(split_seed).shuffle(img_inds)
n_train = int(0.8 * len(img_inds))
train_inds = img_inds[:n_train]
val_inds = img_inds[n_train:]

# Both loaders wrap the same dataset; the samplers restrict each loader to its
# own subset of indices.
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

#### Define loss and evaluation functions

In [12]:
# Cross-entropy loss, passed to every training and validation pass below.
loss_fn = torch.nn.CrossEntropyLoss()

# Per-batch metrics handed to training.pass_epoch; the keys are the labels
# that appear in the progress output.
metrics = dict(
    fps=training.BatchTimer(),
    acc=training.accuracy,
)

#### Train model

In [14]:
# TensorBoard writer. `iteration` and `interval` are extra attributes attached
# here; the writer is then handed to every training.pass_epoch call.
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

try:
    # Baseline validation pass before any fine-tuning.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    # Validation passes receive no optimizer, so gradients are never needed;
    # disabling autograd avoids building graphs and lowers memory use.
    with torch.no_grad():
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are supplied.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass after each epoch, again without autograd.
        resnet.eval()
        with torch.no_grad():
            training.pass_epoch(
                resnet, loss_fn, val_loader,
                batch_metrics=metrics, show_running=True, device=device,
                writer=writer
            )
finally:
    # Always close the writer, even if training raises or is interrupted.
    writer.close()



Initial
----------
Valid |     1/1    | loss:    1.9901 | fps:    0.0356 | acc:    0.0000   

Epoch 1/8
----------
Train |     1/1    | loss:    0.0023 | fps:    2.5355 | acc:    1.0000   
Valid |     1/1    | loss:    2.0231 | fps:    2.2911 | acc:    0.0000   

Epoch 2/8
----------
Train |     1/1    | loss:    0.0011 | fps:    2.4966 | acc:    1.0000   
Valid |     1/1    | loss:    2.0460 | fps:    2.0951 | acc:    0.0000   

Epoch 3/8
----------
Train |     1/1    | loss:    0.0011 | fps:    2.4918 | acc:    1.0000   
Valid |     1/1    | loss:    2.0590 | fps:    2.2615 | acc:    0.0000   

Epoch 4/8
----------
Train |     1/1    | loss:    0.0011 | fps:    2.5020 | acc:    1.0000   
Valid |     1/1    | loss:    2.0401 | fps:    2.1913 | acc:    0.0000   

Epoch 5/8
----------
Train |     1/1    | loss:    0.0009 | fps:    1.9144 | acc:    1.0000   
Valid |     1/1    | loss:    2.0145 | fps:    1.3881 | acc:    0.0000   

Epoch 6/8
----------
Train |     1/1    | loss:    0.0