# Face detection and recognition training pipeline

The following example illustrates how to fine-tune an InceptionResnetV1 model on your own dataset. It mostly follows standard PyTorch training patterns.

In [1]:


from models.mtcnn import MTCNN, fixed_image_standardization
from models.inception_resnet_v1 import  InceptionResnetV1
from models.utils import training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Set `data_dir` to the location of the dataset you wish to fine-tune on.

In [2]:
# Root folder of the raw images used for fine-tuning.
data_dir = './data/test_no_mask'

# Mini-batch size and number of training epochs.
batch_size, epochs = 32, 8

# DataLoader worker processes: none when os.name is 'nt' (Windows), eight otherwise.
if os.name == 'nt':
    workers = 0
else:
    workers = 8

#### Determine if an nvidia GPU is available

In [3]:
# Use the first CUDA device when torch reports one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [5]:
# Face detector used to crop faces out of the raw images.
# The meaning of each argument is documented in `help(MTCNN)`.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

#### Perform MTCNN facial detection

Iterate through the DataLoader object and obtain cropped faces.

In [4]:
# Folder that receives the cropped faces; it sits next to the raw data folder.
cropped_dir = data_dir + '_cropped'

# Index the raw images; each one is resized to 512x512 when loaded.
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))

# Replace every sample's class label with the path its cropped face should be
# written to, so the loader below yields (image, save_path) pairs.
dataset.samples = [
    (p, p.replace(data_dir, cropped_dir))
    for p, _ in dataset.samples
]

# Detection only runs when the cropped folder is missing.
# NOTE(review): the guard checks for the directory only, so an interrupted
# earlier run that left a partial folder is not resumed.
if not os.path.isdir(cropped_dir):
    print('Generate cropped faces')
    loader = DataLoader(
        dataset,
        num_workers=workers,
        batch_size=batch_size,
        # presumably keeps each batch as plain lists of PIL images and paths — confirm in models/utils/training.py
        collate_fn=training.collate_pil
    )

    for i, (x, y) in enumerate(loader):
        # x: batch of images, y: matching output paths for the crops.
        mtcnn(x, save_path=y)
        print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')

    # Remove mtcnn to reduce GPU memory usage
    del mtcnn

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [7]:
# Build the classifier with one output class per entry in `dataset.class_to_idx`.
# `pretrained='vggface2'` presumably loads VGGFace2 weights — see help(InceptionResnetV1).
resnet = InceptionResnetV1(
    pretrained='vggface2',
    classify=True,
    num_classes=len(dataset.class_to_idx),
)
# Move the network onto the selected device.
resnet = resnet.to(device)

#### Define optimizer, scheduler, dataset, and dataloader

In [8]:
# Adam over all network parameters, with a MultiStepLR schedule using milestones 5 and 10.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

# Per-image preprocessing applied to the cropped faces, in this order.
trans = transforms.Compose(
    [np.float32, transforms.ToTensor(), fixed_image_standardization]
)
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# Shuffle all sample indices, then cut them 80% / 10% / 10% into train / val / test.
# NOTE(review): the shuffle uses numpy's global RNG and no seed is set in this
# cell, so the split differs from run to run.
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
n_imgs = len(img_inds)
train_end = int(0.8 * n_imgs)
val_end = int(0.9 * n_imgs)
train_inds = img_inds[:train_end]
val_inds = img_inds[train_end:val_end]
test_inds = img_inds[val_end:]

# Settings common to the three loaders; drop_last discards a trailing partial batch.
loader_opts = dict(num_workers=workers, batch_size=batch_size, drop_last=True)

train_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(train_inds), **loader_opts
)
val_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(val_inds), **loader_opts
)
test_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(test_inds), **loader_opts
)

#### Define loss and evaluation functions

In [9]:
# Cross-entropy loss for the classification head.
loss_fn = torch.nn.CrossEntropyLoss()

# Per-batch metrics handed to training.pass_epoch, keyed by the label shown in its progress line.
metrics = dict(
    fps=training.BatchTimer(),
    acc=training.accuracy,
)

#### Train model

In [10]:
# TensorBoard writer. `iteration` and `interval` are extra attributes attached
# here for training.pass_epoch (presumably a step counter and a logging
# period — confirm in models/utils/training.py).
writer = SummaryWriter()
writer.iteration = 0
writer.interval = 10

# Keyword arguments shared by every pass_epoch call in this cell.
epoch_kwargs = dict(
    batch_metrics=metrics,
    show_running=True,
    device=device,
    writer=writer,
)

# Baseline: one pass over the validation split with no optimizer supplied.
print('\n\nInitial')
print('-' * 10)
resnet.eval()
training.pass_epoch(resnet, loss_fn, val_loader, **epoch_kwargs)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)

    # Training pass: the optimizer and scheduler are supplied.
    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler, **epoch_kwargs
    )

    # Validation pass on the held-out validation split.
    resnet.eval()
    training.pass_epoch(resnet, loss_fn, val_loader, **epoch_kwargs)

writer.close()



Initial
----------
Valid |    47/47   | loss:    3.9165 | fps:  543.8356 | acc:    0.0140   

Epoch 1/8
----------
Train |   376/376  | loss:    1.2750 | fps:  245.5544 | acc:    0.6459   
Valid |    47/47   | loss:    0.7463 | fps:  540.1214 | acc:    0.7806   

Epoch 2/8
----------
Train |   376/376  | loss:    0.6277 | fps:  238.6343 | acc:    0.8208   
Valid |    47/47   | loss:    0.6507 | fps:  513.1065 | acc:    0.8019   

Epoch 3/8
----------
Train |   376/376  | loss:    0.4017 | fps:  236.4558 | acc:    0.8797   
Valid |    47/47   | loss:    0.6059 | fps:  533.7905 | acc:    0.8265   

Epoch 4/8
----------
Train |   376/376  | loss:    0.3031 | fps:  238.1981 | acc:    0.9086   
Valid |    47/47   | loss:    0.4766 | fps:  551.3198 | acc:    0.8703   

Epoch 5/8
----------
Train |   376/376  | loss:    0.2265 | fps:  236.4282 | acc:    0.9298   
Valid |    47/47   | loss:    0.5196 | fps:  537.7040 | acc:    0.8584   

Epoch 6/8
----------
Train |   376/376  | loss:    0.0

In [12]:
# Evaluate the fine-tuned network on the test split.
print('\n Test set ', '-' * 10, sep='\n')
resnet.eval()
# NOTE(review): `writer` was closed at the end of the training cell and is
# passed again here — confirm that logging through it is intended.
# The call stays the last expression so its return value is shown as the cell output.
training.pass_epoch(
    resnet,
    loss_fn,
    test_loader,
    batch_metrics=metrics,
    show_running=True,
    device=device,
    writer=writer,
)


 Test set 
----------
Valid |    47/47   | loss:    0.2324 | fps:  537.3851 | acc:    0.9388   


(tensor(0.2324), {'fps': tensor(537.3851), 'acc': tensor(0.9388)})

In [11]:
# Destination file for the fine-tuned network.
PATH = "./data/model_no_mask.pt"

# Serialise the whole module object (not only its state_dict) to PATH.
torch.save(obj=resnet, f=PATH)

In [17]:
# Switch to the cropped masked-face images, reusing the `trans` preprocessing.
# NOTE(review): the predictions are only comparable if this folder yields the same
# class-to-index mapping as the dataset the network was trained on — confirm.
dataset = datasets.ImageFolder('./data/test_mask_cropped', transform=trans)

# Shuffle all sample indices, then cut them 80% / 10% / 10% into train / val / test.
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
n_imgs = len(img_inds)
train_end = int(0.8 * n_imgs)
val_end = int(0.9 * n_imgs)
train_inds = img_inds[:train_end]
val_inds = img_inds[train_end:val_end]
test_inds = img_inds[val_end:]

# Settings common to the three loaders; drop_last discards a trailing partial batch.
loader_opts = dict(num_workers=workers, batch_size=batch_size, drop_last=True)

train_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(train_inds), **loader_opts
)
val_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(val_inds), **loader_opts
)
test_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(test_inds), **loader_opts
)

In [14]:

# Evaluate the network on the test split of the masked-face data.
print(
    '\n Masked Face Test with model trained without augmentation',
    '-' * 10,
    sep='\n',
)
resnet.eval()
# The call stays the last expression so its return value is shown as the cell output.
training.pass_epoch(
    resnet,
    loss_fn,
    test_loader,
    batch_metrics=metrics,
    show_running=True,
    device=device,
    writer=writer,
)


 Masked Face Test set 
----------
Valid |    82/82   | loss:    0.7180 | fps:  475.3347 | acc:    0.8117   


(tensor(0.7180), {'fps': tensor(475.3347), 'acc': tensor(0.8117)})