# Face detection and recognition training pipeline

The following example illustrates how to fine-tune an InceptionResnetV1 model on your own dataset. This will mostly follow standard pytorch training patterns.

In [2]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
import pandas as pd
import glob 
from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import display

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Modify `data_dir` to the location of the dataset you wish to fine-tune on.

In [3]:
# Root of the ImageFolder-style training set.
data_dir = '/export2/obasit/50024/kaggle_data/train/train_mtcnn'

# Training hyper-parameters.
batch_size, epochs = 150, 7

# DataLoader worker processes: none on Windows ('nt'), 16 everywhere else.
if os.name == 'nt':
    workers = 0
else:
    workers = 16

#### Determine if an nvidia GPU is available

In [4]:
# Prefer the second GPU ('cuda:1') when one exists; otherwise fall back to the
# first GPU, and to the CPU when CUDA is unavailable. Selecting 'cuda:1' on a
# single-GPU machine would only surface as an error later, at the first
# `.to(device)` call.
if not torch.cuda.is_available():
    device = torch.device('cpu')
elif torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
else:
    device = torch.device('cuda:0')
print('Running on device: {}'.format(device))

Running on device: cuda:1


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [29]:
# Face detector/cropper running on `device`; see `help(MTCNN)` for the meaning
# of each option.
mtcnn = MTCNN(
    image_size=160,
    margin=40,
    min_face_size=40,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
    selection_method='largest_over_threshold',
    keep_all=True,
)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


#### Perform MTCNN facial detection

Iterate through the DataLoader object and obtain cropped faces.

In [5]:
def filter_RGB(image):
    """Return `image` in RGB mode.

    An image whose `mode` is already 'RGB' is returned unchanged (the same
    object); any other mode is converted with `image.convert('RGB')`.
    """
    return image if image.mode == 'RGB' else image.convert('RGB')

class FilterRGB(object):
    """Callable transform that applies `filter_RGB` to its input image."""

    def __call__(self, img):
        # Whatever `filter_RGB` yields is passed straight through, including
        # None should that ever be returned.
        return filter_RGB(img)
    
# Resize to 512x512 first, then force RGB mode.
transform = transforms.Compose([transforms.Resize((512, 512)), FilterRGB()])

dataset = datasets.ImageFolder(data_dir, transform=transform)

# Two-way lookup between class names and their integer positions in
# `dataset.classes`.
class_to_idx = {name: position for position, name in enumerate(dataset.classes)}
idx_to_class = {position: name for name, position in class_to_idx.items()}

# Replace each sample's label with an output path: the source path with
# `data_dir` swapped for `data_dir + '_cropped'`.
dataset.samples = [
    (src_path, src_path.replace(data_dir, data_dir + '_cropped'))
    for src_path, _label in dataset.samples
]

# Batches are collated with `training.collate_pil`.
loader = DataLoader(
    dataset,
    batch_size=128,
    num_workers=workers,
    collate_fn=training.collate_pil,
)

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [46]:
# Classification network initialised from the 'casia-webface' weights
# (the alternative checkpoint tried here was pretrained='vggface2'), with a
# 100-way output, moved onto `device`.
resnet = InceptionResnetV1(
    pretrained='casia-webface',
    classify=True,
    num_classes=100,
).to(device)

#### Define optimizer, scheduler, dataset, and dataloader

In [47]:
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
# Learning-rate milestones at 3 and 4.
scheduler = MultiStepLR(optimizer, [3, 4])

# PIL image -> float32 array -> tensor -> fixed standardization.
trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])
dataset = datasets.ImageFolder('/home/obasit/50024/kaggle/data/export2_data/kaggle_data/train/train_mtcnn_inlcuding_grey_512_20_smallest_face_prob_90_single_faces', transform=trans)
class_to_idx = {class_name: idx for idx, class_name in enumerate(dataset.classes)}
idx_to_class = {idx: class_name for class_name, idx in class_to_idx.items()}

# Shuffle with a dedicated, seeded generator so the 85/15 train/validation
# split is identical on every run (the global NumPy RNG was previously used
# unseeded, which made the split -- and the reported validation accuracy --
# change from run to run).
SPLIT_SEED = 0
img_inds = np.arange(len(dataset))
np.random.default_rng(SPLIT_SEED).shuffle(img_inds)

n_train = int(0.85 * len(img_inds))
train_inds = img_inds[:n_train]
val_inds = img_inds[n_train:]

# Both loaders read the same dataset; the samplers restrict each one to its
# own index subset.
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)



In [85]:
def _subset_loader(indices):
    """Build a DataLoader over `dataset` that draws only from `indices`.

    Uses the notebook-level `workers` and `batch_size` settings and a
    `SubsetRandomSampler`, exactly as each hand-written loader did.
    """
    return DataLoader(
        dataset,
        num_workers=workers,
        batch_size=batch_size,
        sampler=SubsetRandomSampler(indices),
    )


# Cut the shuffled index array into three contiguous segments at 33% and 66%.
first_cut = int(0.33 * len(img_inds))
second_cut = int(0.66 * len(img_inds))
train_inds1 = img_inds[:first_cut]
train_inds2 = img_inds[first_cut:second_cut]
train_inds3 = img_inds[second_cut:]

# Fold 1: train on segments 1 + 2, validate on segment 3.
train_loader1 = _subset_loader(np.concatenate([train_inds1, train_inds2]))
val_loader1 = _subset_loader(train_inds3)

# Fold 2: train on segments 2 + 3, validate on segment 1.
# NOTE(review): folds 2 and 3 drop the last five indices of their second
# training segment (`[:-5]`) while fold 1 does not; the reason is not evident
# from this notebook -- confirm it is intentional.
train_loader2 = _subset_loader(np.concatenate([train_inds2, train_inds3[:-5]]))
val_loader2 = _subset_loader(train_inds1)

# Fold 3: train on segments 3 + 1, validate on segment 2.
train_loader3 = _subset_loader(np.concatenate([train_inds3, train_inds1[:-5]]))
val_loader3 = _subset_loader(train_inds2)

#### Define loss and evaluation functions

In [48]:
# Cross-entropy loss.
loss_fn = torch.nn.CrossEntropyLoss()

# Per-batch metrics handed to `training.pass_epoch` via `batch_metrics`.
metrics = dict(
    fps=training.BatchTimer(),
    acc=training.accuracy,
)

#### Train model

In [49]:
# Number of train/validate passes actually run by the loop below. The progress
# header previously printed the run-parameter `epochs` (7) as the total while
# the loop ran 5 passes, so the log ended at "Epoch 5/7"; the header now
# reports the real total.
num_epochs = 5

writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

try:
    # Baseline validation pass before any weight update.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    for epoch in range(num_epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are supplied.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass: no optimizer/scheduler; the model is left in eval
        # mode when the loop ends.
        resnet.eval()
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )
finally:
    # Close the TensorBoard writer even if a pass raises.
    writer.close()

# NOTE(review): the checkpoint name says 'prob_80' while the training dataset
# directory says 'prob_90' -- confirm which is correct.
torch.save(resnet.state_dict(), 'facenet_model_train_mtcnn_inlcuding_grey_512_20_smallest_face_prob_80_single_faces_casia.pth')




Initial
----------
Valid |   241/241  | loss:    4.7904 | fps: 1254.8101 | acc:    0.0106   

Epoch 1/7
----------
Train |  1366/1366 | loss:    2.0594 | fps:  536.8164 | acc:    0.5443   
Valid |   241/241  | loss:    1.5744 | fps: 1334.1681 | acc:    0.6696   

Epoch 2/7
----------
Train |  1366/1366 | loss:    1.2984 | fps:  542.9641 | acc:    0.7248   
Valid |   241/241  | loss:    1.3563 | fps: 1277.0341 | acc:    0.7191   

Epoch 3/7
----------
Train |  1366/1366 | loss:    1.0091 | fps:  558.0370 | acc:    0.7799   
Valid |   241/241  | loss:    1.2327 | fps: 1328.6702 | acc:    0.7535   

Epoch 4/7
----------
Train |  1366/1366 | loss:    0.5190 | fps:  540.1210 | acc:    0.8840   
Valid |   241/241  | loss:    0.9475 | fps: 1327.7325 | acc:    0.8195   

Epoch 5/7
----------
Train |  1366/1366 | loss:    0.3403 | fps:  509.6160 | acc:    0.9230   
Valid |   241/241  | loss:    0.9428 | fps: 1214.0343 | acc:    0.8201   


### Testing

In [50]:
# Applied to every test image: float32 array -> tensor -> fixed
# standardization.
preprocess = transforms.Compose(
    [np.float32, transforms.ToTensor(), fixed_image_standardization]
)

# Resize to 160x160.
resize_t = transforms.Compose([transforms.Resize((160, 160))])

class CelebrityDatasetTest(torch.utils.data.Dataset):
    """Test-set dataset yielding ``{'image': tensor, 'Id': idx}`` samples.

    Sample ``idx`` is read from ``<idx>.jpg``. The file is looked up in
    ``images_dir`` first, then in ``fallback_dir``, and finally in ``raw_dir``
    (where it is additionally resized with ``resize_t``). Every image is
    converted to RGB and then passed through ``preprocess``.

    Args:
        images_dir: Primary directory holding ``<idx>.jpg`` files.
        transform: Optional callable applied to the finished sample dict.
        fallback_dir: Second directory to try; defaults to ``FALLBACK_DIR``.
        raw_dir: Last-resort directory, also used to count the samples;
            defaults to ``RAW_DIR``.

    Raises:
        OSError: From ``__getitem__`` when the image cannot be opened from any
            of the three directories.
    """

    # Default locations used when no override is given.
    FALLBACK_DIR = '/home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/'
    RAW_DIR = '/home/obasit/50024/kaggle/data/test/test/'

    def __init__(self, images_dir, transform=None, fallback_dir=None, raw_dir=None):
        self.images_dir = images_dir
        self.transform = transform
        self.fallback_dir = self.FALLBACK_DIR if fallback_dir is None else fallback_dir
        self.raw_dir = self.RAW_DIR if raw_dir is None else raw_dir
        # The dataset length is the number of .jpg files in `raw_dir`; count
        # them once instead of re-globbing on every `len()` call.
        self._length = len(glob.glob(os.path.join(self.raw_dir, '*.jpg')))

    def __len__(self):
        return self._length

    @staticmethod
    def _load_rgb(path, resize=None):
        """Open `path`, optionally resize it, and return an RGB copy.

        The underlying file is closed before returning.
        """
        with Image.open(path) as img:
            if resize is not None:
                img = resize(img)
            return img.convert('RGB')

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = str(idx) + '.jpg'
        primary_path = os.path.join(self.images_dir, file_name)
        fallback_path = os.path.join(self.fallback_dir, file_name)
        raw_path = os.path.join(self.raw_dir, file_name)

        # Only I/O failures (missing or unreadable file) trigger a fallback;
        # anything else, e.g. KeyboardInterrupt, propagates.
        try:
            image = self._load_rgb(primary_path)
        except OSError:
            try:
                image = self._load_rgb(fallback_path)
                print("found in here", fallback_path)
            except OSError:
                # Last resort: an error here is raised to the caller.
                image = self._load_rgb(raw_path, resize=resize_t)
                print("found in here", raw_path)

        image = preprocess(image)

        sample = {'image': image, 'Id': idx}

        if self.transform:
            sample = self.transform(sample)

        return sample
    
# NOTE(review): this rebinds the notebook-level `batch_size` (150 in the
# run-parameter cell); cells re-run after this one will see 32.
batch_size = 32

testset = CelebrityDatasetTest(
    images_dir='/home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_mtcnn_inlcuding_grey_512_20_smallest_face_prob_98/'
)

# Unshuffled loading with a single worker process.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
)

# Values of the 'Category' column of category.csv, as a plain list.
df = pd.read_csv('/home/obasit/50024/kaggle/data/category.csv')
classes = df['Category'].tolist()

In [51]:


resnet.to(device)
# Put the model in eval mode explicitly rather than relying on whichever mode
# the last executed cell happened to leave it in.
resnet.eval()

# NOTE(review): `categories_df` is not used in this cell; kept in case a later
# cell reads it.
categories_df = pd.read_csv('/home/obasit/50024/kaggle/data/category.csv', index_col=0)

# Accumulate plain Python lists and build the frame once; concatenating a
# DataFrame per batch copies every previous row on each iteration.
all_ids = []
all_categories = []

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images = data['image'].to(device)
        # The ids are only converted to a list, so they stay on the CPU.
        Id = data['Id'].tolist()

        # calculate outputs by running images through the network
        outputs = resnet(images)

        # the class with the highest score is what we choose as prediction
        _, predicted = torch.max(outputs, 1)

        all_ids.extend(Id)
        all_categories.extend(idx_to_class[idx] for idx in predicted.tolist())

bigger_df = pd.DataFrame(
    {'Id': all_ids, 'Category': all_categories},
    columns=['Id', 'Category'],
)

# Class names use underscores where the exported category uses spaces.
bigger_df['Category'] = bigger_df['Category'].str.replace('_', ' ')
print(bigger_df)
bigger_df.to_csv('facenet_model_train_mtcnn_inlcuding_grey_512_20_smallest_face_prob_80_single_faces_casia.csv')

found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/5.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/63.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/75.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/107.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/112.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/123.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/132.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/133.jpg
found in here /home/obasit/50024/kaggle/data/export2_data/kaggle_data/test/test_dfsd_inlcuding_grey/166.jpg
found in here /home/obasit/50024