# Importing 
Importing necessary libraries and loading the PreTrained MobileNetV2 model

In [8]:
import torch
import torch.nn as nn
import numpy as np
import os
import glob
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils
from PIL import Image
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


# Custom loss functions

Following the paper's description of loss function as wrong_NLL()
Proposing our own description of loss function as NLL()

The first 140 elements are the flattened landmarks and the last 70 values correspond to the uncertainities

In [4]:
def wrong_NLL(output, # Tensor of shape [b_sze, 210 (70 landmarks by 3)]
             target, # Tensor of shape [b_sze,140]
             ):

    """
    The output structure is essentially a 2d tensor of of shape [b_sze, 210 (70 landmarks by 3)].
    Each instance (210 length vector) are a sequence of (x,y) locations for 70 landmarks which makes the first 136 elements. The rest 70 elements of the vector 
    are the standard deviation of the probabilistic regression.
    """
    b_sze = output.size()[0]

    crit = nn.MSELoss(reduction='none')
    loss = crit(output[:,:140].view(b_sze,2,-1), target.view(b_sze,2,-1))
    loss = torch.sum(loss,dim=1)
    loss = loss/(2*output[:,140:]**2)
    
    return torch.sum(torch.log(output[:,140:]**2)) + torch.sum(loss)


def NLL(output, # Tensor of shape [b_sze, 210 (70 landmarks by 3)]
        target, # Tensor of shape [b_sze,140]
             ):

    """
    The output structure is essentially a 2d tensor of of shape [b_sze, 210 (70 landmarks by 3)].
    Each instance (210 length vector) are a sequence of (x,y) locations for 70 landmarks which makes the first 140 elements. The rest 70 elements of the vector 
    are the standard deviation of the probabilistic regression.
    """
    
    b_sze = output.size()[0]

    crit = nn.MSELoss(reduction='none')
    loss = crit(output[:,:140].view(b_sze,2,-1), target.view(b_sze,2,-1))
    loss = torch.sum(loss,dim=1)
    loss = loss/(2*output[:,140:]**2)
    
    return torch.sum(0.5*(torch.log(output[:,140:]**2))) + torch.sum(loss)

# Custom Dataset Class

The class read root directory location and generates normalised values of images and landmarks

The image is mapped to range [0,1] after being resized to 224x224

The landmarks is mapped to range [-1,1] 

In [5]:
class FaceLandmarksDataset(Dataset):
    def __init__(self,root_dir):
        self.root_dir = root_dir
        self.len = 0
        
    def __len__(self):
        files = os.listdir(self.root_dir)
        self.len = int(len(files)/3)
        return self.len
        
    def __getitem__(self,idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        item_name = str(idx)
        item_name = item_name.zfill(6)
        
        img_name = self.root_dir+item_name+".png"
        
        ldmks_file_name = self.root_dir+item_name+"_ldmks.txt"
        with open(ldmks_file_name) as f:
            landmarks = np.loadtxt(f)
        landmarks = torch.tensor(landmarks)
        landmarks = torch.reshape(landmarks,(140,))
        landmarks = landmarks/256 - 1
        
        img = Image.open(img_name)
        resizer = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()])
        img = resizer(img)
        
        sample = {"image":img,"landmarks":landmarks}
        
        return sample

# Dataloading

Created a split of 90% to 10% of training and test with a batch size of 8

In [6]:
dataset = FaceLandmarksDataset("/workspace/EECE7370-Final/Dataset/")
training_data,testing_data = random_split(dataset,[900,100]) 
train_dataloader = DataLoader(training_data,batch_size=8,shuffle=True)
test_dataloader = DataLoader(testing_data,batch_size=8,shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Facial Landmark Detector Model

Used pretrained weights of MobileNetV2 CNN feature extractor and converting the final layer to output 210 values(70 x and y coordinates and 70 uncertainities

In [7]:
feature_extractor = model.features
class Mobile_LandmarkDetector(nn.Module):
    def __init__(self,feature_extractor_model):
            super().__init__()
            self.feature_extractor_model = feature_extractor_model
            self.regressor_op = nn.Sequential(nn.Dropout(p=0.5),nn.Linear(in_features=1280,out_features=210,bias=True))
    
    def forward(self,x):
        x = self.feature_extractor_model(x)
        x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x,1)
        x = self.regressor_op(x)
        
        return x

our_model = Mobile_LandmarkDetector(feature_extractor_model=feature_extractor)

# Training and Optimization

Will be only training the last layer and freeze the feature extractor layers

With AdamW optimizer and 100 epochs

In [9]:
writer = SummaryWriter()

our_model.to(device)
for param in our_model.feature_extractor_model.parameters():
    param.requires_grad = False

optimizer = torch.optim.AdamW(our_model.regressor_op.parameters())
mse = nn.MSELoss(reduction="mean") #NME
best_model_NLL = our_model
best_loss = np.inf
#Training
for q in range(100):
    our_model.train()
    print(f"epoch {q}")
    for i,(batch) in enumerate(train_dataloader):
        ip,op = batch["image"],batch["landmarks"]
        ip = ip.to(device)
        op = op.float().to(device)
        optimizer.zero_grad()
        pred_op = our_model(ip)
        crit = nn.GaussianNLLLoss()
        landmarks = pred_op[:,:140]
        landmarks = torch.reshape(landmarks,(-1,70,2))
        var = pred_op[:,140:]**2
        op = torch.reshape(op,(-1,70,2))
        loss = crit(landmarks,op,var)
        loss.backward()
        optimizer.step()
        writer.add_scalar("Training Loss:", loss, q)
        
    our_model.eval()    
    with torch.no_grad():
        tot_loss = 0
        for i_val,(batch_val) in enumerate(test_dataloader):
            ip_test,op_test = batch_val["image"],batch_val["landmarks"]
            ip_test = ip_test.to(device)
            op_test = op_test.float().to(device)
            pred_op = our_model(ip_test)
            #print(pred_op.shape)
            #minie = torch.min(pred_op)
            #maxie = torch.max(pred_op)
            #pred_op = (pred_op-minie)/(maxie-minie)
            #pred_op = pred_op*2 - 1
            l = mse(pred_op[:,:140],op_test)
            tot_loss += l.item()
            
        tot_loss = tot_loss/testing_data.__len__()
        writer.add_scalar("Testing Loss",tot_loss,q)
        
    if tot_loss < best_loss:
        best_loss = tot_loss
        best_model_NLL = our_model

    print(f"Training loss is {loss.item()}, validation loss is {tot_loss}")

writer.flush()

epoch 0
Training loss is 1.805535912513733, validation loss is 0.05068821787834168
epoch 1
Training loss is 0.9869672656059265, validation loss is 0.04473572731018066
epoch 2
Training loss is 0.9664744734764099, validation loss is 0.03702181696891785
epoch 3
Training loss is 1.0021789073944092, validation loss is 0.03585353374481201
epoch 4
Training loss is 1.0111545324325562, validation loss is 0.03808571100234985
epoch 5
Training loss is 0.9801980257034302, validation loss is 0.047182943820953366
epoch 6
Training loss is 1.018977165222168, validation loss is 0.04826064825057983
epoch 7
Training loss is 1.0246772766113281, validation loss is 0.04417385578155517
epoch 8
Training loss is 1.0144410133361816, validation loss is 0.04389870882034302
epoch 9
Training loss is 0.947803258895874, validation loss is 0.038990523219108585
epoch 10
Training loss is 1.0743993520736694, validation loss is 0.03848869919776916
epoch 11
Training loss is 1.05697500705719, validation loss is 0.03758422076

# Saving the model

In [196]:
path = "./Model/final_model.pt"
torch.save(best_model_NLL.state_dict(),path)
