In [48]:
import torch
import torch.nn as nn
import numpy as np
import os
import glob
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn.functional as F

# Load MobileNetV2 from the torchvision hub repo pinned at tag v0.10.0, asking for
# pretrained weights (the hub checkout is cached locally after the first download).
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
# The eval() call is left disabled: the network is fine-tuned further down, not used for inference here.
#model.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [49]:
def wrong_NLL(output, target):
    """
    Earlier variant of the landmark negative log-likelihood, retained under its
    original name. Compared with `NLL`, its log-variance term is NOT scaled by 0.5.

    output: tensor of shape [b_sze, 210]. Columns 0..139 hold the predicted
            coordinates, columns 140..209 hold the 70 predicted standard deviations.
    target: tensor of shape [b_sze, 140] with the ground-truth coordinates.

    The coordinate columns are viewed as [b_sze, 2, 70] and summed over dim 1,
    so coordinate column j is grouped with column j + 70 and shares sigma j.

    Returns a scalar tensor: sum(log(sigma^2)) + sum(squared_error / (2 * sigma^2)).
    """
    n_batch = output.shape[0]

    # Trailing 70 columns are the standard deviations; square them once and reuse.
    variance = output[:, 140:] ** 2

    predicted = output[:, :140].view(n_batch, 2, -1)
    expected = target.view(n_batch, 2, -1)

    # Element-wise squared error, collapsed over the size-2 axis -> [b_sze, 70].
    squared_error = nn.functional.mse_loss(predicted, expected, reduction='none').sum(dim=1)
    weighted_error = squared_error / (2 * variance)

    return torch.log(variance).sum() + weighted_error.sum()


def NLL(output, target):
    """
    Gaussian negative log-likelihood for probabilistic landmark regression.

    The landmark count is derived from `target`, so the same function serves both
    the 70-landmark layout (210 / 140 columns) and the 68-landmark layout
    (204 / 136 columns).

    Args:
        output: tensor of shape [b_sze, 3 * n_landmarks]. The first
            2 * n_landmarks columns are the predicted coordinates laid out as a
            sequence of (x, y) pairs: x0, y0, x1, y1, ... The last n_landmarks
            columns are the predicted standard deviation of each landmark.
        target: tensor of shape [b_sze, 2 * n_landmarks] holding the ground-truth
            coordinates in the same (x, y)-pair layout.

    Returns:
        Scalar tensor: the sum over batch and landmarks of
        0.5 * log(sigma_i^2) + ||mu_i - t_i||^2 / (2 * sigma_i^2).
        No clamping is applied, so a predicted sigma of exactly 0 yields inf/nan.

    Raises:
        ValueError: if the two tensors are not 2-D or their sizes do not follow
            the layout described above.
    """
    if output.dim() != 2 or target.dim() != 2:
        raise ValueError("NLL expects 2-D tensors: output [b, 3*n] and target [b, 2*n]")

    b_sze = output.size(0)
    n_coords = target.size(1)
    n_landmarks, leftover = divmod(n_coords, 2)
    if leftover or target.size(0) != b_sze or output.size(1) != n_coords + n_landmarks:
        raise ValueError(
            "shape mismatch: output is {} but target is {}; expected [b, 3*n] and [b, 2*n]".format(
                tuple(output.shape), tuple(target.shape)))

    # Labels loaded through numpy are float64; compute in the prediction's dtype.
    target = target.to(output.dtype)

    # Group every landmark's own x and y together -> [b_sze, n_landmarks, 2].
    # reshape (rather than view) also accepts non-contiguous inputs.
    mean = output[:, :n_coords].reshape(b_sze, n_landmarks, 2)
    truth = target.reshape(b_sze, n_landmarks, 2)
    variance = output[:, n_coords:] ** 2

    # Squared Euclidean distance per landmark -> [b_sze, n_landmarks].
    sq_dist = ((mean - truth) ** 2).sum(dim=2)

    return torch.sum(0.5 * torch.log(variance)) + torch.sum(sq_dist / (2 * variance))

In [29]:
class FaceLandmarksDataset(Dataset):
    """
    Face images with landmark annotations read from a flat directory.

    Sample `idx` is stored under its zero-padded 6-digit index:
    `<root_dir>/000123.png` for the image and `<root_dir>/000123_ldmks.txt`
    for the landmark coordinates (whitespace-separated numbers).
    """

    def __init__(self,root_dir):
        # root_dir: directory holding the samples; a trailing slash is optional.
        self.root_dir = root_dir

    def __len__(self):
        # One landmark file per sample. Counting those (instead of dividing the
        # total file count by 3) stays correct when the directory also holds
        # unrelated entries such as notebook checkpoints.
        return sum(1 for name in os.listdir(self.root_dir) if name.endswith("_ldmks.txt"))

    def __getitem__(self,idx):
        """
        Return {"image": array as returned by skimage's imread,
                "landmarks": 1-D float32 tensor of the flattened landmark file}.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        item_name = str(idx).zfill(6)

        # os.path.join works with or without a trailing separator on root_dir.
        img_name = os.path.join(self.root_dir, item_name + ".png")
        ldmks_file_name = os.path.join(self.root_dir, item_name + "_ldmks.txt")

        # float32 matches the dtype of the network parameters; the row-major
        # flatten keeps the values of one file row adjacent to each other.
        landmarks = torch.tensor(np.loadtxt(ldmks_file_name), dtype=torch.float32).reshape(-1)
        img = io.imread(img_name)

        return {"image": img, "landmarks": landmarks}

In [56]:
# Samples are read from the project's dataset directory and served in shuffled mini-batches of four.
dataset = FaceLandmarksDataset("/workspace/EECE7370-Final/Dataset/")
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=4)

# Run on the GPU whenever CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [55]:
# Keep only the `features` sub-module of the loaded MobileNetV2 as the backbone;
# nothing below uses the rest of `model`.
feature_extractor = model.features
class Mobile_LandmarkDetector(nn.Module):
    """
    Landmark regressor: a convolutional feature extractor followed by a small
    fully connected head.

    Args:
        feature_extractor_model: module mapping an image batch [b, C, H, W] to a
            feature map with 1280 channels, [b, 1280, h, w].
        num_outputs: length of the regression vector per image. The default 210
            is the layout the NLL losses in this file slice: 140 coordinates
            followed by 70 standard deviations.
    """

    def __init__(self,feature_extractor_model,num_outputs=210):
        super().__init__()
        self.feature_extractor_model = feature_extractor_model
        # Global average pooling collapses the spatial grid to 1x1, so the head
        # sees exactly 1280 features whatever the input resolution is.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.regressor_op = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=1280, out_features=600, bias=True),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=600, out_features=num_outputs, bias=True),
        )

    def forward(self,x):
        """Map an image batch [b, C, H, W] to predictions of shape [b, num_outputs]."""
        x = self.feature_extractor_model(x)
        x = self.pool(x)
        # The head is an attribute of the module, so it must be reached through self.
        x = self.regressor_op(x)
        return x

our_model = Mobile_LandmarkDetector(feature_extractor_model=feature_extractor)

# Freeze the backbone so that only the regression head is trained.
# The loop variable must be the same name the body assigns through; otherwise
# this raises NameError on a fresh kernel (or touches a stale tensor on a used one).
for param in our_model.feature_extractor_model.parameters():
    param.requires_grad = False
    

In [None]:
# The batches are moved to `device` below, so the model has to live there as well.
our_model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
optimizer = torch.optim.SGD(
    [p for p in our_model.parameters() if p.requires_grad], lr=0.001, momentum=0.9
)

#Training
for epoch in range(50):
    our_model.train()
    for batch in dataloader:
        # The dataset returns dict samples, so each collated batch is a dict as
        # well; tuple-unpacking it would yield the key strings, not the tensors.
        images = batch["image"].to(device)
        landmarks = batch["landmarks"].to(device).float()

        # Images arrive as uint8 [b, H, W, C] (assumes colour PNGs); the conv
        # layers need float [b, C, H, W]. Keep the first three channels so an
        # alpha channel, if present, is dropped, and scale to [0, 1].
        images = images[..., :3].permute(0, 3, 1, 2).float() / 255.0

        optimizer.zero_grad()

        pred_op = our_model(images)
        # NLL takes the prediction first and the ground truth second.
        loss = NLL(pred_op, landmarks)
        loss.backward()
        optimizer.step()