In [1]:
import torch
import torch.nn as nn
import numpy as np
import os
import glob
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils
from PIL import Image
import torch.nn.functional as F

model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
#model.eval()

  from .collection import imread_collection_wrapper
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [15]:
def wrong_NLL(output, # Tensor of shape [b_sze, 210 (70 landmarks by 3)]
             target, # Tensor of shape [b_sze,140]
             ):

    """
    The output structure is essentially a 2d tensor of of shape [b_sze, 210 (70 landmarks by 3)].
    Each instance (210 length vector) are a sequence of (x,y) locations for 70 landmarks which makes the first 136 elements. The rest 70 elements of the vector 
    are the standard deviation of the probabilistic regression.
    """
    b_sze = output.size()[0]

    crit = nn.MSELoss(reduction='none')
    loss = crit(output[:,:140].view(b_sze,2,-1), target.view(b_sze,2,-1))
    loss = torch.sum(loss,dim=1)
    loss = loss/(2*output[:,140:]**2)
    
    return torch.sum(torch.log(output[:,140:]**2)) + torch.sum(loss)


def NLL(output, # Tensor of shape [b_sze, 210 (70 landmarks by 3)]
             target, # Tensor of shape [b_sze,140]
             ):

    """
    The output structure is essentially a 2d tensor of of shape [b_sze, 210 (70 landmarks by 3)].
    Each instance (210 length vector) are a sequence of (x,y) locations for 70 landmarks which makes the first 136 elements. The rest 68 elements of the vector 
    are the standard deviation of the probabilistic regression.
    """
    
    b_sze = output.size()[0]

    crit = nn.MSELoss(reduction='none')
    loss = crit(output[:,:140].view(b_sze,2,-1), target.view(b_sze,2,-1))
    loss = torch.sum(loss,dim=1)
    loss = loss/(2*output[:,140:]**2)
    val = torch.sum(0.5*(torch.log(output[:,140:]**2))) + torch.sum(loss)
    val = val.float()
    return val

In [116]:
class FaceLandmarksDataset(Dataset):
    def __init__(self,root_dir):
        self.root_dir = root_dir
        self.len = 0
        
    def __len__(self):
        files = os.listdir(self.root_dir)
        self.len = int(len(files)/3)
        return self.len
        
    def __getitem__(self,idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        item_name = str(idx)
        item_name = item_name.zfill(6)
        img_name = self.root_dir+item_name+".png"
        ldmks_file_name = self.root_dir+item_name+"_ldmks.txt"
        with open(ldmks_file_name) as f:
            landmarks = np.loadtxt(f)
        landmarks = torch.tensor(landmarks)
        landmarks = torch.reshape(landmarks,(140,))
        img = Image.open(img_name)
        resizer = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()])
        img = resizer(img)
        #mean, std = img.mean([1,2]), img.std([1,2])
        #normalizing = transforms.Normalize(mean,std)
        #img = normalizing(img)
        #to_tensor = transforms.ToTensor()
        #img = to_tensor(img)
        sample = {"image":img,"landmarks":landmarks}
        
        return sample

In [117]:
dataset = FaceLandmarksDataset("/workspace/EECE7370-Final/Dataset/")
training_data,testing_data = random_split(dataset,[900,100]) 
train_dataloader = DataLoader(training_data,batch_size=4,shuffle=True)
test_dataloader = DataLoader(testing_data,batch_size=4,shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [132]:
feature_extractor = model.features
class Mobile_LandmarkDetector(nn.Module):
    def __init__(self,feature_extractor_model):
            super().__init__()
            self.feature_extractor_model = feature_extractor_model
            self.regressor_op = nn.Sequential(nn.Linear(in_features=62720,out_features=1000,bias=True),nn.ReLU(),nn.BatchNorm1d(1000),
                                             nn.Dropout(p=0.4,inplace=False),
                                            nn.Linear(in_features=1000,out_features=600,bias=True),nn.ReLU(),nn.BatchNorm1d(600),
                                              nn.Dropout(p=0.4,inplace=False),
                                              nn.Linear(in_features=600,out_features=210,bias=True))
    
    def forward(self,x):
        x = self.feature_extractor_model(x)
        x = torch.flatten(x,1)
        x = self.regressor_op(x)
        return x

our_model = Mobile_LandmarkDetector(feature_extractor_model=feature_extractor)
for param in our_model.feature_extractor_model.parameters():
    param.requires_grad = False
    

In [130]:
our_model.to(device)
our_model.eval()
print(our_model(dataset[6]["image"].to(device).unsqueeze(0)))
#print(torch.min(dataset[344]["image"]))

tensor([[ 2.6312e-02,  4.3774e-02, -1.4872e-01,  2.0566e-02, -6.7021e-02,
          3.6945e-02, -5.7770e-02, -1.7208e-02, -6.4069e-02,  7.2887e-02,
          9.5071e-02, -6.3518e-02,  3.4955e-02, -7.3671e-02, -2.7077e-02,
          2.0253e-03,  8.9487e-02,  1.3833e-02,  2.8738e-02,  2.0771e-02,
         -5.3610e-02,  7.8733e-03,  1.2034e-01, -5.3338e-02, -1.1853e-01,
          8.6134e-02,  1.0712e-01,  1.6624e-02, -4.4952e-02, -9.3355e-02,
         -2.4779e-02, -1.9064e-01,  3.9919e-02,  3.1690e-02, -2.7848e-02,
          1.8180e-02,  1.0803e-01,  1.2506e-01, -7.6952e-02, -2.6176e-02,
         -3.1056e-01, -4.4673e-02, -3.1119e-02, -3.6114e-02,  7.1997e-03,
          1.1159e-01, -1.8721e-02,  6.2948e-02,  1.1520e-02, -8.7984e-03,
         -6.1053e-02, -2.5888e-02, -1.6172e-01,  1.6308e-02,  2.0130e-01,
          5.9937e-02, -7.8118e-02, -3.5907e-02,  4.6437e-02, -1.2567e-01,
         -6.8683e-02,  3.3732e-02,  2.3587e-01, -1.1235e-01,  8.3278e-02,
         -1.7914e-01,  1.0349e-02, -1.

In [138]:
our_model.to(device)
optimizer = torch.optim.Adam(our_model.parameters())
mse = nn.MSELoss(reduction="sum")
best_model_NLL = our_model
best_loss = np.inf
#Training
for q in range(50):
    our_model.train()
    print(f"epoch {q}")
    for i,(batch) in enumerate(train_dataloader):
        ip,op = batch["image"],batch["landmarks"]
        ip = ip.to(device)
        op = op.float().to(device)
        optimizer.zero_grad()
        pred_op = our_model(ip)
       # print(pred_op)
        loss = NLL(pred_op,op)
        loss.backward()
        optimizer.step()
        
    our_model.eval()    
    with torch.no_grad():
        tot_loss = 0
        for i_val,(batch_val) in enumerate(test_dataloader):
            ip_test,op_test = batch_val["image"],batch_val["landmarks"]
            ip_test = ip_test.to(device)
            op_test = op_test.float().to(device)
            
            pred_op = our_model(ip_test)
            l = mse(pred_op[:,:140],op_test)
            tot_loss += l.item()/(68*100)
        
    if tot_loss < best_loss:
        best_loss = tot_loss
        best_model_NLL = our_model

    print(f"Training loss is {loss.item()}, validation loss is {tot_loss}")

epoch 0


OutOfMemoryError: CUDA out of memory. Tried to allocate 240.00 MiB (GPU 0; 6.00 GiB total capacity; 5.08 GiB already allocated; 0 bytes free; 5.31 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [91]:
model.eval()

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

In [78]:
optimizer = torch.optim.SGD(our_model.parameters(), lr=0.001)
mse = nn.MSELoss(reduction="sum")
best_model_NLL = our_model
best_loss = np.inf

#Training
for i in range(1):
    our_model.train()
    for i,batch in enumerate(train_dataloader):
        ip,op = batch["image"],batch["landmarks"]
        #ip = ip.to(device)
        #op = op.to(device)
        optimizer.zero_grad()
        #our_model.to(device)
        pred_op = our_model(ip)
        pred_op = pred_op.float()
        op = op.float()
        loss = NLL(pred_op,op)
        loss.backward()
        optimizer.step()
        
    with torch.no_grad():
        tot_loss = 0
        for i_val,(batch_val) in enumerate(test_dataloader):
            ip_test,op_test = batch_val["image"],batch_val["landmarks"]
            #ip_test = ip_test.to(device)
            #op_test = op_test.float().to(device)
            op_test = op_test.float()
            pred_op = our_model(ip_test)
            pred_op = pred_op.float()
            l = mse(pred_op[:,:140],op_test)
            tot_loss += l.item()/(68*100)
        
    if tot_loss < best_loss:
        best_loss = tot_loss
        best_model_NLL = our_model

    print(f"Training loss is {loss.item()}, validation loss is {tot_loss}")

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor