In [1]:
import torch
import torch.nn as nn
import numpy as np
import os
import glob
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils
from PIL import Image
import torch.nn.functional as F

model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
#model.eval()

  from .collection import imread_collection_wrapper
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [91]:
def wrong_NLL(output, # Tensor of shape [b_sze, 210 (70 landmarks by 3)]
             target, # Tensor of shape [b_sze,140]
             ):

    """
    The output structure is essentially a 2d tensor of of shape [b_sze, 210 (70 landmarks by 3)].
    Each instance (210 length vector) are a sequence of (x,y) locations for 70 landmarks which makes the first 136 elements. The rest 70 elements of the vector 
    are the standard deviation of the probabilistic regression.
    """
    b_sze = output.size()[0]

    crit = nn.MSELoss(reduction='none')
    loss = crit(output[:,:140].view(b_sze,2,-1), target.view(b_sze,2,-1))
    loss = torch.sum(loss,dim=1)
    loss = loss/(2*output[:,140:]**2)
    
    return torch.sum(torch.log(output[:,140:]**2)) + torch.sum(loss)


def NLL(output, # Tensor of shape [b_sze, 210 (70 landmarks by 3)]
             target, # Tensor of shape [b_sze,140]
             ):

    """
    The output structure is essentially a 2d tensor of of shape [b_sze, 210 (70 landmarks by 3)].
    Each instance (210 length vector) are a sequence of (x,y) locations for 70 landmarks which makes the first 140 elements. The rest 70 elements of the vector 
    are the standard deviation of the probabilistic regression.
    """
    
    b_sze = output.size()[0]

    crit = nn.MSELoss(reduction='none')
    loss = crit(output[:,:140].view(b_sze,2,-1), target.view(b_sze,2,-1))
    loss = torch.sum(loss,dim=1)
    loss = loss/(2*output[:,140:]**2)
    val = torch.sum(0.5*(torch.log(output[:,140:]**2))) + torch.sum(loss)
    val = val.float()
    return val

In [120]:
class FaceLandmarksDataset(Dataset):
    def __init__(self,root_dir):
        self.root_dir = root_dir
        self.len = 0
        
    def __len__(self):
        files = os.listdir(self.root_dir)
        self.len = int(len(files)/3)
        return self.len
        
    def __getitem__(self,idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        item_name = str(idx)
        item_name = item_name.zfill(6)
        img_name = self.root_dir+item_name+".png"
        ldmks_file_name = self.root_dir+item_name+"_ldmks.txt"
        with open(ldmks_file_name) as f:
            landmarks = np.loadtxt(f)
        landmarks = torch.tensor(landmarks)
        landmarks = torch.reshape(landmarks,(140,))
        landmarks = landmarks/256 - 1
        #meanie = int(torch.mean(landmarks))
        #stdie = int(torch.std(landmarks))
        #landmarks = (landmarks-meanie)/stdie
        
        #landmarks = landmarks*2
        #landmarks = landmarks - 1
        img = Image.open(img_name)
        resizer = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()])
        img = resizer(img)
        #mean, std = img.mean([1,2]), img.std([1,2])
        #normalizing = transforms.Normalize(mean,std)
        #img = normalizing(img)
        #to_tensor = transforms.ToTensor()
        #img = to_tensor(img)
        sample = {"image":img,"landmarks":landmarks}
        
        return sample

In [121]:
dataset = FaceLandmarksDataset("/workspace/EECE7370-Final/Dataset/")
training_data,testing_data = random_split(dataset,[900,100]) 
train_dataloader = DataLoader(training_data,batch_size=16,shuffle=True)
test_dataloader = DataLoader(testing_data,batch_size=16,shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [122]:
feature_extractor = model.features
class Mobile_LandmarkDetector(nn.Module):
    def __init__(self,feature_extractor_model):
            super().__init__()
            self.feature_extractor_model = feature_extractor_model
            self.regressor_op = nn.Sequential(nn.Dropout(p=0.4),nn.Linear(in_features=1280,out_features=210,bias=True))#,nn.BatchNorm1d(520),
                                             #nn.ReLU(),nn.Dropout(p=0.4),
                                             #nn.Linear(in_features=520,out_features=210,bias=False))
          #  self.regressor_op = nn.Sequential(nn.Linear(in_features=62720,out_features=1000,bias=True),nn.ReLU(),nn.BatchNorm1d(1000),
          #                                   nn.Dropout(p=0.4,inplace=False),
                                        #      nn.Linear(in_features=5000,out_features=1000,bias=True),nn.ReLU(),nn.BatchNorm1d(1000),
          #                                  nn.Linear(in_features=1000,out_features=600,bias=False),nn.Sigmoid(),nn.BatchNorm1d(600),#nn.BatchNorm1d(600),
          #                                    nn.Dropout(p=0.4,inplace=False),
          #                                    nn.Linear(in_features=600,out_features=210,bias=True))
    
    
    def forward(self,x):
        x = self.feature_extractor_model(x)
        x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x,1)
        x = self.regressor_op(x)
        
        return x

our_model = Mobile_LandmarkDetector(feature_extractor_model=feature_extractor)
#for param in our_model.feature_extractor_model.parameters():
#    param.requires_grad = False
    

In [144]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [145]:
our_model.to(device)
optimizer = torch.optim.Adam(our_model.regressor_op.parameters())
mse = nn.MSELoss(reduction="sum")
best_model_NLL = our_model
best_loss = np.inf
#Training
for q in range(50):
    our_model.train()
    print(f"epoch {q}")
    for i,(batch) in enumerate(train_dataloader):
        ip,op = batch["image"],batch["landmarks"]
        ip = ip.to(device)
        op = op.float().to(device)
        optimizer.zero_grad()
        pred_op = our_model(ip)
        minie = torch.min(pred_op)
        maxie = torch.max(pred_op)
        pred_op = (pred_op-minie)/(maxie-minie)
        pred_op = pred_op*2 - 1
        #meanie = torch.mean(pred_op)
        #stdie = torch.std(pred_op)
        #pred_op = (pred_op - meanie)/stdie
        #pred_op = pred_op + 1
        #pred_op = pred_op/2
        pred_op = F.log_softmax(pred_op,dim=1)
        loss = NLL(pred_op,op)
        loss.backward()
        optimizer.step()
        writer.add_scalar("Loss/train", loss, q)
        
    our_model.eval()    
    with torch.no_grad():
        tot_loss = 0
        for i_val,(batch_val) in enumerate(test_dataloader):
            ip_test,op_test = batch_val["image"],batch_val["landmarks"]
            ip_test = ip_test.to(device)
            op_test = op_test.float().to(device)
            pred_op = our_model(ip_test)
            minie = torch.min(pred_op)
            maxie = torch.max(pred_op)
            pred_op = (pred_op-minie)/(maxie-minie)
            pred_op = pred_op*2 - 1
            writer.add_scalar("Loss/val",loss,q)
            l = mse(pred_op[:,:140],op_test)
            tot_loss += l.item()/(70*100)
        
    if tot_loss < best_loss:
        best_loss = tot_loss
        best_model_NLL = our_model

    print(f"Training loss is {loss.item()}, validation loss is {tot_loss}")

writer.flush()

epoch 0
Training loss is 689.496826171875, validation loss is 0.5990105503627232
epoch 1
Training loss is 690.9400634765625, validation loss is 0.5853193860735212
epoch 2
Training loss is 693.48388671875, validation loss is 0.6627021048409598
epoch 3
Training loss is 689.818359375, validation loss is 0.63000193132673
epoch 4
Training loss is 693.9735717773438, validation loss is 0.6291334054129465
epoch 5
Training loss is 690.93310546875, validation loss is 0.6508181457519531
epoch 6
Training loss is 693.5901489257812, validation loss is 0.5627679879324777
epoch 7
Training loss is 693.4259033203125, validation loss is 0.5969905809674944
epoch 8
Training loss is 690.3843994140625, validation loss is 0.5868107299804688
epoch 9
Training loss is 695.16455078125, validation loss is 0.5975168195452009
epoch 10
Training loss is 694.9708251953125, validation loss is 0.620442587716239
epoch 11
Training loss is 692.3648071289062, validation loss is 0.5952584097726004
epoch 12
Training loss is 69

In [138]:
best_model_NLL.eval()
q = best_model_NLL(dataset[617]["image"].unsqueeze(0).to(device))
minie = torch.max(q)
maxie = torch.min(q)
q = (q-minie)/(maxie-minie)
print(torch.max(q))
print(torch.min(q))

tensor(1., device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0., device='cuda:0', grad_fn=<MinBackward1>)


In [52]:
best_model_NLL.eval()
maxb = 0
minb = 0
for t in test_dataloader:
    q = best_model_NLL.feature_extractor_model(t["image"].to(device))
#q = torch.tanh(q)
    q = nn.functional.adaptive_avg_pool2d(q, (1, 1)) 
    maxa = int(torch.max(q))
    mina = int(torch.min(q))
    
    if maxa > maxb:
        maxb = maxa
    if mina < minb:
        minb = mina
        
print(maxb)
print(minb)

4
0


In [97]:
dataset = FaceLandmarksDataset("/workspace/EECE7370-Final/Dataset/")
op = dataset[6]["landmarks"]
t1 = F.normalize(op, p=1.0, dim = 0)
t2 = F.normalize(op, p=2.0, dim = 0)

print(op)
print(torch.max(op))
print(t1)
print(t2)

tensor([199.2880, 263.0710, 198.6850, 288.4830, 199.5730, 309.0610, 207.6470,
        333.1010, 218.0160, 351.5190, 239.5870, 368.5540, 264.6420, 384.2490,
        289.4260, 395.1560, 311.2890, 397.6530, 326.9350, 388.3930, 333.8160,
        373.2060, 336.9720, 354.0820, 337.0300, 336.9720, 334.2650, 319.3750,
        331.1890, 298.8690, 329.3120, 281.7240, 328.8930, 259.9870, 245.3160,
        248.7480, 258.2950, 245.3060, 273.2460, 243.6810, 286.3940, 244.8460,
        298.8000, 248.3550, 337.0580, 247.0650, 345.2810, 242.8680, 351.5970,
        241.4370, 355.5890, 242.1830, 355.5950, 245.5710, 319.8430, 262.2910,
        324.1780, 280.8380, 330.1780, 297.8250, 334.4750, 316.6940, 294.3200,
        321.1610, 307.9510, 329.5800, 321.3440, 331.3090, 329.5020, 327.2070,
        333.3070, 319.0400, 256.0030, 267.3470, 265.0080, 263.1880, 277.4720,
        262.5520, 285.7780, 268.5940, 279.3920, 272.3360, 267.7250, 273.4420,
        329.8760, 267.0460, 338.5210, 260.1220, 347.6640, 259.36

In [78]:
optimizer = torch.optim.SGD(our_model.parameters(), lr=0.001)
mse = nn.MSELoss(reduction="sum")
best_model_NLL = our_model
best_loss = np.inf

#Training
for i in range(1):
    our_model.train()
    for i,batch in enumerate(train_dataloader):
        ip,op = batch["image"],batch["landmarks"]
        #ip = ip.to(device)
        #op = op.to(device)
        optimizer.zero_grad()
        #our_model.to(device)
        pred_op = our_model(ip)
        pred_op = pred_op.float()
        op = op.float()
        loss = NLL(pred_op,op)
        loss.backward()
        optimizer.step()
        
    with torch.no_grad():
        tot_loss = 0
        for i_val,(batch_val) in enumerate(test_dataloader):
            ip_test,op_test = batch_val["image"],batch_val["landmarks"]
            #ip_test = ip_test.to(device)
            #op_test = op_test.float().to(device)
            op_test = op_test.float()
            pred_op = our_model(ip_test)
            pred_op = pred_op.float()
            l = mse(pred_op[:,:140],op_test)
            tot_loss += l.item()/(68*100)
        
    if tot_loss < best_loss:
        best_loss = tot_loss
        best_model_NLL = our_model

    print(f"Training loss is {loss.item()}, validation loss is {tot_loss}")

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

In [10]:
print(model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [12]:
model.to(device)
model.eval()
t = model.features(dataset[6]["image"].unsqueeze(0).to(device))
print(t.shape)

torch.Size([1, 1280, 7, 7])


In [119]:
t = dataset[6]["landmarks"]
meanie = torch.mean(t)
stdie = torch.std(t)
q = (t-meanie)/stdie

z = t/256 - 1

print(t)
print(q)
print(z)

tensor([199.2880, 263.0710, 198.6850, 288.4830, 199.5730, 309.0610, 207.6470,
        333.1010, 218.0160, 351.5190, 239.5870, 368.5540, 264.6420, 384.2490,
        289.4260, 395.1560, 311.2890, 397.6530, 326.9350, 388.3930, 333.8160,
        373.2060, 336.9720, 354.0820, 337.0300, 336.9720, 334.2650, 319.3750,
        331.1890, 298.8690, 329.3120, 281.7240, 328.8930, 259.9870, 245.3160,
        248.7480, 258.2950, 245.3060, 273.2460, 243.6810, 286.3940, 244.8460,
        298.8000, 248.3550, 337.0580, 247.0650, 345.2810, 242.8680, 351.5970,
        241.4370, 355.5890, 242.1830, 355.5950, 245.5710, 319.8430, 262.2910,
        324.1780, 280.8380, 330.1780, 297.8250, 334.4750, 316.6940, 294.3200,
        321.1610, 307.9510, 329.5800, 321.3440, 331.3090, 329.5020, 327.2070,
        333.3070, 319.0400, 256.0030, 267.3470, 265.0080, 263.1880, 277.4720,
        262.5520, 285.7780, 268.5940, 279.3920, 272.3360, 267.7250, 273.4420,
        329.8760, 267.0460, 338.5210, 260.1220, 347.6640, 259.36

In [75]:
our_model.to(device)
our_model.eval()
#print(our_model(dataset[6]["image"].to(device).unsqueeze(0)))
#print(torch.min(dataset[344]["image"]))
features = our_model.feature_extractor_model(dataset[6]["image"].to(device).unsqueeze(0))
features = torch.flatten(features,1)
children = [i for i in our_model.regressor_op.children()]
listed = [(i,j) for (i,j) in enumerate(children)]
print(listed)
for i,j in enumerate(children):
    if i < 6:
        features = j(features)
    if i == 6:
        print(features)
        features = j(features)
        print(features)
    if i == 7:
        features = j(features)
        print(features)
        

[(0, Dropout(p=0.4, inplace=False)), (1, Linear(in_features=327680, out_features=210, bias=True)), (2, Sigmoid())]
