In [28]:
import data_loader

import torch
import torch.nn as nn

import random
import normalizer
from PIL import Image, ImageDraw, ImageFont

import models
import modelutils

In [29]:
import idealface

In [30]:
class RMSELoss(nn.Module):
    """Root-mean-square error: the square root of ``nn.MSELoss`` over all elements."""

    def __init__(self):
        super().__init__()
        # Mean-reduced squared error; the root is taken in forward().
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return ``sqrt(mean((yhat - y) ** 2))`` as a scalar tensor."""
        mean_squared_error = self.mse(yhat, y)
        return mean_squared_error.sqrt()




# Load the training batches. NOTE: these absolute paths are machine-specific.
TRAIN_COORDS_DIR = "I:/NSU/CV/tests/torch/data/train/coords"
TRAIN_IMAGES_DIR = "I:/NSU/CV/tests/torch/data/train/images"
train = data_loader.load(TRAIN_COORDS_DIR, TRAIN_IMAGES_DIR,
                         firstn = 14000, batchSize = 16, shuffle = True,
                         displace = True, size = 400, show = False)

# Fit the target normalizer on the landmark tensors of every training batch.
scaler = normalizer.MinMaxNormalizer()
scaler.fit([y for _, y in train])

print("Number of batches:", len(train))
# Show the shapes of one batch only; printing every batch floods the cell output.
if len(train) > 0:
    first_inputs, first_targets = train[0]
    print(first_inputs.shape, first_targets.shape)

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is: ", device)



condcords07638_3d.txt dataimg07638.jpeg batch 1 from 875.0
400
0.78125 0.12890625 0.134765625
condcords15680_3d.txt dataimg15680.jpeg batch 1 from 875.0
400
0.78125 0.1875 0.03125
condcords25697_3d.txt dataimg25697.jpeg batch 1 from 875.0
400
0.78125 0.15625 0.146484375
condcords13234_3d.txt dataimg13234.jpeg batch 1 from 875.0
400
0.78125 0.20703125 0.03515625
condcords15801_3d.txt dataimg15801.jpeg batch 1 from 875.0
400
0.78125 0.1875 0.01171875
condcords22935_3d.txt dataimg22935.jpeg batch 1 from 875.0
400
0.78125 0.115234375 0.130859375
condcords03741_3d.txt dataimg03741.jpeg batch 1 from 875.0
400
0.78125 0.13671875 0.19921875
condcords25206_3d.txt dataimg25206.jpeg batch 1 from 875.0
400
0.78125 0.119140625 0.017578125
condcords13404_3d.txt dataimg13404.jpeg batch 1 from 875.0
400
0.78125 0.154296875 0.064453125
condcords00897_3d.txt dataimg00897.jpeg batch 1 from 875.0
400
0.78125 0.197265625 0.01953125
condcords25089_3d.txt dataimg25089.jpeg batch 1 from 875.0
400
0.78125 0.13

KeyboardInterrupt: 

Fitted normalizer: min=0.4104, max=2.9544


devise is:  cuda:0


In [4]:
# Shape of the image tensor of the first training batch.
train[0][0].shape

torch.Size([16, 3, 400, 400])

In [3]:
class SelectiveRMSELoss(nn.Module):
    """RMSE restricted to a subset of landmarks, with an optional L2 weight penalty.

    Args:
        pointlist: landmark indices (along dim 1 of the inputs) to score. Repeated
            indices are counted once per occurrence.
        reverse: when true, score every landmark that is NOT in ``pointlist``.
        device: kept for interface compatibility; the loss is computed on the
            device of the input tensors.
        l2_lambda: weight of the mean-squared-parameter penalty.
    """

    def __init__(self, pointlist, reverse, device, l2_lambda = 0.0):
        super().__init__()
        self.reverse = reverse
        self.pointlist = pointlist
        self.device = device
        self.l2_lambda = l2_lambda

    def _selected_points(self, num_points):
        """Return the landmark indices to score, without touching ``self.pointlist``."""
        if not self.reverse:
            return list(self.pointlist)
        excluded = set(self.pointlist)
        return [idx for idx in range(num_points) if idx not in excluded]

    def forward(self, x, y, parameters = None):
        """Compute the selective RMSE between ``x`` and ``y``.

        Args:
            x, y: tensors of identical shape ``(batch, landmarks, ...)``.
            parameters: optional iterable of tensors; when given and non-empty,
                ``l2_lambda * mean_over_tensors(mean(p ** 2))`` is added.

        Returns:
            Scalar tensor. The RMSE is the root of the mean squared error over the
            selected landmarks (the previous version divided by ``count + 1``).
            An empty selection or an empty batch yields 0.
        """
        squared_error = (x - y) ** 2
        # The complement is derived per call: storing it back on self would flip the
        # selection on every second forward pass.
        points = self._selected_points(x.shape[1])

        if points and x.shape[0] > 0:
            # Every landmark carries the same number of coordinates, so the mean of the
            # per-landmark means equals the mean over the whole selected slice.
            rmse = torch.sqrt(squared_error[:, points].mean())
        else:
            rmse = torch.zeros((), dtype=squared_error.dtype, device=squared_error.device)

        if parameters is None:
            return rmse

        penalties = [(param ** 2).mean() for param in parameters]
        if not penalties:
            # Nothing to regularise; avoids a division by zero.
            return rmse
        return rmse + (sum(penalties) / len(penalties)) * self.l2_lambda

In [4]:
# Landmark index groups; the indices address dim 1 of the (batch, 68, 3) landmark tensors.
# NOTE(review): index 2 appears twice in mouth_pointlist, so a loss that iterates over this
# list counts that landmark twice — confirm whether the last entry is a typo.
mouth_pointlist = [44, 7, 33, 14, 2, 31, 49, 15, 42, 32, 9, 51, 38, 61,
    18, 23, 12, 47, 67, 1, 2]
# Not referenced by any cell visible in this notebook.
mouth_boundaries = [7, 14, 15, 67]
# Indices averaged by get_mean_coords to obtain one centre point per eye.
eye_L_pointlist = [62, 65, 0, 13, 34, 64]
eye_R_pointlist = [16, 36, 54, 55, 53, 63]

In [5]:
def get_mean_coords(landmarks_list, tensor):
    """Average the x and y coordinates of the selected landmarks for every sample.

    Args:
        landmarks_list: landmark indices along dim 1 of ``tensor``.
        tensor: floating-point tensor indexed as ``tensor[sample, landmark, coord]``,
            where coord 0 is x and coord 1 is y.

    Returns:
        Tuple ``(x, y)`` of 1-D tensors with one mean per sample, on the same device
        as ``tensor`` (no dependency on a notebook-global ``device``). An empty
        ``landmarks_list`` yields NaN means.
    """
    index = torch.as_tensor(list(landmarks_list), dtype=torch.long, device=tensor.device)
    selected = tensor.index_select(1, index)
    mean_x = selected[:, :, 0].mean(dim=1)
    mean_y = selected[:, :, 1].mean(dim=1)
    return mean_x, mean_y


def _clamp_window_start(center, length, limit):
    """Return the start of a ``length``-long window centred near ``center`` that fits in ``[0, limit]``."""
    start = int(center - length / 2)
    return max(0, min(start, limit - length))


def crop_mouth(image_tensor_array, landmarks_tensor_array, eye_L_pointlist, eye_R_pointlist,
               crop_width = 200, crop_height = 128):
    """Cut a fixed-size window around the estimated mouth position of every image.

    The mouth centre is extrapolated from the two eye centres: it lies below the
    eye midpoint by the horizontal eye distance, shifted sideways by the vertical
    eye offset. Coordinates are multiplied by the image size, as in the original code.

    Args:
        image_tensor_array: tensor of shape ``(batch, channels, height, width)``.
        landmarks_tensor_array: landmark tensor accepted by ``get_mean_coords``.
        eye_L_pointlist, eye_R_pointlist: landmark indices of the left / right eye.
        crop_width, crop_height: window size in pixels (defaults reproduce the
            original +/-100 by +/-64 window); capped at the image size.

    Returns:
        Tensor ``(batch, channels, crop_height, crop_width)``. The window is shifted
        back inside the image when it would cross a border, so every crop has the same
        size and ``torch.stack`` cannot fail on mismatched shapes.
    """
    x_l_ar, y_l_ar = get_mean_coords(eye_L_pointlist, landmarks_tensor_array)
    x_r_ar, y_r_ar = get_mean_coords(eye_R_pointlist, landmarks_tensor_array)

    # Height and width are tracked separately; the original reused the width for both axes.
    height = image_tensor_array.shape[2]
    width = image_tensor_array.shape[3]
    crop_w = min(int(crop_width), width)
    crop_h = min(int(crop_height), height)

    crops = []
    for image_tensor, x_l, y_l, x_r, y_r in zip(image_tensor_array, x_l_ar, y_l_ar, x_r_ar, y_r_ar):
        dx = x_r - x_l
        dy = y_r - y_l
        mouth_x = (x_l + x_r) / 2 + dy
        # sqrt(1 - dy^2 / d^2) * d with d = sqrt(dx^2 + dy^2) is |dx|; the direct form
        # avoids NaN when the eye centres coincide or rounding makes the radicand negative.
        mouth_y = (y_l + y_r) / 2 + torch.abs(dx)

        x1 = _clamp_window_start(mouth_x * width, crop_w, width)
        y1 = _clamp_window_start(mouth_y * height, crop_h, height)
        crops.append(image_tensor[:, y1:y1 + crop_h, x1:x1 + crop_w])

    if not crops:
        # Empty batch: return an empty tensor with the expected trailing shape.
        return image_tensor_array[:, :, :crop_h, :crop_w]
    return torch.stack(crops)


In [6]:
class GlobalDetector(nn.Module):
    """CNN that regresses 68 three-component landmarks from an image batch.

    The supplied eye detector is stored with its parameters frozen; the current
    forward pass does not call it. ``baseline`` is stored as ``self.idealface``.
    """

    def __init__(self, device, eyes_detector, eye_L_pointlist, eye_R_pointlist, baseline):
        super().__init__()
        self.eyes_detector = eyes_detector
        self.idealface = baseline
        self.device = device
        # The wrapped eye detector must not receive gradient updates.
        for frozen in self.eyes_detector.parameters():
            frozen.requires_grad = False
        self.eye_L_pointlist = eye_L_pointlist
        self.eye_R_pointlist = eye_R_pointlist
        self.last_detector_size = 128

        # Layers are created in a fixed order so parameter names and random
        # initialisation stay the same.
        self.adpool = nn.AdaptiveAvgPool2d((128, 128)).to(device)
        self.conv1 = nn.Conv2d(3, 16, 3, padding = 1).to(device)
        self.pool = nn.MaxPool2d(2, 2).to(device)
        self.conv2 = nn.Conv2d(16, 32, 3, padding = 1).to(device)
        self.conv3 = nn.Conv2d(32, 64, 3, padding = 1).to(device)
        self.conv4 = nn.Conv2d(64, self.last_detector_size, 3, padding = 1).to(device)
        hidden = 356
        # 128x128 input halved three times gives 16x16 feature maps.
        self.fc1 = nn.Linear(self.last_detector_size*16*16, hidden).to(device)
        self.fc2 = nn.Linear(hidden, hidden).to(device)
        self.prelast = nn.Linear(hidden, hidden).to(device)
        self.fc_last = nn.Linear(hidden, 3 * 68).to(device)
        self.act = nn.ReLU().to(device)
        self.sigm = nn.Sigmoid().to(device)

    def forward(self, x, needshow = False):
        """Map ``[batch, 3, H, W]`` images to ``[batch, 68, 3]`` landmark predictions.

        When ``needshow`` is true, the first two feature maps after the first
        convolution stage are displayed through ``modelutils.show_tensor``.
        """
        feats = self.pool(self.act(self.conv1(self.adpool(x))))
        if needshow:
            preview = modelutils.show_tensor(feats[0:2], landmarks=None, nolandmarks=True)
            preview.show()
        for conv in (self.conv2, self.conv3):
            feats = self.pool(self.act(conv(feats)))
        feats = self.act(self.conv4(feats))

        flat = feats.view(-1, self.last_detector_size*16*16)
        for dense in (self.fc1, self.fc2, self.prelast):
            flat = self.act(dense(flat))

        # A disabled experiment used to add an idealface baseline, positioned from
        # the predicted eye centres, to every sample at this point.
        return self.fc_last(flat).view(-1, 68, 3)

In [7]:
class MouthPointsDetector(nn.Module):
    """Landmark regressor that works on a mouth crop located by a frozen eye detector."""

    def __init__(self, device, eyes_detector, eye_L_pointlist, eye_R_pointlist):
        super().__init__()
        self.eyes_detector = eyes_detector
        # The eye detector only provides crop coordinates and is never trained here.
        for frozen in self.eyes_detector.parameters():
            frozen.requires_grad = False

        self.last_detector_size = 32
        self.eye_L_pointlist = eye_L_pointlist
        self.eye_R_pointlist = eye_R_pointlist

        self.conv1 = nn.Conv2d(3, 40, 3, padding = 1).to(device)
        self.pool = nn.MaxPool2d(2, 2).to(device)
        self.conv2 = nn.Conv2d(40, 30, 3, padding = 1).to(device)
        self.conv3 = nn.Conv2d(30, 20, 3, padding = 1).to(device)
        # conv4 and conv5 are not used by forward(); they stay so the parameter set is unchanged.
        self.conv4 = nn.Conv2d(25, 20, 3, padding = 1).to(device)
        self.conv5 = nn.Conv2d(3, 6, 3, padding = 1).to(device)
        self.conv6 = nn.Conv2d(20, self.last_detector_size, 3, padding = 1).to(device)
        hidden = 400

        self.adpool_size = (54, 30)
        self.adpool = nn.AdaptiveAvgPool2d(self.adpool_size).to(device)

        pooled_h, pooled_w = self.adpool_size
        self.fc1 = nn.Linear(self.last_detector_size*pooled_h*pooled_w, hidden).to(device)
        self.fc_list = nn.ModuleList()
        for _ in range(12):
            self.fc_list.append(nn.Linear(hidden, hidden).to(device))
        self.prelast = nn.Linear(hidden, hidden).to(device)
        self.fc_last = nn.Linear(hidden, 3 * 68).to(device)
        self.act = nn.ReLU().to(device)
        self.tanh = nn.Tanh().to(device)

    def forward(self, x, needshow = False):
        """Map ``[batch, 3, H, W]`` images to ``[batch, 68, 3]`` landmark predictions.

        The eye detector's output positions the mouth crop. With ``needshow`` set,
        up to four samples of the last convolutional activation are displayed.
        """
        ans_with_eyes = self.eyes_detector(x)
        x = crop_mouth(x, ans_with_eyes, self.eye_L_pointlist, self.eye_R_pointlist)

        x = self.tanh(self.conv1(x))
        for conv in (self.conv2, self.conv3):
            x = self.pool(self.tanh(conv(x)))
        x = self.tanh(self.conv6(x))

        if needshow:
            for i in range(min(x.shape[0], 4)):
                # Clamp to [0, 1] and rescale by the mean to make the maps visible.
                shown = x.clone()
                shown.clamp_(0.0, 1.0)
                scale = shown.mean()
                shown *= 1 / scale
                preview = modelutils.show_tensor(shown[i][1:3], ans_with_eyes[i], nolandmarks=True)
                preview.show()
                print(x.shape)

        x = self.adpool(x)
        pooled_h, pooled_w = self.adpool_size[0], self.adpool_size[1]
        x = x.view(-1, self.last_detector_size*pooled_h*pooled_w)
        x = self.act(self.fc1(x))
        for dense in self.fc_list:
            x = self.tanh(dense(x))
        x = self.tanh(self.prelast(x))
        return self.fc_last(x).view(-1, 68, 3)


In [13]:
class EyesDetector(nn.Module):
    """Small CNN that regresses 68 three-component landmarks from an image batch."""

    def __init__(self, device):
        super(EyesDetector, self).__init__()
        self.last_detector_size = 16
        self.adpool = nn.AdaptiveAvgPool2d((96, 96)).to(device)
        self.conv1 = nn.Conv2d(3, 16, 3, padding = 1).to(device)
        self.pool = nn.AvgPool2d(2, 2).to(device)
        self.conv2 = nn.Conv2d(16, 32, 3, padding = 1).to(device)
        self.conv3 = nn.Conv2d(32, 64, 3, padding = 1).to(device)
        self.conv4 = nn.Conv2d(64, self.last_detector_size, 3, padding = 1).to(device)
        fcsize = 500
        # 96x96 input halved three times gives 12x12 feature maps.
        self.fc1 = nn.Linear(self.last_detector_size*12*12, fcsize).to(device)
        self.fc_last = nn.Linear(fcsize, 3 * 68).to(device)
        self.act = nn.ReLU().to(device)
        self.sigm = nn.Sigmoid().to(device)


    def forward(self, x, needshow = False):
        """Map ``[batch, 3, H, W]`` images to ``[batch, 68, 3]`` landmark predictions.

        When ``needshow`` is true, the first two feature maps after the first
        convolution stage are displayed through ``modelutils.show_tensor``.
        """
        x = self.adpool(x)
        x = self.pool(self.act(self.conv1(x)))
        if(needshow):
            xshow = modelutils.show_tensor(x[0:2], landmarks=None, nolandmarks=True)
            xshow.show()
        x = self.pool(self.act(self.conv2(x)))
        # The previous code called self.pool() with no input (TypeError) and skipped
        # conv3/conv4, although fc1 expects 16 channels at 12x12.
        # NOTE(review): the conv3 -> pool -> conv4 order follows GlobalDetector; confirm
        # it matches the order used when eyeDetector.pth was trained.
        x = self.pool(self.act(self.conv3(x)))
        x = self.act(self.conv4(x))

        x = x.view(-1, self.last_detector_size*12*12)
        x = self.act(self.fc1(x))
        x = self.fc_last(x)
        x = x.view(-1, 68, 3)
        return x

In [19]:
# Rebuild the global detector and restore its trained weights.
# Requires modelEyeDetector and ideal_face_tensor, which are created by the
# eye-detector loading cell; run that cell first.
modelGlobalDetector = GlobalDetector(device, modelEyeDetector,
                            eye_L_pointlist, eye_R_pointlist, ideal_face_tensor).to(device)
# map_location lets a checkpoint saved on GPU load on the CPU fallback device.
state = torch.load('globalDetector.pth', map_location=device)
modelGlobalDetector.load_state_dict(state['model_state_dict'])
modelGlobalDetector.to(device)
# The previous code built `model` but loaded the weights into the undefined name
# modelGlobalDetector; both names now refer to the restored network.
model = modelGlobalDetector
modelGlobalDetector.eval()

GlobalDetector(
  (eyes_detector): EyesDetector(
    (adpool): AdaptiveAvgPool2d(output_size=(96, 96))
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fc1): Linear(in_features=2304, out_features=500, bias=True)
    (fc_last): Linear(in_features=500, out_features=204, bias=True)
    (act): ReLU()
    (sigm): Sigmoid()
  )
  (adpool): AdaptiveAvgPool2d(output_size=(128, 128))
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1

In [14]:
# Restore the trained eye detector and put it in inference mode.
modelEyeDetector = EyesDetector(device)
# map_location lets a checkpoint saved on GPU load on the CPU fallback device.
state = torch.load('eyeDetector.pth', map_location=device)
modelEyeDetector.load_state_dict(state['model_state_dict'])
modelEyeDetector.to(device)
modelEyeDetector.eval()

# Reference face handed to GlobalDetector as its `baseline` argument.
ideal_face_tensor = idealface.loadIdeal(loadimage=False)

In [22]:
# Create a fresh GlobalDetector to train; this rebinds `model`.
model = GlobalDetector(device, modelEyeDetector, 
                            eye_L_pointlist, eye_R_pointlist, ideal_face_tensor).to(device)

#model = EyesDetector(device)

# Alternative objectives restricted to a landmark subset (disabled):
#criterion = SelectiveRMSELoss(mouth_pointlist, False, device, l2_lambda = 0.00)
#criterion = SelectiveRMSELoss(eye_L_pointlist + eye_R_pointlist, False, device, l2_lambda = 0.0001)
criterion = RMSELoss()

learning_rate = 0.0001
# model.parameters() also yields the eye detector's parameters, which GlobalDetector
# has marked requires_grad=False.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

Batch 19, Loss: 0.58533
Batch 39, Loss: 0.57724
Batch 59, Loss: 0.57672
Batch 79, Loss: 0.56976
Batch 99, Loss: 0.55959
Batch 119, Loss: 0.55461
Batch 139, Loss: 0.54749
Batch 159, Loss: 0.53515
Batch 179, Loss: 0.52224
Batch 199, Loss: 0.50827
Batch 219, Loss: 0.49373
Batch 239, Loss: 0.47654
Batch 259, Loss: 0.46528
Batch 279, Loss: 0.45457
Batch 299, Loss: 0.44676
Batch 319, Loss: 0.44145
Batch 339, Loss: 0.42843
Batch 359, Loss: 0.41599
Batch 379, Loss: 0.40054
Batch 399, Loss: 0.38855
Batch 419, Loss: 0.38045
Batch 439, Loss: 0.36269
Batch 459, Loss: 0.36107
Batch 479, Loss: 0.34677
Batch 499, Loss: 0.33408
Batch 519, Loss: 0.31977
Batch 539, Loss: 0.31239
Batch 559, Loss: 0.29799
Batch 579, Loss: 0.28819
Batch 599, Loss: 0.27815
Batch 619, Loss: 0.26343
Batch 639, Loss: 0.25023
Batch 659, Loss: 0.23765
Batch 679, Loss: 0.22366
Batch 699, Loss: 0.21271
Batch 719, Loss: 0.19853
Batch 739, Loss: 0.18651
Batch 759, Loss: 0.17695
Batch 779, Loss: 0.16848
Batch 799, Loss: 0.15843
Batch

In [24]:


# learning loop
ACCUMULATION_STEPS = 20  # batches whose gradients are summed before one optimizer step
LAST_BATCH_IDX = 1600    # an epoch stops after the batch with this index

epoch_loss = 0
for epoch in range(7):
    epoch_loss = 0
    step_loss = 0.0
    step_count = 0
    batches_seen = 0
    random.shuffle(train)
    # Drop gradients left over from an incomplete accumulation window.
    optimizer.zero_grad()
    for batch_idx, (inputs, answers) in enumerate(train):
        # Visual debugging is off; set to True to preview predictions for a batch.
        needshow = False
        inputs = inputs.to(device)
        answers = answers.to(device)
        # NOTE(review): the loss compares inverse-transformed predictions with
        # transformed targets — confirm both sides are meant to be in the same space.
        answers = scaler.transform(answers)
        outputs = model(inputs)
        outputs = scaler.inverse_transform(outputs)

        if needshow:
            xshow = modelutils.show_tensor(inputs[0], outputs[0], nolandmarks=False)
            xshow.show()

        loss = criterion(outputs, answers)
        # Backpropagate per batch: the accumulated gradient equals that of the summed
        # loss, without keeping ACCUMULATION_STEPS autograd graphs in memory.
        loss.backward()
        loss_value = loss.item()
        step_loss += loss_value
        step_count += 1
        if step_count == ACCUMULATION_STEPS:
            optimizer.step()
            optimizer.zero_grad()
            print(f'Batch {batch_idx}, Loss: {step_loss / step_count:.5f}')
            step_count = 0
            step_loss = 0.0

        epoch_loss += loss_value
        batches_seen += 1

        if batch_idx == LAST_BATCH_IDX:
            break

    #learning_rate /= 10.0
    #optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    if batches_seen:
        # Average over the batches actually processed, not len(train).
        print(f'shape {inputs.shape}, Epoch {epoch + 1}, Loss: {epoch_loss/batches_seen:.5f}')


Batch 19, Loss: 0.02872
Batch 39, Loss: 0.03014
Batch 59, Loss: 0.03046
Batch 79, Loss: 0.02915
Batch 99, Loss: 0.02984
Batch 119, Loss: 0.02970
Batch 139, Loss: 0.02911
Batch 159, Loss: 0.03004
Batch 179, Loss: 0.02875
Batch 199, Loss: 0.02858
Batch 219, Loss: 0.03106
Batch 239, Loss: 0.03013
Batch 259, Loss: 0.02939
Batch 279, Loss: 0.02965
Batch 299, Loss: 0.03084
Batch 319, Loss: 0.02969
Batch 339, Loss: 0.03101
Batch 359, Loss: 0.03022
Batch 379, Loss: 0.03119
Batch 399, Loss: 0.03066
Batch 419, Loss: 0.03122
Batch 439, Loss: 0.03385
Batch 459, Loss: 0.03044
Batch 479, Loss: 0.02879
Batch 499, Loss: 0.02922
Batch 519, Loss: 0.02896
Batch 539, Loss: 0.03030
Batch 559, Loss: 0.02993
Batch 579, Loss: 0.02999
Batch 599, Loss: 0.02841
Batch 619, Loss: 0.03279
Batch 639, Loss: 0.03028
Batch 659, Loss: 0.02749
Batch 679, Loss: 0.03028
Batch 699, Loss: 0.03009
Batch 719, Loss: 0.02874
Batch 739, Loss: 0.03136
Batch 759, Loss: 0.02944
Batch 779, Loss: 0.03015
Batch 799, Loss: 0.02960
Batch

In [17]:
# Test setup: rebinds `criterion` to an RMSE over the eye landmarks only, resets the
# running loss, and loads the test batches. NOTE: the absolute paths are machine-specific.
criterion = SelectiveRMSELoss(eye_L_pointlist + eye_R_pointlist, False, device)
epoch_loss = 0
test = data_loader.load("I:/NSU/CV/tests/torch/data/test/coords",
                        "I:/NSU/CV/tests/torch/data/test/images",  
                        firstn = 2000, batchSize = 16, shuffle = True, displace = True, 
                        size = 400, show = False)
# Shuffle the loaded batches in place.
random.shuffle(test)
                        


condcords27382_3d.txt dataimg27382.jpeg batch 1 from 125.0
400
0.78125 0.123046875 0.146484375
condcords28724_3d.txt dataimg28724.jpeg batch 1 from 125.0
400
0.78125 0.1328125 0.1953125
condcords27678_3d.txt dataimg27678.jpeg batch 1 from 125.0
400
0.78125 0.205078125 0.01171875
condcords28082_3d.txt dataimg28082.jpeg batch 1 from 125.0
400
0.78125 0.181640625 0.125
condcords28649_3d.txt dataimg28649.jpeg batch 1 from 125.0
400
0.78125 0.189453125 0.1171875
condcords27497_3d.txt dataimg27497.jpeg batch 1 from 125.0
400
0.78125 0.125 0.0390625
condcords29323_3d.txt dataimg29323.jpeg batch 1 from 125.0
400
0.78125 0.1328125 0.111328125
condcords28790_3d.txt dataimg28790.jpeg batch 1 from 125.0
400
0.78125 0.212890625 0.0
condcords27912_3d.txt dataimg27912.jpeg batch 1 from 125.0
400
0.78125 0.185546875 0.15625
condcords29154_3d.txt dataimg29154.jpeg batch 1 from 125.0
400
0.78125 0.109375 0.0
condcords27198_3d.txt dataimg27198.jpeg batch 1 from 125.0
400
0.78125 0.146484375 0.126953125
c

In [91]:
# Display the module tree of whatever `model` is currently bound to.
model

mouthBoundDetector(
  (adpool): AdaptiveAvgPool2d(output_size=(128, 128))
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 9, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(9, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(20, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=32768, out_features=256, bias=True)
  (fc_list): ModuleList(
    (0-2): 3 x Linear(in_features=256, out_features=256, bias=True)
  )
  (prelast): Linear(in_features=256, out_features=256, bias=True)
  (fc_last): Linear(in_features=256, out_features=204, bias=True)
  (act): ReLU()
  (sigm): Sigmoid()
)

In [16]:
# Evaluate `model` on the test batches with the current `criterion`.
# The running total is reset here so that re-running this cell does not add to a
# previous evaluation.
epoch_loss = 0
with torch.no_grad():
    for batch_idx, (inputs, answers) in enumerate(test):
        inputs = inputs.to(device)
        answers = answers.to(device)
        # NOTE(review): as in training, predictions are inverse-transformed while the
        # targets are transformed — confirm both sides are in the same space.
        answers = scaler.transform(answers)
        outputs = model(inputs)
        outputs = scaler.inverse_transform(outputs)
        loss = criterion(outputs, answers)
        epoch_loss += loss.item()

    if len(test) > 0:
        print(f'Test Loss: {epoch_loss/len(test):.4f}')
    else:
        print('Test set is empty; no loss computed')


Test Loss: 0.0118


In [27]:
# Visual check: draw predicted and reference landmarks on one image.
# The test batches are shuffled in place first, so test[1] is a different batch each run.
random.shuffle(test)
with torch.no_grad():
    inputs, answers = test[1]
    inputs = inputs.to(device)
    answers = answers.to(device)
    outputs = model(inputs)
    # Predictions are inverse-transformed; the answers are used as loaded.
    outputs = scaler.inverse_transform(outputs)
    # Disabled experiment: overwrite landmarks 1 and 2 with the mean eye centres.
    # x_l, y_l = get_mean_coords(eye_L_pointlist, outputs)
    # x_r, y_r = get_mean_coords(eye_R_pointlist, outputs)
    # print(x_l[0], y_l[0], x_r[0], y_r[0])
    # outputs[0][1][0] = x_l[0]
    # outputs[0][1][1] = y_l[0]
    # outputs[0][2][0] = x_r[0]
    # outputs[0][2][1] = y_r[0]
    print(outputs.shape, answers.shape)
    # First image of the batch with the model's landmarks ...
    img = modelutils.show_tensor(inputs[0], outputs[0])
    img.show()
    # ... and the same image with the reference landmarks.
    imgdlib = modelutils.show_tensor(inputs[0], answers[0])
    imgdlib.show()



torch.Size([16, 68, 3]) torch.Size([16, 68, 3])


In [25]:


# Save the trained weights of `model` under the 'model_state_dict' key.
# 'globalDetector.pth' is the file the global-detector loading cell reads back.
torch.save({
    'model_state_dict': model.state_dict(),
}, 'globalDetector.pth')



In [None]:
class mouthBoundDetector(nn.Module):
    """CNN that regresses 68 three-component landmarks from an image batch."""

    def __init__(self, device):
        super(mouthBoundDetector, self).__init__()
        self.last_detector_size = 128
        self.adpool = nn.AdaptiveAvgPool2d((128, 128)).to(device)
        self.conv1 = nn.Conv2d(3, 6, 3, padding = 1).to(device)
        self.pool = nn.MaxPool2d(2, 2).to(device)
        self.conv2 = nn.Conv2d(6, 9, 3, padding = 1).to(device)
        self.conv3 = nn.Conv2d(9, 20, 3, padding = 1).to(device)
        self.conv4 = nn.Conv2d(20, self.last_detector_size, 3, padding = 1).to(device)
        fcsize = 256
        # 128x128 input halved three times gives 16x16 feature maps.
        self.fc1 = nn.Linear(self.last_detector_size*16*16, fcsize).to(device)
        # ModuleList registers the hidden layers; with a plain Python list they were
        # missing from parameters(), state_dict() and .to().
        self.fc_list = nn.ModuleList(
            [nn.Linear(fcsize, fcsize).to(device) for _ in range(3)])
        self.prelast = nn.Linear(fcsize, fcsize).to(device)
        self.fc_last = nn.Linear(fcsize, 3 * 68).to(device)
        self.act = nn.ReLU().to(device)
        self.sigm = nn.Sigmoid().to(device)

    def forward(self, x):
        """Map ``[batch, 3, H, W]`` images to ``[batch, 68, 3]`` landmark predictions."""
        x = self.adpool(x)
        x = self.pool(self.act(self.conv1(x)))
        x = self.pool(self.act(self.conv2(x)))
        x = self.act(self.conv3(x))
        x = self.pool(self.act(self.conv4(x)))
        x = x.view(-1, self.last_detector_size*16*16)
        x = self.act(self.fc1(x))
        for hidden_layer in self.fc_list:
            x = self.act(hidden_layer(x))
        x = self.act(self.prelast(x))
        x = self.fc_last(x)
        x = x.view(-1, 68, 3)
        return x


In [20]:
# Visual check on a fixed test batch; requires the test-loading cell to have run.
with torch.no_grad():
    inputs, answers = test[1]
    inputs = inputs.to(device)
    answers = answers.to(device)
    outputs = model(inputs)
    outputs = scaler.inverse_transform(outputs)
    print(outputs.shape, answers.shape)
    # show_tensor lives in modelutils; the bare name was never imported.
    img = modelutils.show_tensor(inputs[0], outputs[0])
    img.show()
    imgdlib = modelutils.show_tensor(inputs[0], answers[0])
    imgdlib.show()

NameError: name 'test' is not defined