In [1]:
import os

# Restrict CUDA to device index 1 for this notebook, but only when no device
# selection was made already (e.g. by the shell or a job scheduler); an
# existing CUDA_VISIBLE_DEVICES value is left untouched.
# This cell runs before torch is imported so the setting is in place when CUDA starts.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

In [2]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import torch.utils.data as Data
import scipy.ndimage
# Training hyper-parameters used by the DataLoader and the training loop below.
BATCH_SIZE = 32   # samples per mini-batch
NUM_EPOCH = 500   # number of passes over the training loader


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Construct customized ResNet

def conv3x3(in_planes, out_planes, stride=1):
    """Create a bias-free 3x3 ``nn.Conv2d`` with one pixel of zero padding.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        The freshly constructed ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

class BasicBlock(nn.Module):
    """Residual unit: conv3x3-BN-ReLU, conv3x3-BN, add the shortcut, ReLU."""

    # Channel multiplier of the block output; read by Submodule2._make_layer.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """Build the two convolution stages.

        Args:
            inplanes: channels of the incoming tensor.
            planes: channels produced by both convolutions.
            stride: stride of the first convolution (the second always uses 1).
            downsample: optional module applied to the input to form the
                shortcut; when ``None`` the input itself is the shortcut.
        """
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(F(x) + shortcut(x))`` for the input feature map ``x``."""
        # Main branch: two conv/BN stages with a ReLU in between.
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)

class Submodule2(nn.Module):
    """Two-input regressor built from residual stages and a small MLP head.

    ``forward(x1, x2)`` takes a 4-channel image batch ``x1`` and a 2-feature
    vector batch ``x2`` (presumably a finger position, judging by the data
    file names used elsewhere in this notebook -- TODO confirm) and returns
    one non-negative scalar per sample.

    The image path is: stem conv -> max-pool -> pcpt_layer1 -> max-pool ->
    pcpt_layer2/3 -> scoop_layer1/2 -> upsample -> scoop_layer3 -> upsample ->
    3-channel conv, which is flattened and fed to ``x1_hidden``.

    ``pcpt_upsample``, ``scoop_conv1`` and ``scoop_bn1`` are constructed but not
    used by ``forward``; they are kept so the ``state_dict`` layout (and any
    checkpoint saved from this class) stays the same.
    """

    def __init__(self, pcpt_block, pcpt_layers, scoop_block, scoop_layers, h, w, pcpt_is_upsample=0, scoop_is_upsample=0):
        """Build all layers.

        Args:
            pcpt_block: block class for the three ``pcpt_layer*`` stages; must
                expose an ``expansion`` attribute and accept
                ``(inplanes, planes, stride, downsample)``.
            pcpt_layers: sequence of three ints, blocks per ``pcpt`` stage.
            scoop_block: block class for the three ``scoop_layer*`` stages.
            scoop_layers: sequence of three ints, blocks per ``scoop`` stage.
            h: height of the input images; sizes the first linear layer.
            w: width of the input images; sizes the first linear layer.
            pcpt_is_upsample: stored on the instance; not read by ``forward``.
            scoop_is_upsample: stored on the instance; not read by ``forward``.
        """
        super(Submodule2, self).__init__()
        # Running channel count consumed and updated by _make_layer.
        self.inplanes = 64
        self.pcpt_is_upsample = pcpt_is_upsample
        self.pcpt_conv1 = nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.pcpt_bn1 = nn.BatchNorm2d(64)
        self.pcpt_relu = nn.ReLU(inplace=True)
        self.pcpt_maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # align_corners=False is the library default; passing it explicitly
        # keeps the numerics and avoids the "specify align_corners" warning.
        self.pcpt_upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        self.pcpt_layer1 = self._make_layer(pcpt_block, 128, pcpt_layers[0])
        self.pcpt_layer2 = self._make_layer(pcpt_block, 256, pcpt_layers[1])
        self.pcpt_layer3 = self._make_layer(pcpt_block, 512, pcpt_layers[2])

        self.inplanes = 512
        self.scoop_is_upsample = scoop_is_upsample
        self.scoop_upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        self.scoop_layer1 = self._make_layer(scoop_block, 256, scoop_layers[0])
        self.scoop_layer2 = self._make_layer(scoop_block, 128, scoop_layers[1])
        self.scoop_layer3 = self._make_layer(scoop_block, 64, scoop_layers[2])
        self.scoop_conv1 = nn.Conv2d(64, 1, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.scoop_bn1 = nn.BatchNorm2d(1)
        self.scoop_conv2 = nn.Conv2d(64, 3, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.scoop_bn2 = nn.BatchNorm2d(3)
        self.scoop_relu = nn.ReLU(inplace=True)

        # Fully connected head. The image branch width follows the requested
        # input size instead of a fixed 3*200*200 (same value for h = w = 200).
        self.x1_hidden = torch.nn.Linear(self._flattened_image_features(h, w), 200)
        self.x2_hidden = torch.nn.Linear(2, 200)
        self.x_hidden1 = torch.nn.Linear(400, 200)
        self.x_hidden2 = torch.nn.Linear(200, 50)
        self.x_hidden3 = torch.nn.Linear(50, 1)

        # Kaiming-normal init for every conv; BatchNorm starts as identity scale/shift.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _flattened_image_features(h, w):
        """Return the flattened size of the 3-channel map ``forward`` produces for an (h, w) image."""
        # forward() applies the 2x2/stride-2 max-pool twice (each floors the
        # size) and the x2 upsample twice; every other layer keeps H and W.
        out_h = (h // 2 // 2) * 2 * 2
        out_w = (w // 2 // 2) * 2 * 2
        return 3 * out_h * out_w

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block`` producing ``planes * block.expansion`` channels.

        The first block receives a 1x1 conv + BatchNorm projection as its
        ``downsample`` whenever the stride is not 1 or the channel count
        changes. ``self.inplanes`` is updated to the stage's output width.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*layers)

    def forward(self, x1, x2):
        """Run both inputs through the network.

        Args:
            x1: image batch with 4 channels (``pcpt_conv1`` expects 4 input
                channels); its height/width must match the ``h``/``w`` given
                at construction.
            x2: batch of 2-feature vectors (``x2_hidden`` expects 2 inputs).

        Returns:
            Tensor with one value per sample in the last dimension.
        """
        # Stem + first pooling.
        x1 = self.pcpt_conv1(x1)
        x1 = self.pcpt_bn1(x1)
        x1 = self.pcpt_relu(x1)
        x1 = self.pcpt_maxpool(x1)

        # Channel-widening residual stages with a second pooling in between.
        x1 = self.pcpt_layer1(x1)
        x1 = self.pcpt_maxpool(x1)
        x1 = self.pcpt_layer2(x1)
        x1 = self.pcpt_layer3(x1)

        # Channel-narrowing residual stages with two x2 upsamplings.
        x1 = self.scoop_layer1(x1)
        x1 = self.scoop_layer2(x1)
        x1 = self.scoop_upsample(x1)
        x1 = self.scoop_layer3(x1)
        x1 = self.scoop_upsample(x1)

        # Project to 3 channels and flatten per sample.
        x1 = self.scoop_conv2(x1)
        x1 = self.scoop_bn2(x1)
        x1 = self.scoop_relu(x1)
        x1 = x1.reshape(x1.shape[0], -1)

        x1 = F.relu(self.x1_hidden(x1))

        x2 = F.relu(self.x2_hidden(x2))

        # Fuse the two 200-wide embeddings and regress a single value.
        x = torch.cat((x1, x2), dim=-1)

        x = F.relu(self.x_hidden1(x))
        x = F.relu(self.x_hidden2(x))
        # NOTE(review): the ReLU on the last layer clamps predictions to >= 0.
        x = F.relu(self.x_hidden3(x))

        return x

In [4]:
# Instantiate the network for 200x200 inputs with one residual block per stage
# and move it to the GPU.
submodule2_net = Submodule2(
    pcpt_block=BasicBlock, pcpt_layers=[1, 1, 1],
    scoop_block=BasicBlock, scoop_layers=[1, 1, 1],
    h=200, w=200,
).cuda()
print(submodule2_net)

# Optional smoke test. Kept as '#' comments rather than a triple-quoted string
# so the cell does not echo a stray string literal as its output.
# try_data_input1=torch.from_numpy(np.load("data_20210605/input_data_array_submodule2_image.npy")/255.0).permute(0,3,1,2)[0:2, :, :, :].cuda().float()
# try_data_input2=torch.from_numpy(np.array([[50,60], [30,40]])).cuda().float()
# aaa=submodule2_net(try_data_input1, try_data_input2)
# print(aaa.shape)



Submodule2(
  (pcpt_conv1): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (pcpt_bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pcpt_relu): ReLU(inplace=True)
  (pcpt_maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (pcpt_upsample): Upsample(scale_factor=2.0, mode=bilinear)
  (pcpt_layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, mome

'\ntry_data_input1=torch.from_numpy(np.load("data_20210605/input_data_array_submodule2_image.npy")/255.0).permute(0,3,1,2)[0:2, :, :, :].cuda().float()\ntry_data_input2=torch.from_numpy(np.array([[50,60], [30,40]])).cuda().float()\naaa=submodule2_net(try_data_input1, try_data_input2)\nprint(aaa.shape)\n'

In [5]:
import random

# Hold-out size and the seed that fixes which samples are held out, so the
# train/validation split is the same on every run of the notebook.
VAL_SIZE = 50
SPLIT_SEED = 0

# Image input, loaded channels-last and divided by 255
# (presumably 8-bit pixel values -- TODO confirm against the data files).
submodule2_data_input_1 = np.load("data_20210711/input_data_array_submodule2_image_20210711.npy")/255.0
print(submodule2_data_input_1.shape)

# A private RNG keeps the split reproducible without touching the global
# `random` state. Training indices are every index not drawn for validation.
num_samples = len(submodule2_data_input_1)
temp_index_set = random.Random(SPLIT_SEED).sample(range(num_samples), VAL_SIZE)
held_out = set(temp_index_set)
temp_index_set_other = [i for i in range(num_samples) if i not in held_out]

# Reorder to channels-first for nn.Conv2d, then split.
submodule2_data_input_1 = torch.from_numpy(submodule2_data_input_1).permute(0,3,1,2)
train_submodule2_data_input_1 = submodule2_data_input_1[temp_index_set_other, :, :, :]
val_submodule2_data_input_1 = submodule2_data_input_1[temp_index_set, :, :, :]

# Second (vector) input, split with the same indices.
submodule2_data_input_2 = np.load("data_20210711/input_data_array_submodule2_finger_position_20210711.npy")
print(submodule2_data_input_2.shape)
assert len(submodule2_data_input_2) == num_samples, "input arrays differ in length"
submodule2_data_input_2 = torch.from_numpy(submodule2_data_input_2)
train_submodule2_data_input_2 = submodule2_data_input_2[temp_index_set_other, :]
val_submodule2_data_input_2 = submodule2_data_input_2[temp_index_set, :]

# Regression targets.
submodule2_data_label = np.load("data_20210711/label_data_array_submodule2_thumb_position_20210711.npy")
print(submodule2_data_label.shape)
assert len(submodule2_data_label) == num_samples, "labels and inputs differ in length"

# Keep the targets floating point: the training loop feeds them to
# SmoothL1Loss as floats, and an integer cast here would truncate any
# fractional part of the labels first.
submodule2_data_label = torch.from_numpy(submodule2_data_label).float()
train_submodule2_data_label = submodule2_data_label[temp_index_set_other, :]
val_submodule2_data_label = submodule2_data_label[temp_index_set, :]

submodule2_train_torch_dataset = Data.TensorDataset(train_submodule2_data_input_1, train_submodule2_data_input_2, train_submodule2_data_label)
loader = Data.DataLoader(dataset=submodule2_train_torch_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Spatial size of the (channels-first) image tensor.
htmap_h = submodule2_data_input_1.shape[2]
htmap_w = submodule2_data_input_1.shape[3]

(6727, 200, 200, 4)
(6727, 2)
(6727, 1)


In [None]:
# Alternatives that were tried and left for reference:
#optimizer = torch.optim.SGD(params = submodule2_net.parameters(), lr=1e-4, momentum=0.9, weight_decay=2e-5)
#optimizer = torch.optim.SGD(params = submodule2_net.parameters(), lr=1e-4)
#loss_func = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()  # the target label is NOT an one-hotted
optimizer = torch.optim.Adam(params=submodule2_net.parameters(), lr=1e-4)

# `reduction=` replaces the deprecated size_average/reduce flags. The training
# objective is the mean SmoothL1 loss; plain L1 is reported for monitoring only.
loss_func = torch.nn.SmoothL1Loss(reduction='mean')
L1_loss = torch.nn.L1Loss(reduction='mean')

# Create the checkpoint directory up front so the first torch.save cannot
# fail on a missing path.
checkpoint_dir = 'net_params_20210711/net_params_submodule2_20210711_1'
os.makedirs(checkpoint_dir, exist_ok=True)

# The validation set is moved to the GPU once and reused after every step.
val_submodule2_data_input_1 = val_submodule2_data_input_1.cuda().float()
val_submodule2_data_input_2 = val_submodule2_data_input_2.cuda().float()
val_submodule2_data_label = val_submodule2_data_label.cuda().float()

min_val_loss_print = float('inf')
for epoch in range(NUM_EPOCH):
    for step, (batch_data_1, batch_data_2, batch_label) in enumerate(loader):
        batch_data_1 = batch_data_1.cuda().float()
        batch_data_2 = batch_data_2.cuda().float()
        batch_label = batch_label.cuda().float()

        # Back to training mode; the validation pass below switches to eval mode.
        submodule2_net.train()
        output = submodule2_net(batch_data_1, batch_data_2)
        train_loss = loss_func(output, batch_label)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        train_loss_print = train_loss.item()

        # Validate after every optimisation step (not once per epoch).
        with torch.no_grad():
            submodule2_net.eval()
            val_output = submodule2_net(val_submodule2_data_input_1, val_submodule2_data_input_2)
            val_loss = loss_func(val_output, val_submodule2_data_label)
            val_loss_print = val_loss.item()
            val_L1_loss = L1_loss(val_output, val_submodule2_data_label).item()
            torch.cuda.empty_cache()
        print('epoch: ', epoch, '  step: ', step, '  train loss: ', train_loss_print, ' val loss: ', val_loss_print, ' val L1 loss: ', round(val_L1_loss,4))

        # Keep the best weights seen so far. The file name only carries the
        # epoch, so a later improvement within the same epoch overwrites it.
        if val_loss_print < min_val_loss_print:
            torch.save(submodule2_net.state_dict(), checkpoint_dir + '/epoch_' + str(epoch) + '.pkl')
            min_val_loss_print = val_loss_print
            print('min_val_loss_print', min_val_loss_print)

  "See the documentation of nn.Upsample for details.".format(mode))


epoch:  0   step:  0   train loss:  65.59323120117188  val loss:  60.90070343017578  val L1 loss:  61.4007
min_val_loss_print 60.90070343017578
epoch:  0   step:  1   train loss:  60.5460205078125  val loss:  58.14142608642578  val L1 loss:  58.6361
min_val_loss_print 58.14142608642578
epoch:  0   step:  2   train loss:  57.863765716552734  val loss:  56.21961975097656  val L1 loss:  56.7196
min_val_loss_print 56.21961975097656
epoch:  0   step:  3   train loss:  52.14617156982422  val loss:  54.10171890258789  val L1 loss:  54.6017
min_val_loss_print 54.10171890258789
epoch:  0   step:  4   train loss:  42.818687438964844  val loss:  52.03583908081055  val L1 loss:  52.5358
min_val_loss_print 52.03583908081055
epoch:  0   step:  5   train loss:  55.885494232177734  val loss:  50.21784973144531  val L1 loss:  50.7178
min_val_loss_print 50.21784973144531
epoch:  0   step:  6   train loss:  47.47514343261719  val loss:  48.39632797241211  val L1 loss:  48.8963
min_val_loss_print 48.39632

# round(2.34567890, 4)