In [1]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

def display_images(images):
    """Tile ``images`` into a single grid and draw it with matplotlib.

    Args:
        images: batch passed straight to ``utils.make_grid``.
    """
    tiled = utils.make_grid(images)
    # Move axis 0 to the end so the array is laid out the way imshow is given it.
    as_array = tiled.cpu().detach().numpy()
    plt.imshow(as_array.transpose((1, 2, 0)))
    plt.show()

def format_data_for_display(tensor):
    """Rescale ``tensor`` so its values fit in [0, 1] for display.

    Values are divided by the largest magnitude found in the input
    (``max(tensor.max(), abs(tensor.min()))``), halved and shifted by 0.5, so
    zero maps to 0.5 and the largest-magnitude element maps to 0 or 1.

    Args:
        tensor: object exposing ``.max()``, ``.min()`` and elementwise
            arithmetic (for example a torch tensor).

    Returns:
        A new object of the same shape with values in [0, 1]. An all-zero
        input yields 0.5 everywhere rather than NaN.
    """
    scale = max(tensor.max(), abs(tensor.min()))
    if scale == 0:
        # Every element is zero: dividing would give 0/0 = NaN, so map the
        # whole input to the midpoint directly.
        return tensor * 0 + 0.5
    return tensor / scale / 2 + 0.5

In [2]:
from __future__ import print_function
import zipfile
import os
import pdb
import torch
import h5py
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import datasets, transforms, utils

import numpy as np
import torch.nn as nn
import torch

# Target spatial size (height, width) used when resizing RGB images and depth maps.
output_height = output_width = 64

class TransposeDepthInput(object):
    """Callable transform: numpy depth array -> resized, log-scaled tensor."""

    def __call__(self, depth):
        """Convert one depth array.

        Args:
            depth: 3-D numpy array; its last axis is moved to the front
                before conversion to a tensor.

        Returns:
            Tensor whose last two dimensions are
            ``(output_height, output_width)``, with the natural log applied
            elementwise.
        """
        channels_first = torch.from_numpy(depth.transpose((2, 0, 1)))
        # interpolate is given a 4-D tensor, so prepend a size-1 leading axis.
        batched = channels_first.view((1,) + tuple(channels_first.shape))
        resized = nn.functional.interpolate(
            batched,
            size=(output_height, output_width),
            mode='bilinear',
            align_corners=False,
        )
        # Drop the leading axis again before taking the log.
        return torch.log(resized[0])

# RGB pipeline: resize to the network resolution, then convert to a tensor.
# (Depth maps use their own pipeline, defined next.)
rgb_data_transforms = transforms.Compose(
    [
        transforms.Resize((output_height, output_width)),
        transforms.ToTensor(),
    ]
)

# Depth pipeline: the resize and log scaling happen inside TransposeDepthInput.
depth_data_transforms = transforms.Compose([TransposeDepthInput()])

# Same two steps as the RGB pipeline, kept as a separate object for plotting.
input_for_plot_transforms = transforms.Compose(
    [
        transforms.Resize((output_height, output_width)),
        transforms.ToTensor(),
    ]
)

class NYUDataset(Dataset):
    """Paired RGB image / depth map samples read from an HDF5 file.

    The file must contain ``'images'`` and ``'depths'`` datasets. The rows
    belonging to the requested split are copied into memory at construction
    time and the file is closed again immediately.
    """

    # Row range of the file assigned to each split name.
    _SPLIT_ROWS = {
        "training": slice(0, 1024),
        "validation": slice(1024, 1248),
        "test": slice(1248, None),
    }

    def __init__(self, filename, type, rgb_transform = None, depth_transform = None):
        """Load one split of the file into memory.

        Args:
            filename: path of the HDF5 file to read.
            type: split name: ``"training"``, ``"validation"`` or ``"test"``.
            rgb_transform: optional callable applied to each PIL image.
            depth_transform: optional callable applied to each depth array.

        Raises:
            ValueError: if ``type`` is not a known split name.
        """
        # Validate before touching the file so a typo fails immediately
        # instead of leaving the instance without images/depths.
        if type not in self._SPLIT_ROWS:
            raise ValueError(
                "unknown dataset type %r; expected one of %s"
                % (type, sorted(self._SPLIT_ROWS)))
        rows = self._SPLIT_ROWS[type]
        # Slicing an h5py dataset yields in-memory numpy arrays, so the file
        # handle can be released as soon as the slices are taken.
        with h5py.File(filename, 'r') as f:
            self.images = f['images'][rows]
            self.depths = f['depths'][rows]
        self.rgb_transform = rgb_transform
        self.depth_transform = depth_transform

    def __len__(self):
        """Return the number of samples in the loaded split."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{'image': ..., 'depth': ...}``.

        The stored image has its axes reversed before being wrapped in a PIL
        image (presumably stored channel-first; confirm against the file).
        The depth map gets a size-1 axis added and its axes reversed the same
        way, so that size-1 axis ends up last.
        """
        image = self.images[idx]
        image = image.transpose((2, 1, 0))
        image = Image.fromarray(image)
        if self.rgb_transform:
            image = self.rgb_transform(image)
        depth = self.depths[idx]
        depth = np.reshape(depth, (1, depth.shape[0], depth.shape[1]))
        depth = depth.transpose((2, 1, 0))
        if self.depth_transform:
            depth = self.depth_transform(depth)
        sample = {'image': image, 'depth': depth}
        return sample

  from ._conv import register_converters as _register_converters


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNetConvBlock(nn.Module):
    """A single 2-D convolution; strided by 2 with the default arguments."""

    def __init__(self, in_size, out_size, kernel_size=3, padding=1, stride=2):
        """Build the convolution.

        Args:
            in_size: number of input channels.
            out_size: number of output channels.
            kernel_size: convolution kernel size.
            padding: padding passed to the convolution.
            stride: stride passed to the convolution.
        """
        super(UNetConvBlock, self).__init__()
        conv_options = dict(kernel_size=kernel_size, padding=padding, stride=stride)
        self.conv = nn.Conv2d(in_size, out_size, **conv_options)

    def forward(self, x):
        """Apply the convolution to ``x`` (no activation is applied here)."""
        convolved = self.conv(x)
        return convolved

class UNetUpBlock(nn.Module):
    """A single 2-D transposed convolution; doubles H and W with the defaults."""

    def __init__(self, in_size, out_size, kernel_size=2, stride=2, space_dropout=False):
        """Build the transposed convolution.

        Args:
            in_size: number of input channels.
            out_size: number of output channels.
            kernel_size: transposed-convolution kernel size.
            stride: transposed-convolution stride.
            space_dropout: accepted but not used by this block.
        """
        super(UNetUpBlock, self).__init__()
        up_options = dict(kernel_size=kernel_size, stride=stride)
        self.up = nn.ConvTranspose2d(in_size, out_size, **up_options)

    def forward(self, x):
        """Apply the transposed convolution to ``x`` (no activation here)."""
        upsampled = self.up(x)
        return upsampled

class UNet(nn.Module):
    """Small encoder/decoder: three conv blocks followed by three up blocks.

    Channel progression is 3 -> 16 -> 32 -> 64 -> 32 -> 16 -> 1. There are no
    skip connections between the two halves.
    """

    def __init__(self):
        super(UNet, self).__init__()

        self.activation = F.relu

        # Encoder stages.
        self.conv_block3_16 = UNetConvBlock(3, 16)
        self.conv_block16_32 = UNetConvBlock(16, 32)
        self.conv_block32_64 = UNetConvBlock(32, 64)

        # Decoder stages.
        self.up_block64_32 = UNetUpBlock(64, 32)
        self.up_block32_16 = UNetUpBlock(32, 16)
        self.up_block16_1 = UNetUpBlock(16, 1)

    def forward(self, x):
        """Run ``x`` through every stage in order.

        The activation follows each stage except the last one, whose raw
        output is returned.
        """
        activated_stages = (
            self.conv_block3_16,
            self.conv_block16_32,
            self.conv_block32_64,
            self.up_block64_32,
            self.up_block32_16,
        )
        features = x
        for stage in activated_stages:
            features = self.activation(stage(features))
        return self.up_block16_1(features)


In [4]:
from torchsummary import summary

# Instantiate the network and print its per-layer summary for a 3x64x64 input.
model = UNet()
summary(model, input_size=(3, 64, 64))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
     UNetConvBlock-2           [-1, 16, 32, 32]               0
            Conv2d-3           [-1, 32, 16, 16]           4,640
     UNetConvBlock-4           [-1, 32, 16, 16]               0
            Conv2d-5             [-1, 64, 8, 8]          18,496
     UNetConvBlock-6             [-1, 64, 8, 8]               0
   ConvTranspose2d-7           [-1, 32, 16, 16]           8,224
       UNetUpBlock-8           [-1, 32, 16, 16]               0
   ConvTranspose2d-9           [-1, 16, 32, 32]           2,064
      UNetUpBlock-10           [-1, 16, 32, 32]               0
  ConvTranspose2d-11            [-1, 1, 64, 64]              65
      UNetUpBlock-12            [-1, 1, 64, 64]               0
Total params: 33,937
Trainable params: 33,937
Non-trainable params: 0
---------------------------------

In [5]:
import torch
from torchvision import datasets, transforms, utils

batch_size = 1

# Both loaders read the same file; only the split name differs.
# Neither loader shuffles, and loading happens in the main process.
train_loader = DataLoader(
    NYUDataset('nyu_depth_v2_labeled.mat',
               'training',
               rgb_transform=rgb_data_transforms,
               depth_transform=depth_data_transforms),
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

val_loader = DataLoader(
    NYUDataset('nyu_depth_v2_labeled.mat',
               'validation',
               rgb_transform=rgb_data_transforms,
               depth_transform=depth_data_transforms),
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [6]:
# Fetch one training batch and inspect its first sample.
data = next(iter(train_loader))
first_rgb, first_depth = data['image'][0], data['depth'][0]
print(first_rgb.shape)
print(first_depth.shape)

# Move axis 0 to the end before handing the RGB array to imshow.
rgb_array = first_rgb.numpy()
plt.imshow(np.transpose(rgb_array, (1, 2, 0)), interpolation='nearest')

torch.Size([3, 64, 64])
torch.Size([1, 64, 64])


<matplotlib.image.AxesImage at 0x1c0b9af50>

In [12]:
from __future__ import print_function
import argparse
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, utils

from torch.autograd import Variable
from logger import Logger
import pdb
import os
import re
import numpy as np

# Run configuration.
epochs = 100                  # number of passes made by the training loop
log_interval = 10             # print the training loss every this many batches
model_folder = 'small-unet'   # sub-folder name used for logs and checkpoints

# Fresh network, its loss, the optimizer and the logger.
model = UNet()
loss_function = F.mse_loss
optimizer = optim.Adam(model.parameters(), lr=0.001, amsgrad=True)
logger = Logger('./logs/' + model_folder)

def train_Unet(epoch):
    """Run the training step for ``epoch`` using the module-level model.

    NOTE: the loop stops after the first batch (``batch_idx == 0``), so each
    call performs exactly one optimizer step on the first batch of
    ``train_loader``.

    Args:
        epoch: epoch number; used in the log line and to decide whether to
            plot (every 10th epoch).
    """
    model.train()
    for batch_idx, data in enumerate(train_loader):
        rgb = data['image']
        depth = data['depth']

        optimizer.zero_grad()
        output = model(rgb)
        # Take channel 0 of the depth batch and restore a size-1 channel axis.
        target = depth[:, 0, :, :].view(depth.shape[0], 1, output_height, output_width)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            samples_seen = batch_idx * len(rgb)
            percent_done = 100. * batch_idx / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, len(train_loader.dataset),
                percent_done, loss.item()))

        if epoch % 10 == 0:
            # Named ``fig`` so the torch.nn.functional alias ``F`` is not
            # shadowed inside this function.
            fig = plt.figure(1, (30, 60))
            fig.subplots_adjust(left=0.05, right=0.95)
            plot_grid(fig, rgb, target, output, batch_size)
            plt.show()

        if batch_idx == 0:
            break

def validate_Unet():
    """Evaluate the module-level model on ``val_loader`` and print the mean loss.

    The MSE loss of every validation batch is accumulated with gradients
    disabled and averaged over the number of batches actually seen.

    Returns:
        The average per-batch MSE loss as a float, or NaN when the loader
        yields no batches.
    """
    print('validating unet')
    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for data in val_loader:
            rgb, depth = data['image'], data['depth']
            output = model(rgb)
            # Take channel 0 of the depth batch and restore a size-1 channel axis.
            target = depth[:, 0, :, :].view(depth.shape[0], 1, output_height, output_width)
            total_loss += F.mse_loss(output, target).item()
            num_batches += 1
    # Divide by the batch count (not the last index), and guard the empty
    # loader so the summary line is still printed.
    validation_loss = total_loss / num_batches if num_batches else float('nan')
    # One placeholder for the one value being reported.
    print('\nValidation set: Average loss: {:.6f}\n'.format(validation_loss))
    return validation_loss

# Checkpoints are written under models/<model_folder>.
folder_name = "models/" + model_folder
# makedirs also creates the parent "models" directory when it is missing;
# os.mkdir would fail in that case.
if not os.path.exists(folder_name):
    os.makedirs(folder_name)

for epoch in range(1, epochs + 1):
    print("********* Training the Unet Model **************")
    train_Unet(epoch)
    # Save the weights every 25th epoch.
    if epoch % 25 == 0:
        model_file = folder_name + "/" + 'model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
#    validate_Unet()

********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training t

********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************
********* Training the Unet Model **************


In [26]:
# Compare custom_loss_function (not defined in this part of the notebook)
# with the built-in losses on the first two batches, using each depth map and
# a copy offset by a constant 0.1.
for batch_idx, data in enumerate(train_loader):
    rgb = data['image']
    depth = data['depth']
    noisy_depth = depth + 0.1
    loss = custom_loss_function(depth, noisy_depth)

    print("depth:")
    print(depth)
    print("noisy depth:")
    print(noisy_depth)

    print("cust loss %.6f" % loss)
    print("mse loss %.6f" % F.mse_loss(depth, noisy_depth))
    print("l1 loss %.6f" % F.l1_loss(depth, noisy_depth))
    print("smooth l1 loss %.6f" % F.smooth_l1_loss(depth, noisy_depth))

    # Stop once the second batch (index 1) has been reported.
    if batch_idx >= 1:
        break

depth:
tensor([[[[1.0020, 1.0046, 1.0299,  ..., 0.8161, 0.7773, 0.7794],
          [0.9916, 0.9909, 0.9994,  ..., 0.7719, 0.7768, 0.7795],
          [0.9869, 0.9793, 0.9828,  ..., 0.7678, 0.7797, 0.7832],
          ...,
          [0.7032, 0.7053, 0.7219,  ..., 0.6852, 0.6584, 0.6599],
          [0.6525, 0.6451, 0.6386,  ..., 0.6874, 0.6606, 0.6618],
          [0.6252, 0.6051, 0.5937,  ..., 0.6794, 0.6641, 0.6635]]]])
noisy depth:
tensor([[[[1.1020, 1.1046, 1.1299,  ..., 0.9161, 0.8773, 0.8794],
          [1.0916, 1.0909, 1.0994,  ..., 0.8719, 0.8768, 0.8795],
          [1.0869, 1.0793, 1.0828,  ..., 0.8678, 0.8797, 0.8832],
          ...,
          [0.8032, 0.8053, 0.8219,  ..., 0.7852, 0.7584, 0.7599],
          [0.7525, 0.7451, 0.7386,  ..., 0.7874, 0.7606, 0.7618],
          [0.7252, 0.7051, 0.6937,  ..., 0.7794, 0.7641, 0.7635]]]])
cust loss 0.005000
mse loss 0.010000
l1 loss 0.099999
smooth l1 loss 0.005000
depth:
tensor([[[[0.6341, 0.6316, 0.6214,  ..., 0.3574, 0.3218, 0.3185],
 