In [4]:
from __future__ import print_function
import zipfile
import os
import pdb
import torch

import torchvision.transforms as transforms
from torchvision import datasets, transforms, utils

rgb_data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),    # Different for Input Image & Depth Image
    transforms.ToTensor(),
    # transforms.Normalize((0.3337, 0.3064, 0.3171), ( 0.2672, 0.2564, 0.2629)) # Calculate this statistics for the training image.
])

depth_data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),    # Different for Input Image & Depth Image
    transforms.ToTensor(),
    # transforms.Normalize((0.3337, 0.3064, 0.3171), ( 0.2672, 0.2564, 0.2629)) # Calculate this statistics for the training image.
])

input_for_plot_transforms = transforms.Compose([
    transforms.Resize((256, 256)),    # Different for Input Image & Depth Image
    transforms.ToTensor(),
    # transforms.Normalize((0.3337, 0.3064, 0.3171), ( 0.2672, 0.2564, 0.2629)) # Calculate this statistics for the training image.
])

def initialize_data(folder):
    rgb_images = folder + '/rgb'
    if not os.path.isdir(rgb_images):
        raise(RuntimeError("Could not found {}/rgb folder".format(folder)))

    depth_images = folder + '/depth'
    if not os.path.isdir(depth_images):
        raise(RuntimeError("Could not found {}/depth folder".format(folder)))

    # Total Image - 1449 (Division: Train-ing - 1024, Validation - 256, Testing - 169)
    dataset_prepared = True

    train_folder = folder + '/train_images'
    if not os.path.isdir(train_folder):
        dataset_prepared = False
        os.mkdir(train_folder)
        os.mkdir(train_folder + '/rgb')
        os.mkdir(train_folder + '/depth')

    val_folder = folder + '/val_images'
    if not os.path.isdir(val_folder):
        os.mkdir(val_folder)
        os.mkdir(val_folder + '/rgb')
        os.mkdir(val_folder + '/depth')

    test_folder = folder + '/test_images'
    if not os.path.isdir(test_folder):
        os.mkdir(test_folder)
        os.mkdir(test_folder + '/rgb')
        os.mkdir(test_folder + '/depth')

    if not dataset_prepared:
        for f in os.listdir(rgb_images):
            image_no = int(f.split(".")[0])
            if image_no < 1024:
                dest_folder = train_folder
            elif image_no < 1280:       # 1024 + 256
                dest_folder = val_folder
            else:
                dest_folder = test_folder
            os.rename(rgb_images + '/' + f, dest_folder + '/rgb' + '/' + f)
        for f in os.listdir(depth_images):
            image_no = int(f.split(".")[0])
            if image_no < 1024:
                dest_folder = train_folder
            elif image_no < 1280:       # 1024 + 256
                dest_folder = val_folder
            else:
                dest_folder = test_folder
            os.rename(depth_images + '/' + f, dest_folder + '/depth' + '/' + f)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNetConvBlock(nn.Module):
    def __init__(self, in_size, out_size, kernel_size=3, activation=F.relu):
        super(UNetConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_size, out_size, kernel_size, padding=1)
        self.conv2 = nn.Conv2d(out_size, out_size, kernel_size, padding=1)
        self.activation = activation

    def forward(self, x):
        out = self.activation(self.conv(x))
        out = self.activation(self.conv2(out))
        return out

class UNetUpBlock(nn.Module):
    def __init__(self, in_size, out_size, kernel_size=3, activation=F.relu, space_dropout=False):
        super(UNetUpBlock, self).__init__()
        self.up = nn.ConvTranspose2d(in_size, out_size, 2, stride=2)
        self.conv = nn.Conv2d(in_size, out_size, kernel_size, padding=1)
        self.conv2 = nn.Conv2d(out_size, out_size, kernel_size, padding=1)
        self.activation = activation

    def forward(self, x, bridge):
        up = self.up(x)
        out = torch.cat([up, bridge], 1)
        out = self.activation(self.conv(out))
        out = self.activation(self.conv2(out))
        return out

class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()

        self.activation = F.tanh
        
        self.pool1 = nn.MaxPool2d(2)
        self.pool2 = nn.MaxPool2d(2)
        self.pool3 = nn.MaxPool2d(2)
        self.pool4 = nn.MaxPool2d(2)

        self.conv_block1_64 = UNetConvBlock(3, 64)
        self.conv_block64_128 = UNetConvBlock(64, 128)
        self.conv_block128_256 = UNetConvBlock(128, 256)
        self.conv_block256_512 = UNetConvBlock(256, 512)
        self.conv_block512_1024 = UNetConvBlock(512, 1024)

        self.up_block1024_512 = UNetUpBlock(1024, 512)
        self.up_block512_256 = UNetUpBlock(512, 256)
        self.up_block256_128 = UNetUpBlock(256, 128)
        self.up_block128_64 = UNetUpBlock(128, 64)

        self.last = nn.Conv2d(64, 1, 1)

    def forward(self, x):
        block1 = self.conv_block1_64(x)
        pool1 = self.pool1(block1)

        block2 = self.conv_block64_128(pool1)
        pool2 = self.pool2(block2)

        block3 = self.conv_block128_256(pool2)
        pool3 = self.pool3(block3)

        block4 = self.conv_block256_512(pool3)
        pool4 = self.pool4(block4)

        block5 = self.conv_block512_1024(pool4)

        up1 = self.up_block1024_512(block5, block4)

        up2 = self.up_block512_256(up1, block3)

        up3 = self.up_block256_128(up2, block2)

        up4 = self.up_block128_64(up3, block1)

        return self.last(up4)

In [6]:
def display_images(images):
    grid = utils.make_grid(images)
    plt.imshow(grid.cpu().detach().numpy().transpose((1, 2, 0)))
    plt.show();

def format_data_for_display(tensor):
    maxVal = tensor.max()
    minVal = abs(tensor.min())
    maxVal = max(maxVal,minVal)
    output_data = tensor / maxVal
    output_data = output_data / 2
    output_data = output_data + 0.5
    return output_data

In [7]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, utils
import matplotlib.pyplot as plt
from torch.autograd import Variable
from logger import Logger
import pdb
import os
import re

# Training settings
# parser = argparse.ArgumentParser(description='PyTorch depth map prediction example')
# parser.add_argument('model_folder', type=str, default='trial', metavar='F',
#                     help='In which folder do you want to save the model')
# parser.add_argument('--data', type=str, default='data', metavar='D',
#                     help="folder where data is located. train_data.zip and test_data.zip need to be found in the folder")
# parser.add_argument('--batch-size', type = int, default = 32, metavar = 'N',
#                     help='input batch size for training (default: 8)')
# parser.add_argument('--epochs', type=int, default = 10, metavar='N',
#                     help='number of epochs to train (default: 10)')
# parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
#                     help='learning rate (default: 0.01)')
# parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
#                     help='SGD momentum (default: 0.5)')
# parser.add_argument('--seed', type=int, default=1, metavar='S',
#                     help='random seed (default: 1)')
# parser.add_argument('--log-interval', type=int, default=10, metavar='N',
#                     help='how many batches to wait before logging training status')
# parser.add_argument('--suffix', type=str, default='', metavar='D',
#                     help='suffix for the filename of models and output files')
# args = parser.parse_args()

data = 'data'
batch_size = 1
epochs = 10
lr = 0.0001
momentum = 0.5
seed = 1
log_interval = 10
suffix = ''
model_folder = 'unet-image-size-const'

torch.manual_seed(seed)

output_height = 256 
output_width = 256

### Data Initialization and Loading
initialize_data(data) # extracts the zip files, makes a validation set

train_rgb_loader = torch.utils.data.DataLoader(datasets.ImageFolder(data + '/train_images/rgb/', transform = rgb_data_transforms), batch_size=batch_size, shuffle=False, num_workers=1)
train_depth_loader = torch.utils.data.DataLoader(datasets.ImageFolder(data + '/train_images/depth/', transform = depth_data_transforms), batch_size=batch_size, shuffle=False, num_workers=1)
val_rgb_loader = torch.utils.data.DataLoader(datasets.ImageFolder(data + '/val_images/rgb/', transform = rgb_data_transforms), batch_size=batch_size, shuffle=False, num_workers=1)
val_depth_loader = torch.utils.data.DataLoader(datasets.ImageFolder(data + '/val_images/depth/', transform = depth_data_transforms), batch_size=batch_size, shuffle=False, num_workers=1)

model = UNet()
loss_function = F.mse_loss
#loss_function = F.smooth_l1_loss
# loss_function = F.l1_loss
optimizer = optim.Adam(model.parameters(), amsgrad=True, lr=0.0001)
dtype=torch.cuda.FloatTensor
logger = Logger('./logs/' + model_folder)

def train_Unet(epoch):
    model.train()
    for batch_idx, (rgb, depth) in enumerate(zip(train_rgb_loader, train_depth_loader)):
        rgb, depth = rgb[0], depth[0]
        optimizer.zero_grad()
        output = model(rgb)
        target = depth[:,0,:,:].view(list(depth.size())[0], 1, output_height, output_width)
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
#         print("rgb:")
#         print(rgb)
#         display_images(format_data_for_display(rgb))
        print("depth:")
#         display_images(format_data_for_display(depth))
        print(depth)
        print("output:")
        print(output)
        display_images(format_data_for_display(output))
        if batch_idx % log_interval == 0:
#             training_tag = "training loss epoch:" + str(epoch)
#             logger.scalar_summary(training_tag, loss.item(), batch_idx)

#             for tag, value in model.named_parameters():
#                 tag = tag.replace('.', '/') + ":" + str(epoch)
#                 logger.histo_summary(tag, value.data.numpy(), batch_idx)
#                 logger.histo_summary(tag + '/grad', value.grad.data.numpy(), batch_idx)

            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(rgb), len(train_rgb_loader.dataset),
                100. * batch_idx / len(train_rgb_loader), loss.item()))
#         batch_idx = batch_idx + 1
        if batch_idx == 0: break

def validate_Unet():
    model.eval()
    validation_loss = 0
    with torch.no_grad():
        for batch_idx,(rgb, depth) in enumerate(zip(train_rgb_loader, train_depth_loader)):
            rgb, depth = rgb[0], depth[0]
            output = model(rgb)
            target = depth[:,0,:,:].view(batch_size, 1, output_height, output_width)
            validation_loss += loss_function(output, target)
    #         if batch_idx == 2: break
        validation_loss /= batch_idx
        logger.scalar_summary("validation loss", validation_loss, epoch)
        print('\nValidation set: Average loss: {:.4f} \n'.format(validation_loss))

folder_name = "models/" + model_folder
if not os.path.exists(folder_name): os.mkdir(folder_name)

for epoch in range(1, epochs + 1):
    print("********* Training the Unet Model **************")
    train_Unet(epoch)
#     validate_Unet()
#     model_file = folder_name + "/" + 'model_' + str(epoch) + '.pth'
#     torch.save(model.state_dict(), model_file)

  from ._conv import register_converters as _register_converters


********* Training the Unet Model **************
depth:
tensor([[[[0.5294, 0.5294, 0.5294,  ..., 0.2471, 0.2471, 0.2471],
          [0.5294, 0.5294, 0.5294,  ..., 0.2471, 0.2471, 0.2471],
          [0.5294, 0.5294, 0.5255,  ..., 0.2471, 0.2471, 0.2471],
          ...,
          [0.2235, 0.2157, 0.1725,  ..., 0.1451, 0.1569, 0.1569],
          [0.2196, 0.2196, 0.2196,  ..., 0.1569, 0.1569, 0.1569],
          [0.2196, 0.2196, 0.2196,  ..., 0.1569, 0.1569, 0.1569]],

         [[0.5294, 0.5294, 0.5294,  ..., 0.2471, 0.2471, 0.2471],
          [0.5294, 0.5294, 0.5294,  ..., 0.2471, 0.2471, 0.2471],
          [0.5294, 0.5294, 0.5255,  ..., 0.2471, 0.2471, 0.2471],
          ...,
          [0.2235, 0.2157, 0.1725,  ..., 0.1451, 0.1569, 0.1569],
          [0.2196, 0.2196, 0.2196,  ..., 0.1569, 0.1569, 0.1569],
          [0.2196, 0.2196, 0.2196,  ..., 0.1569, 0.1569, 0.1569]],

         [[0.5294, 0.5294, 0.5294,  ..., 0.2471, 0.2471, 0.2471],
          [0.5294, 0.5294, 0.5294,  ..., 0.2471, 0.2

<Figure size 640x480 with 1 Axes>

********* Training the Unet Model **************


KeyboardInterrupt: 

In [12]:
count = 0
for parameter in model.parameters():
    count += (parameter.numel())
print(count)

31031745


In [6]:
torch.set_printoptions(precision=10)
data = 'data'
batch_size = 1
output_height = 256 
output_width = 256

def mse_log_error(output, target):
    target = target + 0.000001
    target = torch.log10(target)
    output = output + 0.000001
    output = torch.log10(output)
    return F.mse_loss(output, target)

def rel_error(output, target):
    target = target + 0.000001
    target = torch.log10(target)
    output = output + 0.000001
    output = torch.log10(output)
    diff = (output-target)/target
    return diff.mean()
### Data Initialization and Loading
initialize_data(data) # extracts the zip files, makes a validation set
loss_function = nn.SmoothL1Loss()

train_rgb_loader = torch.utils.data.DataLoader(datasets.ImageFolder(data + '/train_images/rgb/', transform = rgb_data_transforms), batch_size=batch_size, shuffle=True, num_workers=1)
train_depth_loader = torch.utils.data.DataLoader(datasets.ImageFolder(data + '/train_images/depth/', transform = depth_data_transforms), batch_size=batch_size, shuffle=True, num_workers=1)
output_width=256
output_height=256
for batch_idx,(rgb, depth) in enumerate(zip(train_rgb_loader, train_depth_loader)):
    depth = depth[0]
    target = depth[:,0,:,:].view(list(depth.size())[0], 1, output_height, output_width)
    noise = torch.zeros((list(depth.size())[0], 1, output_height, output_width), dtype=torch.float32) + 0.01
    noisy_target = torch.add(target, noise)
#     print("target:")
#     print(target)
#     print("noisy target:")
#     print(noisy_target)
    error1 = F.mse_loss(target, noisy_target)
    error2 = F.smooth_l1_loss(target, noisy_target)
    error3 = F.l1_loss(target, noisy_target)
    error4 = rel_error(noisy_target, target)
    error5 = mse_log_error(noisy_target, target)
    print("error1:{}".format(error1))
    print("error2:{}".format(error2))
    print("error3:{}".format(error3))
    print("error4:{}".format(error4))
    print("error5:{}".format(error5))
    if batch_idx == 4:
        break

error1:9.99998956104e-05
error2:5.00124297105e-05
error3:0.0100050121546
error4:0.903911232948
error5:0.0130405314267
error1:9.99998446787e-05
error2:5.00124297105e-05
error3:0.0100050121546
error4:-0.0511762723327
error5:0.030722219497
error1:9.99999756459e-05
error2:5.00124369864e-05
error3:0.0100050121546
error4:-0.0824354141951
error5:0.604435503483
error1:9.9999961094e-05
error2:5.00124333485e-05
error3:0.0100050121546
error4:0.0579924061894
error5:1.37280464172
error1:9.99998810585e-05
error2:5.00124297105e-05
error3:0.0100050121546
error4:0.120071038604
error5:0.00920666847378
