In [1]:
import os
import struct
import numpy as np
import scipy.io as scio
import pickle
import glob
from torch.utils.data import  DataLoader
import time
import cv2

import torch.backends.cudnn as cudnn
cudnn.benchmark = True
#cudnn.fastest = True

In [2]:
from os.path import expanduser

data_file = expanduser("../datasets/sem3d_rgbd_depthlim5_ims/semantic3Dsplatting.mat")
#data_file = "/home/linhe979/3dPointCloud/sem3d_rgbd_depthlim5_ims/semantic3Dsplatting.mat"
matfile = scio.loadmat(data_file)
name = matfile["images"]["name"][0][0][0]  # should give a list of names
image_sizes = matfile["images"]["size"][0][0][0]
istrain = matfile["images"]["set"][0][0][0]
training_set = [i for i in range(len(istrain)) if istrain[i] > 0]
validation_set = [i for i in range(len(istrain)) if istrain[i] == 0]
imdb = {"name": name, "image_sizes": image_sizes, "training_set": training_set, "validation_set":validation_set}

In [3]:
import torch

class IMDBReader(object):
    
    def __init__(self, streams: list, imdb_file, stat_file, suffix, fetch_range=None):
   
        self.streams_db  = ['color', 'depth', 'normal', 'label']
        self.streams = streams
    
        assert set(self.streams).issubset(set(self.streams_db)), "The input streams should refer to {}".format(self.streams_db)
        
        self.imdb = self.loadIMDBFile(imdb_file)

        print("self.imdb validation_set: ", self.imdb['validation_set'])
        self.stat = self.loadStatFile(stat_file)
        self.suffix = suffix  # '.png' or '.jpg' etc
        
        self.attributes = ['rgb', 'shape', 'dtype']
        self.path_names = ['color_path', 'djet_path', 'normal_path', 'label_path']
        
        path_to_color = '../datasets/sem3d_rgbd_depthlim5_ims/rgb/'    # need to change to Rel Path.
        path_to_depth = '../datasets/sem3d_rgbd_depthlim5_ims/djet/'
        path_to_normal = '../datasets/sem3d_rgbd_depthlim5_ims/normals/'
        path_to_label = '../datasets/sem3d_rgbd_depthlim5_ims/labels/'
        self.paths = {self.streams_db[0]: path_to_color, 
                 self.streams_db[1]: path_to_depth, 
                 self.streams_db[2]: path_to_normal, 
                 self.streams_db[3]: path_to_label,
                }
        
        
        self.expected_num_of_files = len(glob.glob1(self.paths[self.streams[0]], '*' + self.suffix))
        
        #check if the number of imgaes with samma extension is same for all streams
        hasSameNumOfFiles = all([len(glob.glob1(self.paths[self.streams[i]], '*' + self.suffix)) == self.expected_num_of_files for i in range(len(self.streams)) ])
        assert hasSameNumOfFiles, ",\n".join(["{} folder has {} files".format(self.streams[i], len(glob.glob1(self.paths[self.streams[i]], '*' + self.suffix))) for i in range(len(self.streams))])
        
        
        self.num_of_imgs = len(self.imdb['name'])
        self.fetch_range = fetch_range
        self.indices = list(range(self.num_of_imgs))
        if self.fetch_range:
            assert isinstance(self.fetch_range, tuple), "The fetch_range should have type 'tuple'"
            assert self.fetch_range[1] < self.num_of_imgs, "fetch_range out of range, total number of existing files: {}".format(self.num_of_imgs)
            self.all_indices = list(range(self.num_of_imgs))
            self.indices = self.all_indices[slice(*self.fetch_range)]

        self.images = {}
    
    
    def loadIMDBFile(self, imdb_file):
        if ".mat" in imdb_file:
            matfile = scio.loadmat(imdb_file)
            name = matfile["images"]["name"][0][0][0]  # should give a list of names
            image_sizes = matfile["images"]["size"][0][0][0]
            istrain = matfile["images"]["set"][0][0][0]
            training_set = [i for i in range(len(istrain)) if istrain[i] > 0]
            validation_set = [i for i in range(len(istrain)) if istrain[i] == 0]
            imdb = {"name": name, "image_sizes": image_sizes, "training_set": training_set, "validation_set":validation_set}
            return imdb
        else:
            imdb = pickle.load( open( imdb_file, "rb" ) )
            return imdb


    def loadStatFile(self, stat_file):
        if ".mat" in stat_file:
            stat = scio.loadmat(stat_file)
            return stat
        else:
            imdb = pickle.load( open( stat_file, "rb" ) )
            return stat
    
    '''
    def __getitem__(self, idx):

        self.images = {}  # remove the buffer from previous data fetch
        
        if(isinstance(idx, int)):
            return self._loadImage(idx)
        
        elif(isinstance(idx, slice)):  # enable slicing indexing
            idx_list = idx.indices(self.expected_num_of_files)  # get (start, stop, step)
            return self._loadImage([i for i in range(*idx_list)])
            
        elif(isinstance(idx, str)):            
            ind = np.where(self.imdb['name'] == np.array([idx], dtype='<U38'))[0][0]  
            ind = ind.item()
            return self._loadImage(ind)
    '''
    
    def loadImage(self, idx):
        self.images = {}  # remove the buffer from previous data fetch
        
        selected_indices = self.makeToIndices(idx)
        
        img_names = [self.imdb['name'][selected_indices[i]][0] + self.suffix for i in range(len(selected_indices))]
        
        for stream_name in self.streams:
            if stream_name == 'label':
                imgs = [cv2.imread(self.paths[stream_name] + '/' + img_names[i], 0) for i in range(len(img_names))]
                self.images[stream_name] = imgs

            elif stream_name != 'label':
                imgs = [cv2.imread(self.paths[stream_name] + '/' + img_names[i]) for i in range(len(img_names))]
                imgs_reshaped = [imgs[i].transpose((2, 0, 1)) for i in range(len(imgs))]  #From N x H x W x C  To  N x C x H x W for each image
                self.images[stream_name] = imgs_reshaped
            
            
               
        '''
        for i in range(len(indices)):
            img_name = self.imdb['name'][indices[i]][0]
            img_name += self.suffix
            
            streams_img = [cv2.imread(self.paths[self.streams[i]] + '/' + img_name) for i in range(len(self.streams))] 

            data = {}  
            #check if all streams have rgb channels for an image
            assert all(len(streams_img[s].shape)==3 for s in range(len(streams_img))), "Not all streams of {} have rgb channels!".format(img.name)
            
            for i in range(len(self.streams)):
                data[self.streams[i]] = self._getImageInfo(streams_img[i])
            
            self.images.append(data)
        '''
        return self.images   # {'stream1': (N x C x H x W), 'stream2': (N x C x H x W), .....}
        

    def setRange(self, newRange):
        assert isinstance(newRange, tuple), "The range input should have type 'tuple'"
        assert newRange[1] - 1 > self.num_of_imgs, "fetch_range out of range, total number of existing files: {}".format(self.num_of_imgs)

        self.fetch_range = newRange
        self.indices = self.all_indices[slice(*self.fetch_range)]

    def makeToIndices(self, idx):
        list_of_index = []
        if(isinstance(idx, int)):
            list_of_index = [self.indices[idx]]
        elif(isinstance(idx, slice)):
            list_of_index = self.indices[idx]
        #elif(isinstance(idx, str)):
        #   pass
        return list_of_index
    
        
    

In [4]:
from torch.utils.data.dataset import Dataset

class Sem3DIMGDataset(Dataset):

    def __init__(self, params, training=True, transform=None): 
        
        self.training = training
        #self.scale_range = scale_range
        _type = "TRAIN" if training else "TEST"
        
        self.dset = IMDBReader(params.streams, params.imdb_file, params.stat_file, params.suffix, params.fetch_range)
        #print("training: ", self.dset.imdb['training_set'])
        self.inds = self.dset.imdb["training_set"] if self.training else self.dset.imdb["validation_set"]
        self.pid = os.getpid()
        self.rng = np.random.RandomState()
        
        self.transform = transform


    def __len__(self):
        return len(self.dset.indices)

    def __getitem__(self, idx):
        sample = self.dset.loadImage(idx)
        if self.transform:
            sample = self.transform(sample)
        
        #self.length = len(sample[params.streams[0]])
        #if(self.length == 1):
            #sample.update({i: sample[i][0] for i in params.streams})
 
        return sample
        
        #elif(isinstance(idx, str)):            
        #    ind = np.where(self.dset.imdb['name'] == np.array([idx], dtype='<U38'))[0][0]  
        #    ind = ind.item()
        #    return self.dset.loadImage(ind)
        

    def setRange(self, newRange):
        self.dset.setRange(newRange)
        

In [5]:
import torch.nn.functional as F

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""
    
    def __call__(self, sample):
        #sample has type list, created from LoadImage().
        keys = list(sample.keys())

        for i in range(len(sample)):
            #sample.update({keys[i]: [torch.from_numpy(j) for j in sample[keys[i]]]})
            
            if(len(sample[keys[i]]) == 1): 
                #sample.update({keys[i]: torch.from_numpy(sample[keys[i]][0])})
                if keys[i] == 'label': 
                    converted = torch.from_numpy(sample[keys[i]][0]).long()

                else:
                    converted = torch.from_numpy(sample[keys[i]][0]).float()
                sample.update({keys[i]: converted})
            else:
                #sample.update({keys[i]: [torch.from_numpy(j).float() for j in sample[keys[i]]]})
                if keys[i] == 'label':
                    converted = [torch.from_numpy(j).long() for j in sample[keys[i]]]
                else:
                    converted = [torch.from_numpy(j).float() for j in sample[keys[i]]]
                sample.update({keys[i]: converted})
        return sample

#normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 #F.normalize(tensor, self.mean, self.std)
#sample[name] = F.normalize(sample[name], [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

class ToNormalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std
        
    def __call__(self, sample):
        # after ToTensor(), sample['bla'] is a Tensor now
        keys = list(sample.keys())
        if 'label' in keys: 
            keys.remove('label')     #labels will NOT be normalized
            
        for name in keys:
            tensor = sample[name]
            for t, m, s in zip(tensor, self.mean, self.std):
                t.sub_(m).div_(s)
            sample.update({name: tensor})

        return sample
    


In [9]:
class Hyperparams(object):
    def __init__(self):
        self.streams = ['color','label'] # now using only one stream for training!!!
        self.imdb_file = '../datasets/sem3d_rgbd_depthlim5_ims/semantic3Dsplatting.mat'
        self.stat_file = '../datasets/sem3d_rgbd_depthlim5_ims/semantic3Dsplatting-stat.mat'
        self.fetch_range = (1,1003,600)  # (start, end, step)
        self.suffix = '.png'
        self.num_workers = 2
        self.batch_size = 2
        self.gpu_id = []             # GPU ID. -1 == use any available GPU
        self.lr = 0.0001
        self.loss_func = torch.nn.NLLLoss(ignore_index=255, size_average=True)
        self.momentum = 0.99
        self.pretrained=None
        
    def get_optimizer(self, model):
        self.optimizer = torch.optim.SGD(model.parameters(), lr=self.lr, momentum=self.momentum, weight_decay=0)
        return self.optimizer

In [7]:


#params = Hyperparams()
#my_dataset = Sem3DIMGDataset(params, transform=ToTensor())
#dataloader = DataLoader(my_dataset, batch_size=2, shuffle=True, num_workers=4)
#for i, sample in enumerate(dataloader):
#    print("sample type:", type(sample))
#    print("i: ", i, " , label size: ", sample['label'].size() )
#print("len(my_dataset): ", len(my_dataset))


#cv2.imshow("out", one_label)
#k = cv2.waitKey(0)
#if k == ord('s'):        # wait for pressing 's'
#    cv2.destroyAllWindows()


#ame_db = glob.glob("../datasets/sem3d_rgbd_depthlim5_ims/labels/*.png")
#n_classes = 0
#for name in name_db:
#    img = cv2.imread(name,0)
#    num_of_classes = len(set([img.item(i,j) for i in range(500) for j in range(500)]))
#    n_classes = num_of_classes if num_of_classes > n_classes else n_classes
    
#print("n_classes: ", n_classes) 
# 10 classes in total !!!!



In [7]:
import torchvision
vgg16_pretrained = torchvision.models.vgg16(pretrained=True)

In [10]:
import torchvision.transforms as transforms

def main(args: Hyperparams):
    num_streams = len(args.streams)-1 if 'label' in args.streams else len(args.streams)
    tr_dset = Sem3DIMGDataset(args, training=True, transform=transforms.Compose([
            ToTensor(),
            ToNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]))
    tr_loader = DataLoader(tr_dset, num_workers=args.num_workers, shuffle=True, batch_size=args.batch_size)
    #print("dataloader length:", len(tr_loader))
    model = DeePr3essNet(pretrained_path=vgg16_pretrained, num_modalities=num_streams, use_cuda=False)
    #model = model.cuda()
    optimizer = args.get_optimizer(model)
    loss_func = args.loss_func
    

    for epoch in range(2):
        running_loss = 0.0
        for i, data in enumerate(tr_loader):
            #print("i : ", i)
            keys = list(data.keys())
            
            if 'label' in keys:
                keys.remove('label')
            else:
                raise ValueError("There is no label in dataloader")
            #print(keys)
            
            '''
            #--------------------------------------loss method 1-----------------------------------------------
            
            batch_loss = 0
            batch_size = len(data[keys[0]])# batch_size is not necessarily the args.batch_size, think of mod() operation
            for k in range(batch_size):
                inputs = [Variable(data[name][k].unsqueeze(0), requires_grad=True) for name in keys]
                target = data['label'][k]
                target = Variable(target.unsqueeze(0), requires_grad=False)

                output = model(inputs)
                logsoftmax = torch.nn.LogSoftmax(dim=1)
                loss = loss_func(logsoftmax(output), target)
                print('------------------------------loss-------------------------------  -> ', loss.data[0])
                batch_loss += loss
            batch_loss /= 2
            '''
            '''
            #--------------------------------------loss method 2-----------------------------------------------
            
            batch_loss = 0
            batch_size = len(data[keys[0]])# batch_size is not necessarily the args.batch_size, think of mod() operation
            output = []
            targets = Variable(data['label'].cuda(), requires_grad=False)
            for k in range(batch_size):
                inputs = [Variable(data[name][k].unsqueeze(0).cuda(), requires_grad=True) for name in keys]
                #target = data['label'][k]
                #target = Variable(target.unsqueeze(0), requires_grad=False)
                res = model(inputs)
                output.append(res.squeeze(0))
            outputs = torch.stack(output, dim=0)
            logsoftmax = torch.nn.LogSoftmax(dim=1)
            batch_loss = loss_func(logsoftmax(outputs), targets)
            print('------------------------------batch_loss-------------------------------  -> ', batch_loss.data[0])
            '''
            
             #--------------------------------------loss method 3-----------------------------------------------
            
            batch_loss = 0
            #targets = Variable(data['label'].cuda(), requires_grad=False)
            targets = Variable(data['label'], requires_grad=False)
            
            #inputs = [Variable(data[name].cuda(), requires_grad=True) for name in keys]
            inputs = [Variable(data[name], requires_grad=True) for name in keys]
            
            #print("inputs color shape:", inputs[0].size())

            #print("target shape:", targets.size())
            output = model(inputs)
            print("output shape:", output.size())
            
            logsoftmax = torch.nn.LogSoftmax(dim=1)
            batch_loss = loss_func(logsoftmax(output), targets)
            print('------------------------------batch_loss-------------------------------  -> ', batch_loss.data[0])
            

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
                        
            #time.sleep(5)
            running_loss += batch_loss
        print(epoch," running_loss: ", running_loss.data[0])

params = Hyperparams()
main(params)


self.imdb validation_set:  []
output shape: torch.Size([2, 9, 500, 500])
------------------------------batch_loss-------------------------------  ->  5.3384623527526855
0  running_loss:  5.3384623527526855
output shape: torch.Size([2, 9, 500, 500])
------------------------------batch_loss-------------------------------  ->  4.756173610687256
1  running_loss:  4.756173610687256


In [8]:
import os.path as osp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

def get_upsampling_weight(in_channels, out_channels, kernel_size):
    """Make a 2D bilinear kernel suitable for upsampling"""
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    filt = (1 - abs(og[0] - center) / factor) * \
           (1 - abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype=np.float64)
    weight[range(in_channels), range(out_channels), :, :] = filt
    return torch.from_numpy(weight).float()


class DeePr3essNet(nn.Module):

    def __init__(self, n_class=9, pretrained_path=None, num_modalities=1, use_cuda=False):
        super(DeePr3essNet, self).__init__()
        self.use_cuda = use_cuda
        
        self.nets = []
        for i in range(0,num_modalities):
            self.nets.append(FCN8s(n_class,pretrained_path))

        self.upscore8 = nn.ConvTranspose2d(  
            n_class, n_class, 16, stride=8, bias=False)

    
    def forward(self, x):  
        assert len(x) == len(self.nets) 
        h = 0
        for i,net in enumerate(self.nets):
            if self.use_cuda:
                net = net.cuda()
            res = net(x[i])
            h = h + res  #fusion

        h = self.upscore8(h)
        #print("upscore8 output size : ", h.data.size(), ' -> 1')
        
        h = h[:, :, 2:2 + x[0].size()[2], 2:2 + x[0].size()[3]].contiguous()
        #print("h[2 : 2+input.size()] output size : ", h.data.size(), ' -> 1')
        return h


class FCN8s(nn.Module):

    def __init__(self, n_class=9, pretrained_path=None):
        super(FCN8s, self).__init__()
        # conv1
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=70)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/32

        # fc6
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        self.score_pool3 = nn.Conv2d(256, n_class, 1)
        self.score_pool4 = nn.Conv2d(512, n_class, 1)

        self.upscore2 = nn.ConvTranspose2d(
            n_class, n_class, 4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(
            n_class, n_class, 16, stride=8, bias=False)
        self.upscore_pool4 = nn.ConvTranspose2d(
            n_class, n_class, 4, stride=2, bias=False)

        self._initialize_weights()
        if pretrained_path is not None:
            self.copy_params_from_vgg16(pretrained_path)
            torch.nn.init.xavier_uniform(self.score_fr.weight)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.zero_()
                if m.bias is not None:
                    m.bias.data.zero_()
            if isinstance(m, nn.ConvTranspose2d):
                assert m.kernel_size[0] == m.kernel_size[1]
                initial_weight = get_upsampling_weight(
                    m.in_channels, m.out_channels, m.kernel_size[0])
                m.weight.data.copy_(initial_weight)

    def forward(self, x):
        h = x
        #print("input size : ", h.data.size())

        h = self.relu1_1(self.conv1_1(h))
        #print("conv1_1 PADDING=70 output size : ", h.data.size(), " -> 1")

        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)
        #print("conv1 output size : ", h.data.size(), " -> 1/2")

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)
        #print("conv2 output size : ", h.data.size(), " -> 1/4")

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)
        pool3 = h  # 1/8
        #print("conv3 output size : ", h.data.size(), " -> 1/8")

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)
        pool4 = h  # 1/16
        #print("conv4 output size : ", h.data.size(), " -> 1/16")

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)
        #print("conv5 output size : ", h.data.size(), " -> 1/32")

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        #print("fc6 output size : ", h.data.size())

        h = self.relu7(self.fc7(h))
        h = self.drop7(h)
        #print("fc7 output size : ", h.data.size())

        h = self.score_fr(h)
        #print("score_fr output size : ", h.data.size())

        h = self.upscore2(h)
        upscore2 = h  # 1/16
        #print("upscore2 output size : ", h.data.size())

        h = self.score_pool4(pool4)
        #print("score_pool4(pool4) output size : ", h.data.size())

        h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
        #print("score_pool4c = h[5 : 5+upscore2.size()] output size : ", h.data.size(), ' -> 1/16')
        score_pool4c = h  # 1/16

        h = upscore2 + score_pool4c  # 1/16
        #print("upscore2 + score_pool4c output size : ", h.data.size(), ' -> 1/16')

        h = self.upscore_pool4(h)
        upscore_pool4 = h  # 1/8
        #print("upscore_pool4(upscore2 + score_pool4c) output size : ", h.data.size(), ' -> 1/8')

        h = self.score_pool3(pool3)
        #print("score_pool3(pool3) output size : ", h.data.size(), ' -> 1/8')
        
        h = h[:, :,
              9:9 + upscore_pool4.size()[2],
              9:9 + upscore_pool4.size()[3]]
        score_pool3c = h  # 1/8
        #print("score_pool3c = h[9 : 9+upscore_pool4.size()] output size : ", h.data.size(), ' -> 1/8')

        h = upscore_pool4 + score_pool3c  # 1/8
        #print("upscore_pool4 + score_pool3c output size : ", h.data.size(), ' -> 1/8')
        
        #h = self.upscore8(h)
        #print("upscore8 output size : ", h.data.size(), ' -> 1')
        
        #h = h[:, :, 2:2 + x.size()[2], 2:2 + x.size()[3]].contiguous()
        #print("h[2 : 2+input.size()] output size : ", h.data.size(), ' -> 1')

        return h
    
    def copy_params_from_vgg16(self, vgg16):
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]
        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
        for i, name in zip([0, 3], ['fc6', 'fc7']):
            l1 = vgg16.classifier[i]
            l2 = getattr(self, name)
            l2.weight.data = l1.weight.data.view(l2.weight.size())
            l2.bias.data = l1.bias.data.view(l2.bias.size())


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/LinboHe/.torch/models/vgg16-397923af.pth
100.0%


In [21]:
print(type(vgg16_pretrained.features))
print(type(vgg16_pretrained.features[12]))
print(vgg16_pretrained.features[12].weight.size())
vgg16_dict = vgg16_pretrained.state_dict()

fcn32s = FCN32s()

<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.conv.Conv2d'>
torch.Size([256, 256, 3, 3])
------------------------copy params to fc6 and fc7------------------------------------
l2 fcn32 weight size :  torch.Size([4096, 512, 7, 7])
l1 vgg16 weight size :  torch.Size([4096, 25088])
------------------------copy params to fc6 and fc7------------------------------------
l2 fcn32 weight size :  torch.Size([4096, 4096, 1, 1])
l1 vgg16 weight size :  torch.Size([4096, 4096])


In [20]:
def get_upsampling_weight(in_channels, out_channels, kernel_size):
    """Make a 2D bilinear kernel suitable for upsampling"""
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    filt = (1 - abs(og[0] - center) / factor) * \
           (1 - abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype=np.float64)
    weight[range(in_channels), range(out_channels), :, :] = filt
    return torch.from_numpy(weight).float()


class FCN32s(nn.Module):

    def __init__(self, n_class=21, pretrained=False, vgg=None):
        super(FCN32s, self).__init__()
        # conv1
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/32

        # fc6
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        self.upscore = nn.ConvTranspose2d(n_class, n_class, 64, stride=32,
                                          bias=False)

        self._initialize_weights()
        if pretrained:
            self.copy_params_from_vgg16(vgg)
            torch.nn.init.xavier_uniform(self.score_fr.weight)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.zero_()
                if m.bias is not None:
                    m.bias.data.zero_()
            if isinstance(m, nn.ConvTranspose2d):
                assert m.kernel_size[0] == m.kernel_size[1]
                initial_weight = get_upsampling_weight(
                    m.in_channels, m.out_channels, m.kernel_size[0])
                m.weight.data.copy_(initial_weight)

    def forward(self, x):
        h = x
        print("input size : ", h.data.size())
        print("value: ", h[0][0][0][:3])
        h = self.relu1_1(self.conv1_1(h))
        print("conv1_1 output size : ", h.data.size(), " -> 1")
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)
        print("conv1 output size : ", h.data.size(), " -> 1/2")
        print("value: ", h[0][0][0][:3])

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)
        print("conv2 output size : ", h.data.size(), " -> 1/4")
        print("value: ", h[0][0][0][:3])

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)
        print("conv3 output size : ", h.data.size(), " -> 1/8")
        print("value: ", h[0][0][0][:3])

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)
        print("conv4 output size : ", h.data.size(), " -> 1/16")
        print("value: ", h[0][0][0][:3])

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)
        print("conv5 output size : ", h.data.size(), " -> 1/32")
        print("value: ", h[0][0][0][:3])

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)
        print("fc6 output size : ", h.data.size())
        print("value: ", h[0][0][0][:3])

        h = self.relu7(self.fc7(h))
        h = self.drop7(h)
        print("fc7 output size : ", h.data.size())
        print("value: ", h[0][0][0][:3])

        h = self.score_fr(h)
        print("score_fr output size : ", h.data.size(), " -> prediction for n classes")

        h = self.upscore(h)
        print("upscore output size : ", h.data.size(), " -> 1")

        # h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]].contiguous()
        # print("with offset 19 output size : ", h.data.size())

        h = h[:, :, 30:30 + x.size()[2], 30:30 + x.size()[3]].contiguous()
        print("with offset 22 output size : ", h.data.size())

        return h

    def copy_params_from_vgg16(self, vgg16):
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]
        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
        for i, name in zip([0, 3], ['fc6', 'fc7']):
            l1 = vgg16.classifier[i]
            l2 = getattr(self, name)
            l2.weight.data = l1.weight.data.view(l2.weight.size())
            l2.bias.data = l1.bias.data.view(l2.bias.size())