# Make sure you are using PyTorch 1.0.x to run this code!

In [0]:
%%bash
# Remove any preinstalled PyTorch / torchvision packages before installing the pinned build.
pip uninstall pytorch torchvision -y
pip uninstall torch -y
pip uninstall torch -y  # yes twice (presumably more than one copy may be installed)

# Install the PyTorch 1.0.1 wheel this notebook targets (CUDA 10.0, CPython 3.6, Linux x86_64).
pip install https://download.pytorch.org/whl/cu100/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl
# Pin torchvision to the release paired with PyTorch 1.0.1. An unpinned install can
# resolve to a newer torchvision whose torch requirement replaces the wheel above.
pip install torchvision==0.2.2

In [4]:
# Report the PyTorch version first, then the interpreter version.
import sys

import torch

print(torch.__version__)
print(sys.version)

1.0.1.post2  (Note: do not trust this number; the version reported here is unreliable.)
3.6.7 (default, Oct 22 2018, 11:32:17) 
[GCC 8.2.0]


# Clone required repos

In [0]:
# Fetch ObjectNet, make it the working directory, then fetch CoarseNet inside it.
! git clone https://github.com/Nikronic/ObjectNet.git
% cd ObjectNet/
! git clone https://github.com/Nikronic/CoarseNet.git

In [0]:
# ! git clone https://github.com/Nikronic/EdgeNet.git

# Download pretrained weights of ObjectNet

In [0]:
%%bash
# Download the pretrained ObjectNet encoder/decoder weights and one sample image,
# then run test.py on that image.
#
# Stop at the first failing command, unset variable or failed pipeline stage, so that
# inference is never started when a download did not succeed.
set -euo pipefail

# Image and model names
TEST_IMG=ADE_val_00001519.jpg
MODEL_PATH=baseline-resnet101dilated-ppm_deepsup
RESULT_PATH=./

ENCODER="$MODEL_PATH/encoder_epoch_25.pth"
DECODER="$MODEL_PATH/decoder_epoch_25.pth"

# Download model weights and image (each file only when it is not present yet)
mkdir -p "$MODEL_PATH"
if [ ! -e "$ENCODER" ]; then
  wget -P "$MODEL_PATH" "http://sceneparsing.csail.mit.edu/model/pytorch/$ENCODER"
fi
if [ ! -e "$DECODER" ]; then
  wget -P "$MODEL_PATH" "http://sceneparsing.csail.mit.edu/model/pytorch/$DECODER"
fi
if [ ! -e "$TEST_IMG" ]; then
  wget -P "$RESULT_PATH" "http://sceneparsing.csail.mit.edu/data/ADEChallengeData2016/images/validation/$TEST_IMG"
fi

# Inference
python3 -u test.py \
  --model_path "$MODEL_PATH" \
  --test_imgs "$TEST_IMG" \
  --suffix '_epoch_25.pth' \
  --arch_encoder resnet101dilated \
  --arch_decoder ppm_deepsup \
  --fc_dim 2048 \
  --result "$RESULT_PATH"


# SubModules

In [0]:
# %% libraries
import torch.nn as nn
import torch


class CoarseLoss(nn.Module):
    """
    Loss combining a pixel-wise L1 term with an L2 term between Gram matrices.

    The result is ``w1 * L1(y, y_pred) + w2 * L2(gram(y), gram(y_pred))`` where the L1 term is
    averaged over all elements and the L2 term is summed over all Gram matrix entries.
    """

    def __init__(self, w1=50, w2=1):
        """
        Store the two weights and build the underlying criteria.

        :param w1: weight of L1  (pixel-wise)
        :param w2: weight of L2 loss (Gram matrix)
        """
        super(CoarseLoss, self).__init__()
        self.w1, self.w2 = w1, w2
        self.l1 = nn.L1Loss(reduction='mean')
        self.l2 = nn.MSELoss(reduction='sum')

    # reference: https://github.com/pytorch/tutorials/blob/master/advanced_source/neural_style_tutorial.py
    @staticmethod
    def gram_matrix(mat):
        """
        Compute the normalized Gram matrix of a 4D tensor.

        The tensor is flattened to ``(a * b, c * d)`` before the product is taken, so with a
        batch size above 1 the rows of all samples end up in one matrix and the result also
        holds products between feature maps of different samples.

        :param mat: tensor of size (a, b, c, d): a=batch size, b=number of feature maps,
        (c, d)=dimensions of a feature map
        :return: tensor of size (a * b, a * b), divided by a * b * c * d
        """
        batch, maps, height, width = mat.size()
        flat = mat.view(batch * maps, height * width)
        products = torch.matmul(flat, flat.t())
        return products / (batch * maps * height * width)

    def forward(self, y, y_pred):
        """
        Evaluate the weighted loss.

        :param y: first tensor (passed first to both criteria)
        :param y_pred: second tensor
        :return: scalar loss tensor
        """
        pixel_term = self.l1(y, y_pred)
        gram_term = self.l2(self.gram_matrix(y), self.gram_matrix(y_pred))
        return self.w1 * pixel_term + self.w2 * gram_term


In [0]:
# %% libraries
import PIL.Image as Image
import numpy.matlib
import numpy as np
import random
import math

# %% functions
# Threshold (dither) matrices; generate_halftone picks one of them at random.
dithMat =[
    # 8x8 spiral
    [[62, 58, 45, 41, 37, 49, 53, 61],
     [54, 34, 25, 21, 17, 29, 33, 57],
     [ 50, 30, 13,  9,  5, 12, 24, 44],
     [ 38, 18,  6,  1,  0,  8, 20, 40],
     [42, 22, 10, 2, 3, 4, 16, 36],
     [46, 26, 14, 7, 11, 15, 28, 48],
     [59, 35, 31, 19, 23, 27, 32, 52],
     [ 63, 55, 51, 39, 43, 47, 56, 60]],
    # 8x8 dispersed
    [[ 1, 30, 8, 28, 2, 29, 7, 27],
     [ 17, 9, 24, 16, 18, 10, 23, 15],
     [ 5, 25, 3, 32, 6, 26, 4, 31],
     [ 21, 13, 19, 11, 22, 14, 20, 12],
     [ 2, 29, 7, 27, 1, 30, 8, 28],
     [ 18, 10, 23, 15, 17, 9, 24, 16],
     [ 6, 26, 4, 31, 5, 25, 3, 32],
     [ 22, 14, 20, 12, 21, 13, 19, 11]],
    # 8x8 octa_dot
    [[ 45, 17, 25, 37, 47, 19, 27, 39],
     [ 49, 1, 9, 57, 51, 3, 11, 59],
     [ 29, 33, 41, 21, 31, 35, 43, 23],
     [ 13, 61, 53, 5, 15, 63, 55, 7],
     [ 48, 20, 28, 40, 46, 18, 26, 38],
     [ 52, 4, 12, 60, 50, 2, 10, 58],
     [ 32, 36, 44, 24, 30, 34, 42, 22],
     [ 16, 64, 56, 8, 14, 62, 54, 6]],
    # 5x5 diamond
    [[ 5, 118, 160, 58, 17],
     [ 48, 201, 232, 170, 99],
     [ 129, 211, 252, 242, 150],
     [ 89, 191, 221, 181, 68],
     [ 38, 78, 140, 108, 27]],
    # 5x5 clockwise spiral
    [[3, 10, 16, 11, 4],
     [ 9, 20, 21, 17, 12],
     [ 15, 24, 25, 22, 13],
     [ 8, 19, 23, 18, 5],
     [ 2, 7, 14, 6, 1]],
    # 4x4 ordered
    [[ 5, 9, 6, 10],
     [ 13, 1, 14, 2],
     [ 7 ,11, 4, 8],
     [ 15, 3, 12, 0]],
]


def get_resDmat(channel_size, dithMat):
    """
    Build a per-pixel threshold map for one channel from a dither matrix.

    The matrix values are rescaled towards the 0..255 range and the rescaled matrix is tiled
    until it covers the requested size, then cut to exactly that size.

    :param channel_size: (width, height) of the channel, in that order
    :param dithMat: 2D dither matrix (a list of equally long rows)
    :return: integer numpy array of shape (height, width) holding the thresholds
    """
    newSzY, newSzX = channel_size[1], channel_size[0]

    cells = np.asarray(dithMat)
    # Take the extrema over ALL cells. On a list of rows, min(min(m)) / max(max(m)) first pick
    # the lexicographically smallest / largest row and only look inside that row, which gives a
    # wrong value range (and thresholds above 255) for matrices whose extreme values do not sit
    # in those rows.
    minDmat = cells.min()
    maxDmat = cells.max()
    nbOfIntervals = maxDmat - minDmat + 2
    singleInterval = 255 / nbOfIntervals
    # NOTE(review): subtracting minDmat + 1 gives the smallest cell a threshold of
    # -singleInterval, i.e. below zero -- presumably intended; confirm.
    scaledDithMat = ((cells - (minDmat + 1)) * singleInterval).astype(int)

    dmatSzY, dmatSzX = scaledDithMat.shape
    # Number of repetitions needed along each axis to cover the channel.
    nX = math.ceil(newSzX / dmatSzX)
    nY = math.ceil(newSzY / dmatSzY)
    resDmat = np.tile(scaledDithMat, (nY, nX))[:newSzY, :newSzX]
    return resDmat


def generate_halftone(im):
    """
    Produce a halftoned RGB version of an image.

    The image is converted to CMYK; every channel is rotated by a screen angle, thresholded
    against a tiled dither matrix, rotated back and cropped to the original channel size. The
    four screened channels are merged again and converted to RGB. The dither matrix and the
    set of screen angles are both drawn at random.

    :param im: PIL image
    :return: halftoned PIL image in 'RGB' mode
    """
    cmyk_im = im.convert('CMYK')
    pattern = random.choice(dithMat)
    planes = cmyk_im.split()
    angle_sets = [[ 15, 45, 0, 75],
                  [ 45, 15, 0, 75],
                  [ 0, 0, 0, 0]]

    screen_angles = random.choice(angle_sets)
    if planes[0] == planes[1] == planes[2]:
        # C, M and Y are identical: use the first angle of the set for every channel.
        screen_angles = [screen_angles[0]] * 4

    dots = []
    for plane, angle in zip(planes, screen_angles):
        rotated = plane.rotate(angle, expand=1)
        thresholds = get_resDmat(rotated.size, pattern)
        mask = np.asarray(rotated) > thresholds
        # https://stackoverflow.com/questions/27622834/write-numpy-ndarray-to-image
        # reason of casting to 'uint8'
        screened = Image.fromarray((mask * 255).astype('uint8'))
        screened = screened.convert('L').rotate(-angle, expand=1)
        # The expanded rotations enlarge the canvas; cut the original extent out of its centre.
        full_w, full_h = screened.size
        plane_w, plane_h = plane.size
        left = (full_w - plane_w) / 2
        top = (full_h - plane_h) / 2
        dots.append(screened.crop((left, top, left + plane_w, top + plane_h)))

    return Image.merge('CMYK', dots).convert('RGB')


# %% test
# im = Image.open('data/Places365_val_00000001.jpg')
# imh = generate_halftone(im)
# imh.show()


In [0]:
from __future__ import print_function, division
from PIL import Image
from skimage import feature, color
from torchvision.transforms import ToTensor, ToPILImage
import numpy as np
import random

import tarfile
import io
import os
import pandas as pd

from torch.utils.data import Dataset
import torch

# from utils.Halftone.halftone import generate_halftone


class PlacesDataset(Dataset):
    """
    Data set that yields, for every listed image, a dict with the halftoned input ('X'), the
    original image ('y_descreen') and the edge map of the original image ('y_edge').
    """

    def __init__(self, txt_path='filelist.txt', img_dir='data', transform=None):
        """
        Initialize data set as a list of IDs corresponding to each item of data set

        :param txt_path: a text file containing names of all of images line by line
        :param img_dir: path to the image files; treated as an uncompressed tar archive when the
                        path contains 'tar', otherwise as a folder
        :param transform: apply some transforms like cropping, rotating, etc on input image
        """

        # NOTE(review): read_csv runs with its default header handling, so the first line of the
        # file list is presumably consumed as a header instead of an image name -- confirm
        # against the real file.
        df = pd.read_csv(txt_path, sep=' ', index_col=0)
        self.img_names = df.index.values
        self.txt_path = txt_path
        self.img_dir = img_dir
        self.transform = transform
        self.to_tensor = ToTensor()
        self.to_pil = ToPILImage()
        self.get_image_selector = 'tar' in img_dir
        # NOTE(review): this handle is created once here; verify tar mode together with
        # DataLoader worker processes before relying on it.
        self.tf = tarfile.open(self.img_dir) if self.get_image_selector else None

    def close(self):
        """
        Close the tar archive if one is open. Calling it again is harmless.

        :return: None
        """
        tf = getattr(self, 'tf', None)
        if tf is not None and not tf.closed:
            tf.close()

    def __del__(self):
        # Release the archive when the data set goes away instead of closing it in __getitem__.
        self.close()

    def get_image_from_tar(self, name):
        """
        Gets a image by a name gathered from file list csv file

        :param name: name of targeted image
        :return: a PIL image
        """
        if self.tf is None or self.tf.closed:  # reopen after close()
            self.tf = tarfile.open(self.img_dir)
        image = self.tf.extractfile(name)
        image = image.read()
        image = Image.open(io.BytesIO(image))
        return image

    def get_image_from_folder(self, name):
        """
        gets a image by a name gathered from file list text file

        :param name: name of targeted image
        :return: a PIL image
        """

        image = Image.open(os.path.join(self.img_dir, name))
        return image

    def __len__(self):
        """
        Return the length of data set using list of IDs

        :return: number of samples in data set
        """
        return len(self.img_names)

    def _transform_in_sync(self, *images):
        """
        Apply ``self.transform`` to every image with identical random decisions.

        The state of Python's ``random`` module and of the torch CPU generator is captured once
        and restored before each call, so a random crop / rotation / flip (and any other random
        step of the transform) is replayed the same way for all images and they stay aligned.

        :param images: images to transform
        :return: list of transformed images, in the same order
        """
        py_state = random.getstate()
        torch_state = torch.get_rng_state()
        transformed = []
        for image in images:
            random.setstate(py_state)
            torch.set_rng_state(torch_state)
            transformed.append(self.transform(image))
        return transformed

    def __getitem__(self, index):
        """
        Generate one item of data set. Here we apply our preprocessing things like halftone styles and
        subtractive color process using CMYK color model, generating edge-maps, etc.

        The tar archive is deliberately NOT closed here: closing it on the last index happened
        before that item was read, and with shuffling the last index can be requested at any
        time. Use close() (or let the object be garbage collected) instead.

        :param index: index of item in IDs list

        :return: a sample of data as a dict with keys 'X', 'y_descreen' and 'y_edge'
        """

        name = self.img_names[index]
        if self.get_image_selector:  # note: we prefer to extract then process!
            y_descreen = self.get_image_from_tar(name)
        else:
            y_descreen = self.get_image_from_folder(name)

        # generate halftone image
        X = generate_halftone(y_descreen)

        # generate edge-map
        y_edge = self.canny_edge_detector(y_descreen)

        if self.transform is not None:
            # Input, target and edge map must receive the same random transform parameters.
            X, y_descreen, y_edge = self._transform_in_sync(X, y_descreen, y_edge)

        sample = {'X': X,
                  'y_descreen': y_descreen,
                  'y_edge': y_edge}

        return sample

    def canny_edge_detector(self, image):
        """
        Returns a binary image with same size of source image which each pixel determines belonging to an edge or not.

        :param image: PIL image (a torch.Tensor is converted to a PIL image first)
        :return: PIL image in mode '1'
        """
        if isinstance(image, torch.Tensor):
            image = self.to_pil(image)
        image = image.convert(mode='L')
        image = np.array(image)
        edges = feature.canny(image, sigma=1)  # TODO: the sigma hyper parameter value is not defined in the paper.
        size = edges.shape[::-1]
        # Pack every row of booleans into bytes, the layout used to build the mode '1' image.
        databytes = np.packbits(edges, axis=1)
        edges = Image.frombytes(mode='1', size=size, data=databytes)
        return edges


# https://discuss.pytorch.org/t/adding-gaussion-noise-in-cifar10-dataset/961/2
class RandomNoise(object):
    """
    Transform that, with probability ``p``, adds Gaussian noise to a tensor.

    The noise is added to the input. Filling a clone with ``normal_`` would replace the whole
    image by noise and discard its content.
    """

    def __init__(self, p, mean=0, std=1):
        """
        :param p: probability of adding noise on a call
        :param mean: mean of the Gaussian noise
        :param std: standard deviation of the Gaussian noise
        """
        self.p = p
        self.mean = mean
        self.std = std

    def __call__(self, img):
        """
        :param img: floating point tensor
        :return: a new tensor ``img + noise`` when noise is applied, otherwise ``img`` itself
        """
        if random.random() <= self.p:
            noise = torch.randn_like(img) * self.std + self.mean
            return img + noise
        return img

# TRAIN.py  - - - - - - - - - - - - - - - CoarseNet-ObjectNet

In [36]:


###########################
# System libs
import os
import argparse
from distutils.version import LooseVersion
# Numerical libs
import numpy as np
import torch
import torch.nn as nn
from scipy.io import loadmat
# Our libs

from models import ModelBuilder, SegmentationModule
from utils import colorEncode
from lib.nn import user_scattered_collate, async_copy_to
from lib.utils import as_numpy
import lib.utils.data as torchdata
import cv2
from tqdm import tqdm

###########################

# %% import library
from CoarseNet import CoarseNet
from torchvision.transforms import Compose, ToPILImage, ToTensor, RandomResizedCrop, RandomRotation, \
    RandomHorizontalFlip
# from CoarseNet.utils.preprocess import *
import torch
from torch.utils.data import DataLoader
# from CoarseNet.utils.Loss import CoarseLoss

import torch.optim as optim
import torch.nn as nn
from torch.backends import cudnn

#################################


# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# %% define data sets and their loaders
# Augmentations for training; the last two steps work on tensors.
# NOTE(review): the upper bound of `scale` is above 1.0 -- presumably it is a fraction of the
# source image area; confirm against the torchvision RandomResizedCrop documentation.
custom_transforms = Compose([
    RandomResizedCrop(size=224, scale=(0.8, 1.2)),
    RandomRotation(degrees=(-30, 30)),
    RandomHorizontalFlip(p=0.5),
    ToTensor(),
    RandomNoise(p=0.5, mean=0, std=0.1)])

train_dataset = PlacesDataset(txt_path='CoarseNet/filelist.txt',
                              img_dir='CoarseNet/data',
                              transform=custom_transforms)

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=128,
                          shuffle=True,
                          num_workers=6,
                          pin_memory= False)

# NOTE(review): the test set is built from the same file list and folder as the training set.
test_dataset = PlacesDataset(txt_path='CoarseNet/filelist.txt',
                             img_dir='CoarseNet/data',
                             transform=ToTensor())

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=128,
                         shuffle=False,
                         num_workers=0,
                         pin_memory=False)


# %% initialize network, loss and optimizer
def init_weights(m):
    """
    Initialize weights of layers using Kaiming Normal (He et al.) as argument of "Apply" function of
    "nn.Module"

    Conv2d / ConvTranspose2d weights are drawn with Kaiming Normal (fan_in) and their bias is
    set to 0; BatchNorm2d weight is set to 1 and bias to 0. Parameters that do not exist
    (layers built with ``bias=False`` or ``affine=False``) are skipped, other layer types are
    left untouched.

    :param m: Layer to initialize
    :return: None
    """

    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        torch.nn.init.kaiming_normal_(m.weight, mode='fan_in')
        if m.bias is not None:  # bias is None for layers created with bias=False
            nn.init.constant_(m.bias, 0.0)
    elif isinstance(m, nn.BatchNorm2d):  # reference: https://github.com/pytorch/pytorch/issues/12259
        if m.weight is not None:  # weight and bias are None when affine=False
            nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)


def show_test(image_batch):
    """
    Convert a batch of image tensors to PIL images and lay them out on one 3x3 canvas.

    Tiles are placed column by column, each tile having the size of the first image. The canvas
    is written to 'out.png' and shown.

    :param image_batch: A Batch of torch.Tensor contain images
    :return: list with one PIL image per element of the batch
    """
    to_pil = ToPILImage()
    fs = [to_pil(image_batch[i].cpu()) for i in range(len(image_batch))]

    x, y = fs[0].size
    ncol = nrow = 3
    cvs = Image.new('RGB', (x * ncol, y * nrow))
    for i, tile in enumerate(fs):
        col, row = divmod(i, nrow)  # fill a column top to bottom, then move one column right
        cvs.paste(tile, (x * col, y * row))

    cvs.save('out.png', format='png')
    cvs.show()
    return fs
  
  
  
def test_model(net, data_loader):
    """
    Return loss on test

    :param net: The trained NN network
    :param data_loader: Data loader containing test set
    :return: Print loss value over test set in console
    """
    
    coarsenet = net[0]
    objectnet = net[1]
    
    coarsenet.train()
    objectnet.eval()
    
    running_loss = 0.0
    with torch.no_grad():
        for data in data_loader:
            X = data['X']
            y_d = data['y_descreen']
            X = X.to(device)
            y_d = y_d.to(device)
            outputs = coarsenet(X)
            output_obj=objectnet(outputs)
            loss = criterion(output_obj, y_d)
            running_loss += loss

            print('loss: %.3f' % running_loss)
    return outputs, output_obj
  
        
def train_model(net, data_loader, optimizer, criterion, epochs=10):
    """
    Optimize the first network through the second one.

    The first network is put in training mode and the second in evaluation mode. Every batch
    is passed through both, the criterion compares the second network's output with
    'y_descreen', and the optimizer takes one step. The loss accumulated over the current epoch
    is printed after every batch. Uses the module-level ``device``.

    :param net: sequence of two networks: the one being trained, then the one applied to its output
    :param data_loader: A data loader object defined on train data set; batches are dicts with
                        the keys 'X' and 'y_descreen'
    :param optimizer: Optimizer to train network
    :param criterion: The loss function to minimize by optimizer
    :param epochs: Number of epochs to train model
    :return: None
    """
    coarsenet, objectnet = net[0], net[1]

    coarsenet.train()
    objectnet.eval()

    for epoch_no in range(1, epochs + 1):
        running_loss = 0.0
        for batch_no, data in enumerate(data_loader, 1):
            X = data['X']
            y_d = data['y_descreen']
            X, y_d = X.to(device), y_d.to(device)

            optimizer.zero_grad()

            outputs = coarsenet(X)
            output_obj = objectnet(outputs)

            loss = criterion(output_obj, y_d)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            print(epoch_no, ',', batch_no, 'loss:', running_loss)

    print('Finished Training')
    

    
# Build the loss, the network to train and its optimizer, then initialize the network weights.
criterion = CoarseLoss(w1=50, w2=1)
coarsenet = CoarseNet.CoarseNet().to(device)
optimizer = optim.Adam(coarsenet.parameters(), lr=0.0001)
coarsenet.apply(init_weights)

# Build the pretrained ObjectNet encoder/decoder from the downloaded checkpoint files.
builder = ModelBuilder()
net_encoder = builder.build_encoder(
    arch='resnet101dilated',
    fc_dim=2048,
    weights=os.path.join('baseline-resnet101dilated-ppm_deepsup', 'encoder' + '_epoch_25.pth'))
net_decoder = builder.build_decoder(
    arch='ppm_deepsup',
    fc_dim=2048,
    num_class=150,
    weights=os.path.join('baseline-resnet101dilated-ppm_deepsup', 'decoder' + '_epoch_25.pth'),
    use_softmax=True)

segmentation_module = SegmentationModule(net_encoder, net_decoder, criterion)
# Follow `device` like the rest of the cell instead of hard-coding CUDA, so the cell also
# works when `device` is the CPU.
segmentation_module.to(device)

# NOTE(review): the recorded output of this cell is an IndexError whose cause is not visible
# here -- check what input SegmentationModule's forward expects.
train_model([coarsenet, segmentation_module], train_loader, optimizer, criterion, epochs=20)

# Evaluate once and show both results. Halftones are generated at random on every pass over
# the data set, so two separate evaluation runs would display outputs of different inputs.
test_outputs, test_outputs_obj = test_model([coarsenet, segmentation_module], test_loader)
show_test(test_outputs)
show_test(test_outputs_obj)

Loading weights for net_encoder
Loading weights for net_decoder


IndexError: ignored

# Extract the data archive

In [0]:
! mkdir data
! tar -xvf data.tar -C data

In [21]:
# Print the current directory, then move one level up.
! pwd
% cd ..

/content/ObjectNet/CoarseNet
/content/ObjectNet


In [34]:
# Show the current working directory.
pwd

'/content/ObjectNet'