# Grayscale Image Converstion
Zhang et al. –– https://link.springer.com/chapter/10.1007%2F978-3-319-46487-9_40 <br />
Luke Melas dataset –– https://lukemelas.github.io/image-colorization.html

In [1]:
# Add official website of pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
from torch.autograd import Variable
from torchvision import datasets, transforms
import torchvision.models as models
import torch.nn.functional as F

import numpy as np
import sys
import os, time, shutil, argparse

import matplotlib.pyplot as plt
from skimage.color import rgb2lab, rgb2gray, lab2rgb
plt.switch_backend('agg')


# Download and Build Dataset <br />
Using the MIT database for LAB images, we import 40,000 training images and 1,000 testing and validation images




In [2]:
# # %cd / Kaggle
# !wget http://data.csail.mit.edu/places/places205/testSetPlaces205_resize.tar.gz

# !tar -xzf testSetPlaces205_resize.tar.gz


# # Move data into training and validation directories
# print('MOVING DIRECTORIES')

# os.makedirs('images/train/class/', exist_ok=True) # 40,000 images
# os.makedirs('images/val/class/', exist_ok=True)   #  1,000 images

# for i, file in enumerate(os.listdir('testSet_resize')):
#   if i < 1000: # first 1000 will be val
#       os.rename('testSet_resize/' + file, 'images/val/class/' + file)
#   else: # others will be val
#       os.rename('testSet_resize/' + file, 'images/train/class/' + file)
# print('done')

In [3]:
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


# Run Transformations, Grayscale Converter, and DataLoader  <br />
### For this grayscale dataset, run a random horizontal flip
Note: The torchvision.transforms package provides tools for preprocessing data
and for performing data augmentation; here we set up a transform to
preprocess the data by flipping images at random inxedes.




### Grayscale <br />
Our grayscale (Gray) class will convert each of the  LAB images into a lightness level image (provided by Luke Melas at MIT). The class also returns the ogirinal image and the AB qauntized zone. The class is called in the LAB data_loader class

In [4]:
train_transforms = transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()])

val_transforms = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])

In [5]:
class Gray(datasets.ImageFolder):
    def __getitem__(self, index):
        path, target = self.imgs[index]
        
        img = self.loader(path)

        if self.transform is not None:
            img_original = self.transform(img)
            img_original = np.asarray(img_original)
            img_lab = rgb2lab(img_original)
            img_lab = (img_lab + 128) / 255
            img_ab = img_lab[:, :, 1:3]
            img_ab = torch.from_numpy(img_ab.transpose((2, 0, 1))).float()
            img_original = rgb2gray(img_original)
            img_original = torch.from_numpy(img_original).unsqueeze(0).float()
        if self.target_transform is not None:
            target = self.target_transform(target)
        return (img_original, img_ab, target)

In [6]:
## builds our data loaders
class LAB():
  def __init__(self, train_directory, val_directory):
    # split training data
    train=Gray(train_directory,train_transforms)
    self.loader_train=DataLoader(train,  batch_size=15, shuffle=True)

    # split testing data
    test=Gray(testing_directory,val_transforms)
    self.loader_test=DataLoader(train,batch_size=15, shuffle=False)

  def give(self):
    train, load =  self.loader_train, self.loader_test
    return train, load


In [7]:
train_directory= os.path.join('.', 'images/train/')
testing_directory= os.path.join('.', 'images/val/')

train_loader, val_loader = LAB(train_directory, testing_directory).give()

In [8]:
print('Number of Batches in Training: ', len(train_loader))

print('\nTraining Data:')
print(train_loader.dataset)


print('\nSize of Each Image: ', len(train_loader.dataset[0][0][0]))
input_n = len(train_loader.dataset[0][0][0])

Number of Batches in Training:  2667

Training Data:
Dataset Gray
    Number of datapoints: 40000
    Root location: ./images/train/
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
               RandomHorizontalFlip(p=0.5)
           )

Size of Each Image:  224


# Defining the Model and Helper Functions
Now that our data had been downloaded, transformed, and finally loaded, let us begin defining our model. First, we must define out helper function to help gage accuracy, or in the case of Colorization, display hallucinated images at random indexes in the testing dataset.

### Helper Functions
The below class will be used to display the hallucinated images, the probability distribution of it's quantized zone (predicted AB values), the original image, and the original AB values

## Here we define our model
We are not using class rebalancing below, just simply running through the model during training and then calculating the resulting color distribution. <br />
> * Richard Zhang et al. recomended the use of cross entropy loss to measure difference (after color probability layer) to account for differnces in the ab layer
> * Don't use softmax before cross entropy loss 
  * Doing so could increases chance of overflow encounter

### Sources
Understanding how PyTorch determines layer size
  * https://towardsdatascience.com/pytorch-layer-dimensions-what-sizes-should-they-be-and-why-4265a41e01fd





In [9]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

    ## LIGHTNESS LEVEL LEARNERS
        self.layer1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=True), nn.ReLU(),nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=True),nn.ReLU(), nn.BatchNorm2d(64))

        self.layer2 = nn.Sequential(nn.Conv2d(64,128, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(),nn.Conv2d(128,128, kernel_size=3, stride=2, padding=1, bias=True),nn.ReLU(), nn.BatchNorm2d(128))

        self.layer3 = nn.Sequential(nn.Conv2d(128,256, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(),nn.Conv2d(256,256,kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(),nn.Conv2d(256,256, kernel_size=3, stride=2, padding=1, bias=True),nn.ReLU(),nn.BatchNorm2d(256))

        self.layer4 = nn.Sequential(nn.Conv2d(256,512, kernel_size=3, stride=1, padding=1, bias=True), nn.ReLU(),nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(), nn.Conv2d(512,512, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(),nn.BatchNorm2d(512))

        self.layer5 = nn.Sequential(nn.Conv2d(512,512, kernel_size=3,dilation=2, stride=1, padding=2, bias=True),nn.ReLU(),nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),nn.ReLU(),nn.Conv2d(512,512 ,kernel_size=3,dilation=2, stride=1, padding=2, bias=True),nn.ReLU(),nn.BatchNorm2d(512))     

        self.layer6 = nn.Sequential(nn.Conv2d(512,512, kernel_size=3,dilation=2, stride=1, padding=2, bias=True),nn.ReLU(),nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),nn.ReLU(),nn.Conv2d(512,512 ,kernel_size=3,dilation=2, stride=1, padding=2, bias=True),nn.ReLU(),nn.BatchNorm2d(512))

        self.layer7 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(True),nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(True),nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(True),nn.BatchNorm2d(512))
        
        self.layer8 = nn.Sequential(nn.ConvTranspose2d(512, 256 ,kernel_size=4, stride=2, padding=1, bias=True),nn.ReLU(),nn.Conv2d(256,256, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU(),nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),nn.ReLU())

#           model8=[nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=True),]
#         model8+=[nn.ReLU(True),]
#         model8+=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),]
#         model8+=[nn.ReLU(True),]
#         model8+=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),]
#         model8+=[nn.ReLU(True),]
        
    ## Predictions of AB values
#     nn.Conv2d(256, 313, kernel_size=1, stride=1, padding=0, bias=True),]
        self.prob = nn.Sequential(nn.ConvTranspose2d(256,313, kernel_size=1, stride=1, padding=0, bias=True),nn.ReLU())

        self.model_out = nn.Conv2d(313, 2, kernel_size=1, padding=0, dilation=1, stride=1, bias=False)

        self.softmax = nn.Softmax()

        # # self.prob = nn.Conv2d()



        
        self.upsample4 = nn.Upsample(scale_factor=4, mode='bilinear')


    def forward(self, x):
        temperature = 0.38

        one = self.layer1(x)
        two = self.layer2(one)
        three = self.layer3(two)
        four = self.layer4(three)
        five = self.layer5(four)
 
    
        # upward = self.layer5(self.layer4(self.layer3(self.layer2(temp))))
        dilate = self.layer8(self.layer7(self.layer6(five)))/temperature
        
        prob = self.prob(dilate)
        

        image = self.softmax(self.upsample4(self.model_out(prob)))

        
        return image



# Train and Optimize our Model
Some info on computing a loss function –– https://neptune.ai/blog/pytorch-loss-functions
* Note: CrossEntropy Loss is best for computing error between 2 probability distributions and heavily penalizes high confidence

In [10]:
def train(train_loader, net, criterion, optimizer, epoch):
    long_loss = []

    for epoch in range(epochs):  # loop over the dataset multiple times


        running_loss = 0.0
        for i, (input_gray, input_ab, target)in enumerate(train_loader, 0):
          
          # move data to the GPU
            input_gray, input_ab, target = input_gray.cuda(), input_ab.cuda(), target.cuda()
#             print('iteration',i)

            # forward and loss
            output_ab = net(input_gray)

#             global temp
#             temp = input_ab
            target = input_ab.flatten()
            input =output_ab.flatten()



            loss = criterion(input, target)
#             print('loss calculated')


            # backward and optimize –– zero the parameter gradients
#             print('training')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print statistics
            long_loss.append(loss)
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training')
    return long_loss
                               
                                               # target needs to be size NxNumClasses
                # labels needs to be size N
                    # Note: flatten both tensors in order to create N
            
#           target = input_ab.long().flatten().reshape([15,224*2, 224])[0]
#           input = output_ab.reshape([15,224*2, 224])[0]

In [None]:
colorizer = Model()
colorizer.cuda()

criterion = nn.MSELoss()
optimizer = optim.SGD(colorizer.parameters(), lr=0.001, momentum=0.9)
epochs = 1


graph = train(train_loader, colorizer, criterion, optimizer,  epochs)

  "See the documentation of nn.Upsample for details.".format(mode))


[1,  2000] loss: 0.004


In [None]:
# temp.size()

In [None]:
# a = temp[0,0,:,:]
# a.size()

In [None]:
# a