# CS470 - Introduction to Artificial Intelligence
## Project : Colorizing grayscale images

Authors: Ayoub Mellah 20196411, Quentin Nieloud 20196414, Malek Neila Rostom 20196507, Pablo Chabance 20196417



---



#### Connection to Drive

In [81]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime so the project folder is reachable.
drive.mount('/gdrive')

# Project root on Drive, and the data folder that lives directly beneath it.
gdrive_root = '/gdrive/My Drive/Colorize'
gdrive_data = f'{gdrive_root}/data'

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


#### Import libraries

In [0]:
import torch
import torchvision.models as models
from PIL import Image as image_pil
import torchvision.transforms as transforms 
import torch.nn as nn 
from skimage import io, color
from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb
from skimage.io import imsave
from torchvision import datasets
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import os
import copy
import glob
from torchvision.models import resnet152
from torchvision import transforms
from PIL import Image
from torch.utils.data import DataLoader

#### Hyper-Parameters


In [0]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Optimisation settings shared by the data loaders and the training loop.
learning_rate = 1e-3
batch_size = 10
max_epoch = 200

# Switch that gates the training cell.
training_process = True

#### Construct Data Pipeline


In [0]:
class GrayscaleImageFolder(datasets.ImageFolder):
  """ImageFolder variant that yields the tensors used by the colorization network.

  Each item is derived from one image on disk and split into a grayscale
  network input, a grayscale input for the embedding network, and the
  colour channels used as the regression label.
  """

  def __getitem__(self, index):
    """Load image ``index`` and build the colorization inputs and label.

    Args:
      index: position of the sample in ``self.imgs``.

    Returns:
      A 4-tuple ``(img_l, img_embed, img_label, target)``:
        * ``img_l``: float tensor ``(1, H, W)``, ``rgb2gray`` of the
          (transformed) image.
        * ``img_embed``: float tensor ``(1, 299, 299, 3)``, the grayscale
          version of the image replicated over three channels.
        * ``img_label``: float tensor ``(2, H, W)``, the a/b channels of the
          Lab image after ``(x + 128) / 255``.
        * ``target``: the target stored in ``self.imgs`` (after
          ``target_transform`` when one is set), not the original image.
    """
    path, target = self.imgs[index]
    img = self.loader(path)

    # Without a transform the raw image is used as-is; previously the tensors
    # below were only built inside the `if`, so the return statement raised
    # UnboundLocalError whenever no transform was configured.
    img_rgb = self.transform(img) if self.transform is not None else img

    # Embedding input: resized to 299x299, converted to gray and replicated
    # back to three identical channels.
    # NOTE(review): this is built from the *untransformed* image, so it is not
    # cropped/flipped like img_l - confirm this is intended.
    embed = np.asarray(img.resize((299, 299)))
    embed = gray2rgb(rgb2gray(embed))
    img_embed = torch.from_numpy(embed).unsqueeze(0).float()  # leading axis of size 1

    # np.asarray / rgb2lab are applied to the transform output, so the
    # transform is expected to return an RGB image rather than a tensor.
    img_rgb = np.asarray(img_rgb)

    # Label: a/b channels of the Lab image, shifted by 128 and scaled by 255
    # (a shift-and-scale, not a clip), then moved to channel-first layout.
    img_lab = (rgb2lab(img_rgb) + 128) / 255
    img_label = torch.from_numpy(img_lab[:, :, 1:].transpose((2, 0, 1))).float()

    # Network input: single-channel grayscale image with a leading channel axis.
    img_l = torch.from_numpy(rgb2gray(img_rgb)).unsqueeze(0).float()

    if self.target_transform is not None:
      target = self.target_transform(target)
    return img_l, img_embed, img_label, target

# Training dataset: random crops and horizontal flips as augmentation.
traindir = os.path.join(gdrive_data, 'train')

train_transforms = transforms.Compose([transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()])
train_imagefolder = GrayscaleImageFolder(traindir, train_transforms)
train_loader = DataLoader(train_imagefolder, batch_size=batch_size, shuffle=True)

# Validation dataset: deterministic resize + center crop, no shuffling.
testdir = os.path.join(gdrive_data, 'test')

test_transforms = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])
test_imagefolder = GrayscaleImageFolder(testdir, test_transforms)
# Use the shared `batch_size` hyper-parameter (previously a hard-coded 10) and
# the same DataLoader name as the training loader.
test_loader = DataLoader(test_imagefolder, batch_size=batch_size, shuffle=False)

# Create generators for the Color Network
# Pre-trained ResNet-152 used only as a fixed feature extractor for the embeddings.
resnet = resnet152(pretrained=True, progress=True)
# Inference mode: BatchNorm must use its stored running statistics, otherwise
# the embedding of an image depends on the other images in its batch and the
# running statistics drift on every forward pass.
resnet.eval()
# No optimizer ever updates these weights, so do not track gradients for them.
for param in resnet.parameters():
  param.requires_grad_(False)

#Minibatch creation for the coloring network
def training_generator():
  """Yield ``([batch_l, embed], labels)`` mini-batches built from ``train_loader``.

  ``batch_l`` and ``labels`` are passed through unchanged from the loader;
  ``embed`` is the output of ``resnet`` for the embedding images of the batch.
  """
  for batch_l, batch_emb, labels, _ in train_loader:
    # Loader delivers (B, 1, H, W, C); move channels to axis 1 and drop the
    # singleton axis to get the (B, C, H, W) layout ResNet expects.
    batch_emb = batch_emb.permute(0, 4, 2, 3, 1).squeeze(4)
    # The ResNet is a fixed feature extractor: skip autograd so backward()
    # in the training loop does not traverse (and keep alive) its graph.
    with torch.no_grad():
      embed = resnet(batch_emb)
    yield [batch_l, embed], labels

def testing_generator():
  """Yield ``([batch_l, embed], labels)`` mini-batches built from ``test_loader``.

  ``batch_l`` and ``labels`` are passed through unchanged from the loader;
  ``embed`` is the output of ``resnet`` for the embedding images of the batch.
  """
  for batch_l, batch_emb, labels, _ in test_loader:
    # Loader delivers (B, 1, H, W, C); move channels to axis 1 and drop the
    # singleton axis to get the (B, C, H, W) layout ResNet expects.
    batch_emb = batch_emb.permute(0, 4, 2, 3, 1).squeeze(4)
    # Pure inference: no autograd graph is needed for the embeddings.
    with torch.no_grad():
      embed = resnet(batch_emb)
    yield [batch_l, embed], labels


#### ColorNet Model Architecture
##### Composed of one Encoder, one Decoder and one Fusion Network

In [0]:
# ENCODER
class Encoder(nn.Module):
  """Convolutional encoder: eight 3x3 conv + ReLU stages, three of them strided.

  Maps a 1-channel input to a 256-channel feature map; the three stride-2
  stages each halve the spatial resolution.
  """

  def __init__(self):
    super().__init__()

    # (in_channels, out_channels, downsample) for each conv stage, in order.
    stages = [
      (1, 64, True),
      (64, 128, False),
      (128, 128, True),
      (128, 256, False),
      (256, 256, True),
      (256, 512, False),
      (512, 512, False),
      (512, 256, False),
    ]

    modules = []
    for in_ch, out_ch, downsample in stages:
      step = 2 if downsample else 1
      modules.append(nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, stride=step, padding=1))
      modules.append(nn.ReLU())

    # Kept as a flat Sequential so parameter names stay model.<index>.*
    self.model = nn.Sequential(*modules)

  def forward(self, x):
    """Run ``x`` through the conv stack and return the feature map."""
    return self.model(x)

In [0]:
# FUSION
class Fusion(nn.Module):
  """Fuse the encoder feature map with a global image embedding.

  The embedding vector is replicated at every spatial location of the
  encoder output, concatenated along the channel axis, and reduced back to
  256 channels by a 1x1 convolution followed by a ReLU.
  """

  def __init__(self):
    super(Fusion, self).__init__()

    # 1256 input channels = 256 encoder channels + 1000 embedding entries.
    self.model = nn.Sequential(
      nn.Conv2d(1256, 256, 1),
      nn.ReLU()
    )

  def forward(self, encoder_output, embed):
    """Return the fused feature map.

    Args:
      encoder_output: tensor ``(B, 256, H, W)`` produced by the encoder.
      embed: tensor ``(B, 1000)`` with one embedding per image, on the same
        device as ``encoder_output``. It is not modified.

    Returns:
      Tensor ``(B, 256, H, W)``.
    """
    # Take the grid size from the encoder output instead of assuming a batch
    # of 10 and a 28x28 map, so partial final batches and other input sizes work.
    height, width = encoder_output.shape[2:]
    # Broadcast the embedding over the spatial grid. The previous version
    # first overwrote it with zeros (in place, through a view of the caller's
    # tensor), which discarded the embedding entirely.
    tiled = embed.unsqueeze(2).unsqueeze(3).expand(-1, -1, height, width)
    fused = torch.cat((encoder_output, tiled), 1)
    return self.model(fused)

In [0]:
# DECODER
class Decoder(nn.Module):
  """Convolutional decoder: 256-channel features -> 2-channel output at 8x resolution.

  Five 3x3 conv stages (ReLU everywhere, Tanh on the last one) interleaved
  with three 2x upsampling layers.
  """

  def __init__(self):
    super().__init__()

    def conv(in_ch, out_ch, activation=nn.ReLU):
      # One 3x3 same-padding convolution followed by its activation.
      return [nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, padding=1), activation()]

    def upsample():
      # Doubles height and width.
      return nn.Upsample(scale_factor=(2, 2))

    # Kept as a flat Sequential so parameter names stay model.<index>.*
    self.model = nn.Sequential(
      *conv(256, 128),
      upsample(),
      *conv(128, 64),
      upsample(),
      *conv(64, 32),
      *conv(32, 16),
      *conv(16, 2, activation=nn.Tanh),
      upsample(),
    )

  def forward(self, x):
    """Run ``x`` through the decoder stack and return the 2-channel map."""
    return self.model(x)

In [0]:
# COLORNET
class ColorNet(nn.Module):
  """Full colorization network: Encoder -> Fusion (with embedding) -> Decoder."""

  def __init__(self):
    super().__init__()

    # Attribute names are part of the checkpoint's parameter keys.
    self.encoder = Encoder()
    self.fusion = Fusion()
    self.decoder = Decoder()

  def forward(self, x, embed):
    """Encode ``x``, fuse the features with ``embed`` and decode the result."""
    features = self.encoder(x)
    fused = self.fusion(features, embed)
    return self.decoder(fused)

#Complete architecture of the network : ENCODER + FUSION + DECODER

#### Training ColorNet

In [89]:
if training_process:

  # Directory that receives the checkpoint; torch.save does not create it.
  ckpt_file = os.path.join(gdrive_root, 'checkpoint')
  os.makedirs(ckpt_file, exist_ok=True)

  net = ColorNet().to(device)
  # Bound to `optimizer`, not `optim`: rebinding `optim` would shadow the
  # imported `torch.optim` module and make this cell fail when re-run.
  optimizer = optim.Adam(net.parameters(), learning_rate, weight_decay=0)

  # One float per epoch: the loss of the last mini-batch of that epoch.
  train_losses = []

  for epoch in range(max_epoch):
    net.train()
    loss = None  # stays None if the generator yields no batch
    for inputs, labels in training_generator():

      enc_inputs = inputs[0].to(device)
      embed_outputs = inputs[1].to(device)
      labels = labels.to(device)

      prediction = net(enc_inputs, embed_outputs)

      loss = F.mse_loss(prediction, labels)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      print('[Epoch:{}/{}] Train Loss:{:.4f}'.format(epoch, max_epoch, loss.item()))

    if loss is not None:
      # Store the Python float, not the tensor, so the autograd graph of the
      # last batch is not kept alive for the rest of training.
      train_losses.append(loss.item())

    # Checkpoint every 10th epoch and after the final one. The previous
    # `if epoch % 10:` was inverted and saved on every epoch *except*
    # multiples of 10.
    if (epoch + 1) % 10 == 0 or epoch + 1 == max_epoch:
      torch.save(net.state_dict(), os.path.join(ckpt_file, 'latest.pt'))

[Epoch:0/200] Train Loss:0.2906
[Epoch:1/200] Train Loss:0.2802
[Epoch:2/200] Train Loss:0.2405
[Epoch:3/200] Train Loss:0.1717
[Epoch:4/200] Train Loss:0.0645
[Epoch:5/200] Train Loss:0.0483
[Epoch:6/200] Train Loss:0.0384
[Epoch:7/200] Train Loss:0.0262
[Epoch:8/200] Train Loss:0.0210
[Epoch:9/200] Train Loss:0.0196
[Epoch:10/200] Train Loss:0.0237
[Epoch:11/200] Train Loss:0.0378
[Epoch:12/200] Train Loss:0.0197
[Epoch:13/200] Train Loss:0.0221
[Epoch:14/200] Train Loss:0.0275
[Epoch:15/200] Train Loss:0.0159
[Epoch:16/200] Train Loss:0.0339
[Epoch:17/200] Train Loss:0.0196
[Epoch:18/200] Train Loss:0.0214
[Epoch:19/200] Train Loss:0.0189
[Epoch:20/200] Train Loss:0.0156
[Epoch:21/200] Train Loss:0.0165
[Epoch:22/200] Train Loss:0.0147
[Epoch:23/200] Train Loss:0.0162
[Epoch:24/200] Train Loss:0.0139
[Epoch:25/200] Train Loss:0.0158
[Epoch:26/200] Train Loss:0.0136
[Epoch:27/200] Train Loss:0.0142
[Epoch:28/200] Train Loss:0.0121
[Epoch:29/200] Train Loss:0.0139
[Epoch:30/200] Train

#### Testing ColorNet

In [90]:
# Location of the trained weights and of the colorized results.
ckpt_file = os.path.join(gdrive_root, 'checkpoint')
output_dir = os.path.join(gdrive_data, 'output')
os.makedirs(output_dir, exist_ok=True)  # imsave does not create the folder

net = ColorNet().to(device)
# map_location lets a checkpoint trained on GPU be loaded on a CPU-only runtime.
net.load_state_dict(torch.load(os.path.join(ckpt_file, 'latest.pt'), map_location=device))
net.eval()

# Running index over the whole test set. Naming files by the in-batch index
# made every batch overwrite img_0..img_9 of the previous one.
image_index = 0

with torch.no_grad():  # inference only
  for inputs, _ in testing_generator():
    enc_inputs = inputs[0].to(device)     # grayscale encoder input
    embed_outputs = inputs[1].to(device)  # embedding from the generator

    prediction = net(enc_inputs, embed_outputs).cpu()  # predicted a/b channels

    for gray, ab in zip(inputs[0], prediction):
      # Rebuild a Lab image: grayscale input scaled by 100 as the L channel,
      # predicted a/b channels mapped back with the inverse of (x + 128) / 255.
      height, width = gray.shape[1:]
      cur = np.zeros((height, width, 3))
      cur[:, :, 0] = gray[0].numpy() * 100
      cur[:, :, 1:] = ab.permute(1, 2, 0).numpy() * 255 - 128
      imsave(os.path.join(output_dir, 'img_' + str(image_index) + '.png'), lab2rgb(cur))
      image_index += 1

