# Imports

In [2]:
import os
import torch
import torch.nn as nn
import numpy as np
from IPython import embed
from skimage import color
from PIL import Image

# Data

In [None]:
#Preprocessing of data to index RGB training data

def preProcessing(path, h=256, w=256):
  """Resize every image directly under `path` and save it as an indexed PNG.

  Each non-directory entry of `path` is opened with PIL, converted to RGB,
  resized to (w, h) and written to `<path>/train/rgb/<idx>_rgb.png`, where
  idx counts up from 0 in sorted filename order.

  Args:
    path: directory holding the raw images.
    h: output height in pixels.
    w: output width in pixels.
  """
  rgb_dir = os.path.join(path, "train", "rgb")
  # exist_ok also covers the case where train/ exists but train/rgb does not,
  # which a "create only if train/ is missing" check would skip.
  os.makedirs(rgb_dir, exist_ok=True)

  # os.listdir order is arbitrary; sort so each image always gets the same index.
  filenames = sorted(
      name for name in os.listdir(path)
      if not os.path.isdir(os.path.join(path, name)))

  for idx, filename in enumerate(filenames):
    # The context manager closes the source file once the pixels are loaded.
    with Image.open(os.path.join(path, filename)) as rgb_img:
      # Force 3 channels so greyscale / palette / RGBA inputs are stored as RGB.
      resized = rgb_img.convert("RGB").resize((w, h))
    # Save into the folder created above (train/rgb).
    resized.save(os.path.join(rgb_dir, str(idx) + "_rgb.png"))


In [None]:
#Scaffolding for Dataset

def myDataset(h=256, w=256, rgb2lab=True, zhangmodel=True, edmodel=False):
  """Build a Dataset class for colorization, configured for one image size.

  Usage: `DatasetCls = myDataset(h, w); ds = DatasetCls(img_dir, transform)`.

  Args:
    h: image height in pixels used for preprocessing and loading.
    w: image width in pixels used for preprocessing and loading.
    rgb2lab: currently unused; kept for interface compatibility.
    zhangmodel: if true, "ZHANG MODEL" is printed when a dataset is created.
    edmodel: if true (and zhangmodel is false), "ED MODEL" is printed instead.

  Returns:
    A torch.utils.data.Dataset subclass whose items are dicts with
    'input' (L channel, float tensor of shape (1, h, w)) and
    'target' (full Lab image, float tensor of shape (3, h, w)).

  Note: the returned class is defined locally, so it cannot be pickled;
  that can matter for DataLoader workers started with the "spawn" method.
  """

  class ColorizationDataset(torch.utils.data.Dataset):
    """Indexed RGB images served as (L, Lab) tensor pairs."""

    def __init__(self, img_dir, transform=None):
      self.img_dir = img_dir
      self.transform = transform
      self.rgb_dir = os.path.join(img_dir, "train", "rgb")

      # Index the images first: the rgb folder only exists after this call,
      # so the length must be read afterwards.
      preProcessing(img_dir, h, w)
      self.dataset_length = len(os.listdir(self.rgb_dir))

      if zhangmodel:
        print("ZHANG MODEL")
      elif edmodel:
        print("ED MODEL")

    def __len__(self):
      return self.dataset_length

    def __getitem__(self, idx):
      # Load the indexed RGB image from train/rgb and make sure of its size.
      img_path = os.path.join(self.rgb_dir, str(idx) + "_rgb.png")
      with Image.open(img_path) as img_file:
        rgb_img = img_file.convert("RGB").resize((w, h))

      # The transform must return something np.asarray turns into an
      # (H, W, 3) RGB array (e.g. a PIL image), because rgb2lab runs on it.
      if self.transform is not None:
        rgb_img = self.transform(rgb_img)

      # CIE-Lab equivalent of the RGB image, as a channels-first float tensor.
      # (PIL's "LAB" mode expects 8-bit data, so the float array is kept as a
      # tensor instead of being wrapped in a PIL image.)
      clab_array = color.rgb2lab(np.asarray(rgb_img))
      clab_img = torch.from_numpy(clab_array).permute(2, 0, 1).float()

      # Greyscale input = the L channel only; slicing keeps the channel axis.
      clab_grayimg = clab_img[0:1]

      return {'input': clab_grayimg, 'target': clab_img}

  return ColorizationDataset
    

# Model 1 ("Colorful Image Colorization" by Zhang et al.)

In [None]:
class BaseModel(nn.Module):
  '''
  Colorization CNN: eight convolutional blocks, a per-pixel softmax over 313
  channels, and a 1x1 convolution down to 2 output channels.

  The model works in CIELAB space. It takes the L (lightness) channel of an
  image, shape (N, 1, H, W), and predicts the a and b channels, shape
  (N, 2, H, W). Blocks 1-3 each halve the spatial size, block 8 doubles it,
  and a final 4x bilinear upsample brings the output back to the input
  resolution (exactly so when H and W are multiples of 8).

  Args:
    norm_layer: callable building the normalization layer placed at the end
      of blocks 1-7; it is called with the channel count.
    l_cent: value subtracted from L before the first convolution.
    l_norm: value the centered L is divided by.
      NOTE(review): 50 / 100 follow the usual L range of [0, 100] - confirm.
  '''
  def __init__(self, norm_layer = nn.BatchNorm2d, l_cent = 50.0, l_norm = 100.0):
    super(BaseModel, self).__init__()
    self.l_cent = l_cent
    self.l_norm = l_norm

    # nn.Sequential takes modules as positional arguments (or an OrderedDict),
    # not a plain list, so every block passes its layers directly.

    # layer 1: 1 -> 64 channels, spatial size / 2
    self.layer1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=True),
        nn.ReLU(True),
        norm_layer(64))

    # layer 2: 64 -> 128 channels, spatial size / 2
    self.layer2 = nn.Sequential(
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1, bias=True),
        nn.ReLU(True),
        norm_layer(128))

    # layer 3: 128 -> 256 channels, spatial size / 2
    self.layer3 = nn.Sequential(
        nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1, bias=True),
        nn.ReLU(True),
        norm_layer(256))

    # layer 4: 256 -> 512 channels, size unchanged
    self.layer4 = nn.Sequential(
        nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        norm_layer(512))

    # layer 5: dilated convolutions (dilation 2, padding 2 keeps the size)
    self.layer5 = nn.Sequential(
        nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        norm_layer(512))

    # layer 6: dilated convolutions (dilation 2, padding 2 keeps the size)
    self.layer6 = nn.Sequential(
        nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        norm_layer(512))

    # layer 7: plain 3x3 convolutions, size unchanged
    self.layer7 = nn.Sequential(
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        norm_layer(512))

    # layer 8: transposed conv doubles the size, ends in 313 channels
    self.layer8 = nn.Sequential(
        nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(True),
        nn.Conv2d(256, 313, kernel_size=1, stride=1, padding=0, bias=True))

    # softmax over the 313 channels of every pixel
    self.softmax = nn.Softmax(dim=1)
    # 2 means (a, b)
    self.model_out = nn.Conv2d(313, 2, kernel_size=1, padding=0, dilation=1, stride=1, bias=False)
    self.upsample = nn.Upsample(scale_factor=4, mode='bilinear')

  def normalize_l(self, input_l):
    '''Center and scale the L channel: (input_l - l_cent) / l_norm.'''
    return (input_l - self.l_cent) / self.l_norm

  def forward(self, input_l):
    '''
    Predict the (a, b) channels for a batch of L channels.

    Args:
      input_l: tensor of shape (N, 1, H, W) holding L values.

    Returns:
      Tensor of shape (N, 2, H', W'); H' == H and W' == W when H and W are
      multiples of 8.
    '''
    # model
    conv1_2 = self.layer1(self.normalize_l(input_l))
    conv2_2 = self.layer2(conv1_2)
    conv3_3 = self.layer3(conv2_2)
    conv4_3 = self.layer4(conv3_3)
    conv5_3 = self.layer5(conv4_3)
    conv6_3 = self.layer6(conv5_3)
    conv7_3 = self.layer7(conv6_3)
    conv8_3 = self.layer8(conv7_3)

    # per-pixel distribution over 313 channels, mapped linearly to (a, b)
    out_reg = self.model_out(self.softmax(conv8_3))

    # Undo the normalization: the upsampled output is multiplied by 100 to
    # bring it back to the scale of a/b values.
    # NOTE(review): the reference implementation of this architecture scales
    # by 110 rather than 100 - confirm which factor is intended.
    return 100*(self.upsample(out_reg))

# Model 1 Loss

In [None]:
def RebalanceLoss(*args, **kwargs):
  """Placeholder for the Model 1 loss; not implemented yet.

  The cell previously held only an unfinished `def` header, which is a
  syntax error. This stub keeps the name defined and fails loudly if called.
  TODO: implement the loss and replace the catch-all signature.

  Raises:
    NotImplementedError: always.
  """
  raise NotImplementedError("RebalanceLoss is not implemented yet")

In [None]:
def GetClassWeights(*args, **kwargs):
  """Placeholder for the Model 1 class-weight computation; not implemented yet.

  The cell previously held only an unfinished `def` header, which is a
  syntax error. This stub keeps the name defined and fails loudly if called.
  TODO: implement the computation and replace the catch-all signature.

  Raises:
    NotImplementedError: always.
  """
  raise NotImplementedError("GetClassWeights is not implemented yet")

# Model 2: Encoder Decoder


In [9]:
%%capture
!pip install pretrainedmodels

In [10]:
import pretrainedmodels
from pretrainedmodels import utils

In [4]:
class Encoder(nn.Module):
  """Eight 3x3 convolution + ReLU stages that shrink the input by a factor of 8.

  Stages 1, 3 and 5 use stride 2; the others use stride 1. The channel
  progression is in_channels -> 128 -> 128 -> 256 -> 256 -> 512 -> 512 ->
  256 -> 256.

  Args:
    in_channels: number of channels of the input tensor.
      NOTE(review): the default of 64 is kept from the original code; a
      single-channel greyscale input would need in_channels=1 - confirm.
    padding: zero padding of every convolution. With the default of 1 a
      stride-1 stage keeps the spatial size and a stride-2 stage halves it
      (rounding up), so a 224x224 input gives a 28x28 feature map - the size
      EDModel tiles its embedding to.
  """
  def __init__(self, in_channels=64, padding=1):
    super(Encoder, self).__init__()

    def conv_relu(c_in, c_out, stride):
      # nn.Sequential takes modules as positional arguments, not a list.
      return nn.Sequential(
        nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3,
                  stride=stride, padding=padding),
        nn.ReLU(),
      )

    self.layer1 = conv_relu(in_channels, 128, 2)
    self.layer2 = conv_relu(128, 128, 1)
    self.layer3 = conv_relu(128, 256, 2)
    self.layer4 = conv_relu(256, 256, 1)
    self.layer5 = conv_relu(256, 512, 2)
    self.layer6 = conv_relu(512, 512, 1)
    # This stage used to be assigned to `layer6` a second time, which
    # silently dropped the 512 -> 512 stage above; it now has its own name.
    self.layer7 = conv_relu(512, 256, 1)

    # NOTE(review): the original comment asked whether the last stage should
    # use tanh instead of ReLU; ReLU is kept here - confirm.
    self.layer8 = conv_relu(256, 256, 1)

  def forward(self, x):
    """Run all eight stages in order and return the final feature map."""
    out = x
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4,
                  self.layer5, self.layer6, self.layer7, self.layer8):
      out = stage(out)
    return out

In [5]:
class Decoder(nn.Module):
  """Five 3x3 convolutions with three nearest-neighbour upsampling steps.

  The channel progression is in_channels -> 128 -> 64 -> 64 -> 32 -> 2.
  Layers 1, 3 and 5 are each followed by an upsampling step.

  Args:
    in_channels: number of channels of the input feature map.
    padding: zero padding of every convolution; with the default of 1 a
      stride-1 convolution keeps the spatial size.
    scale_factor: factor of each upsampling step. nn.UpsamplingNearest2d
      needs a size or a scale factor, otherwise it fails at forward time.

  NOTE(review): layers 3 and 5 use stride 2, so the upsampling that follows
  them only restores the size they had before; with the defaults the output
  is 2x the input size, not 8x. Confirm whether stride 1 was intended.
  NOTE(review): there is no activation between the convolutions - confirm.
  """
  def __init__(self, in_channels=256, padding=1, scale_factor=2):
    super(Decoder, self).__init__()

    # nn.Sequential takes modules as positional arguments, not a list.
    self.layer1 = nn.Sequential(
      nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=3, stride=1, padding=padding),
      nn.UpsamplingNearest2d(scale_factor=scale_factor),
    )

    self.layer2 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=padding)

    self.layer3 = nn.Sequential(
      nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=padding),
      nn.UpsamplingNearest2d(scale_factor=scale_factor),
    )

    self.layer4 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=padding)

    # Final layer outputs 2 channels.
    self.layer5 = nn.Sequential(
      nn.Conv2d(in_channels=32, out_channels=2, kernel_size=3, stride=2, padding=padding),
      nn.UpsamplingNearest2d(scale_factor=scale_factor),
    )

  def forward(self, x):
    """Run the five layers in order and return the 2-channel result."""
    l1=self.layer1(x)
    l2=self.layer2(l1)
    l3=self.layer3(l2)
    l4=self.layer4(l3)
    l5=self.layer5(l4)

    return l5

In [13]:
#feature extractor
import ssl

# WARNING(security): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate checks are switched off for every later
# HTTPS request made through it in this process, not only the weight download.
ssl._create_default_https_context = ssl._create_unverified_context

# Build the "inceptionresnetv2" model from pretrainedmodels with 1001 output
# classes and the "imagenet+background" pretrained weights.
inception = pretrainedmodels.inceptionresnetv2(
    num_classes=1001,
    pretrained="imagenet+background",
)

# Put the model in evaluation mode; being the last expression of the cell,
# the call's return value is also displayed.
inception.eval()

Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth" to /root/.cache/torch/hub/checkpoints/inceptionresnetv2-520b38e4.pth
  1%|          | 2.45M/213M [00:07<10:19, 357kB/s]


KeyboardInterrupt: ignored

In [11]:
#https://github.com/lauradang/automatic-image-colorization/blob/master/notebooks/inception_resnet.ipynb
class EDModel(nn.Module):
  """Encoder / fusion / decoder model that mixes in a global image embedding.

  The encoder output (256 channels) is concatenated with the 1001-value
  output of the module-level `inception` model, tiled over every spatial
  position. A 1x1 convolution brings the 256 + 1001 fused channels back to
  the 256 channels the decoder takes as input.

  Relies on a module-level `inception` callable being defined before
  `forward` is called.
  """
  def __init__(self):
    super(EDModel, self).__init__()

    self.encoder=Encoder()
    self.decoder=Decoder()

    # Without this layer the 1257-channel fused tensor would not match the
    # decoder's 256 input channels.
    self.fusion = nn.Sequential(
      nn.Conv2d(256 + 1001, 256, kernel_size=1),
      nn.ReLU(),
    )

  def forward(self, x, feature):
    """Decode the encoder features of `x` fused with the embedding of `feature`.

    Args:
      x: input passed to the encoder.
      feature: input passed to the `inception` feature extractor.

    Returns:
      The decoder output for the fused feature map.
    """
    enout=self.encoder(x)

    # The extractor is not a submodule of this model, so its weights are not
    # among this model's parameters; no_grad avoids building a graph for it.
    with torch.no_grad():
      extract_feat=inception(feature)
    extract_feat=extract_feat.view(-1, 1001, 1, 1)

    # Tile the embedding over the encoder's actual spatial size instead of a
    # hard-coded 28x28, so other input resolutions work as well.
    embedding_block = extract_feat.expand(-1, -1, enout.shape[2], enout.shape[3])
    fusion_block = torch.cat([enout, embedding_block], dim=1)

    return self.decoder(self.fusion(fusion_block))

Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth" to /root/.cache/torch/hub/checkpoints/inceptionresnetv2-520b38e4.pth


URLError: ignored