<center>
    <h1>AIVCUDL</h1>
    <hr><h2>Automatic Image & Video Colourisation Using Deep Learning</h2><hr>
</center>

In [1]:
# Imports
import tensorflow as tf
import keras
from keras.engine import Layer
from keras.applications.inception_resnet_v2 import preprocess_input, InceptionResNetV2
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.layers import Conv2D, UpSampling2D, InputLayer, Input, Reshape, merge, concatenate, Activation, Dense, Dropout, Flatten
from keras.layers.core import RepeatVector, Permute
from keras.models import Sequential, Model, load_model
from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb
from skimage.transform import resize
from skimage.io import imsave
import numpy as np
import os
import cv2
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  not cbook.is_string_like(colors[0]):


In [2]:
# Load inception weights
inception = InceptionResNetV2(weights='imagenet', include_top=True)
inception.graph = tf.get_default_graph()

In [3]:
def resize_training_data (image):
    desired_size = 256
    im = cv2.imread(image)
    old_size = im.shape[:2]
    ratio = float(desired_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
    im = cv2.resize(im, (new_size[1], new_size[0]))
    del_w = desired_size - new_size[1]
    del_h = desired_size - new_size[0]
    top, bottom = del_h//2, del_h-(del_h//2)
    left, right = del_w//2, del_w-(del_w//2)
    color = [0, 0, 0]
    new_im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT,
    value=color)
    return new_im

# Write images
i = 0
for filename in os.listdir('test_images/Train4.1/'):
    if (i<2000):
        resized_im = resize_training_data('test_images/Train4.1/'+filename)
        #os.remove('test_images/Train4.1/'+filename)
        cv2.imwrite('dataset/temp_dataset/'+filename, resized_im)
        i = i + 1
    

In [None]:
# Deleting images from dataset

for filename in os.listdir('dataset/new_dataset/'):
    if (i<2000):
        os.remove('dataset/new_dataset/'+filename)
        i = i + 1
        
print("Images Deleted")

In [3]:
# Get images

X = []
i = 0
for filename in os.listdir('dataset/new_dataset/'):
    if (i<2000):
        X.append(img_to_array(load_img('dataset/new_dataset/'+filename)))
        os.remove('dataset/new_dataset/'+filename)
        i = i + 1
    
X = np.array(X, dtype=float)
Xtrain = 1.0/255*X
print("Loaded", X.shape[0], "images of resolution", X.shape[1],"x", X.shape[2])

Loaded 2000 images of resolution 256 x 256


In [4]:
embed_input = Input(shape=(1000,))

#Encoder
encoder_input = Input(shape=(256, 256, 1,))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)

#Fusion
fusion_output = RepeatVector(32 * 32)(embed_input) 
fusion_output = Reshape(([32, 32, 1000]))(fusion_output)
fusion_output = concatenate([encoder_output, fusion_output], axis=3) 
fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output) 

#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(fusion_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)

model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)

In [5]:
print(model.summary())

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 256, 256, 1)  0                                            
__________________________________________________________________________________________________
conv2d_204 (Conv2D)             (None, 128, 128, 64) 640         input_3[0][0]                    
__________________________________________________________________________________________________
conv2d_205 (Conv2D)             (None, 128, 128, 128 73856       conv2d_204[0][0]                 
__________________________________________________________________________________________________
conv2d_206 (Conv2D)             (None, 64, 64, 128)  147584      conv2d_205[0][0]                 
__________________________________________________________________________________________________
conv2d_207

In [6]:
def create_inception_embedding(grayscaled_rgb):
    grayscaled_rgb_resized = []
    for i in grayscaled_rgb:
        i = resize(i, (299, 299, 3), mode='constant')
        grayscaled_rgb_resized.append(i)
    grayscaled_rgb_resized = np.array(grayscaled_rgb_resized)
    grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized)
    with inception.graph.as_default():
        embed = inception.predict(grayscaled_rgb_resized)
    return embed

# Image transformer
datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=20,
        horizontal_flip=True)

#Generate training data
batch_size = 10

def image_a_b_gen(batch_size):
    for batch in datagen.flow(Xtrain, batch_size=batch_size):
        grayscaled_rgb = gray2rgb(rgb2gray(batch))
        embed = create_inception_embedding(grayscaled_rgb)
        lab_batch = rgb2lab(batch)
        X_batch = lab_batch[:,:,:,0]
        X_batch = X_batch.reshape(X_batch.shape+(1,))
        Y_batch = lab_batch[:,:,:,1:] / 128
        yield ([X_batch, create_inception_embedding(grayscaled_rgb)], Y_batch)
        
print("Training Data Generated")

Training Data Generated


In [7]:
# Model Checkpointing 1 - loading
model = load_model('latest.h5')
print("Model Loaded")

Model Loaded


In [8]:
#Train model      
model.compile(optimizer='rmsprop', loss='mse', metrics=['accuracy'])
%time model.fit_generator(image_a_b_gen(batch_size), epochs=1, steps_per_epoch=200)

Epoch 1/1
Wall time: 17min 32s


<keras.callbacks.History at 0x1d2a3670f28>

In [14]:
# Model checkpointing 2 - saving
model.save('latest.h5')
print("Model Saved")

Model Saved


In [15]:
# Testing
color_me = []
for filename in os.listdir('dataset/test/'):
    color_me.append(img_to_array(load_img('dataset/test/'+filename)))

color_me = np.array(color_me, dtype=float)
gray_me = gray2rgb(rgb2gray(1.0/255*color_me))
color_me_embed = create_inception_embedding(gray_me)
color_me = rgb2lab(1.0/255*color_me)[:,:,:,0]
color_me = color_me.reshape(color_me.shape+(1,))


# Test model
output = model.predict([color_me, color_me_embed])
output = output * 128

# Output colorizations
for i in range(len(output)):
    cur = np.zeros((256, 256, 3))
    cur[:,:,0] = color_me[i][:,:,0]
    cur[:,:,1:] = output[i]
    imsave("dataset/result/img_"+str(i)+".png", lab2rgb(cur))
    imsave("dataset/result/img_"+str(i)+"_gray.png", rgb2gray(lab2rgb(cur)))

  .format(dtypeobj_in, dtypeobj_out))
  .format(dtypeobj_in, dtypeobj_out))


In [17]:
# Single image
color_me = []
color_me.append(img_to_array(load_img('dataset/test/3.jpg')))

color_me = np.array(color_me, dtype=float)
gray_me = gray2rgb(rgb2gray(1.0/255*color_me))
color_me_embed = create_inception_embedding(gray_me)
color_me = rgb2lab(1.0/255*color_me)[:,:,:,0]
color_me = color_me.reshape(color_me.shape+(1,))


# Test model
output = model.predict([color_me, color_me_embed])
output = output * 128

# Output colorizations
for i in range(len(output)):
    cur = np.zeros((512, 512, 3))
    cur[:,:,0] = color_me[i][:,:,0]
    cur[:,:,1:] = output[i]
    imsave("dataset/result/img_"+str(i)+".png", lab2rgb(cur))

  .format(dtypeobj_in, dtypeobj_out))


In [15]:
def resize_output_image (image): # YTBD
    im = cv2.imread(image)
    old_size = im.shape[:2]
    ratio = float(desired_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
    im = cv2.resize(im, (new_size[1], new_size[0]))
    del_w = desired_size - new_size[1]
    del_h = desired_size - new_size[0]
    top, bottom = del_h*2, del_h-(del_h*2)
    left, right = del_w*2, del_w-(del_w*2)
    print(top,left,bottom,right)
    color = [0, 0, 0]
    #new_im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return new_im

resize_output_image(lab2rgb(cur))

TypeError: bad argument type for built-in operation