In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# load saliency and motion videos and split them into chunks
# Root directory (relative to the notebook) holding the source video; the
# derived 'frames' and 'nparrs/' locations used below are built from this prefix.
# The trailing slash matters: later cells concatenate onto it with '+'.
path = '../data/sources/'

In [5]:
def extractImages(video):
    """Dump the frames of a video into a freshly created ``<path>frames`` directory.

    Any existing ``<path>frames`` directory is deleted first, then ffmpeg is
    run at 30 fps with the output scaled to 3840x1920, writing PNG files
    named ``frame<N>.png``.

    Parameters
    ----------
    video : str
        Either a file name relative to the notebook-level ``path`` prefix, or
        a location that already starts with ``path`` (or is absolute). The
        prefix is only prepended when it is missing, so passing
        ``path + name`` no longer yields a doubled ``path + path + name``.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    OSError
        If the ffmpeg executable cannot be started.
    """
    # Local imports keep this cell self-contained; the notebook's import
    # cells do not bring in these two modules.
    import shutil
    import subprocess

    newdir = path+'frames'

    # Callers in this notebook pass an already-prefixed location; accept both forms.
    if os.path.isabs(video) or video.startswith(path):
        source = video
    else:
        source = path+video

    # Start from an empty output directory on every run.
    if os.path.isdir(newdir):
        shutil.rmtree(newdir)
    elif os.path.exists(newdir):
        os.remove(newdir)
    os.makedirs(newdir)

    # Argument list instead of a shell string: file names containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    subprocess.check_call([
        'ffmpeg', '-i', source,
        '-vf', 'fps=30',
        '-s', '3840x1920',
        newdir+'/frame%0000d.png',
    ])

In [6]:
# Location of the source clip, built from the shared `path` prefix, then
# handed to extractImages to split it into individual frame images.
video = path+'RollerCoaster1min.mp4'
extractImages(video)

In [3]:
def makeNumpyArrayGroundTruth(totalImages, windowsize_r=384, windowsize_c=384):
    """Cut every frame into tiles and save the tiles of each frame as one ``.npy`` file.

    Each file name in ``totalImages`` is read from ``<path>frames/``,
    converted from OpenCV's BGR channel order to RGB, split row-major into
    ``windowsize_r`` x ``windowsize_c`` windows, and the stacked windows are
    written to ``<path>nparrs/<name-without-extension>.npy``.

    For the 3840x1920 frames written by ``extractImages`` and the default
    384-pixel window this gives 5 x 10 = 50 tiles per frame.

    Parameters
    ----------
    totalImages : iterable of str
        File names (not full paths) inside ``<path>frames/``.
    windowsize_r, windowsize_c : int, optional
        Tile height and width in pixels. Defaults match the previously
        hard-coded value of 384.

    Notes
    -----
    Entries that OpenCV cannot decode (for example stray non-image files
    returned by ``os.listdir``) are reported and skipped instead of aborting
    the whole run.
    """
    outdir = path+'nparrs/'
    # np.save does not create missing directories.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    for imageName in totalImages:
        image = cv2.imread(path+'frames/'+imageName)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            print('Skipping unreadable frame: '+imageName)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Row-major sweep; if the frame size is not a multiple of the window
        # size, the windows on the bottom/right border are smaller.
        tiles = [
            image[top:top+windowsize_r, left:left+windowsize_c]
            for top in range(0, image.shape[0], windowsize_r)
            for left in range(0, image.shape[1], windowsize_c)
        ]

        arrayName = outdir+os.path.splitext(imageName)[0]+'.npy'
        np.save(arrayName, np.array(tiles))

In [4]:
# Collect the names of everything in the frames directory (os.listdir gives
# bare names in arbitrary order) and tile each one into a .npy file.
images = os.listdir(path+'frames')
makeNumpyArrayGroundTruth(images)

In [5]:
from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GlobalAveragePooling2D
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam

import matplotlib.pyplot as plt

import numpy as np
from scipy.misc import imread
from scipy.misc import imresize
from scipy.misc import imsave
import glob
import random

from keras.preprocessing.image import ImageDataGenerator
from keras import applications
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
#from keras.applications.resnet50 import preprocess_input

from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
#from keras.applications.vgg19 import preprocess_input as preprocess_vgg
from keras.applications.inception_v3 import preprocess_input as inception_preprocess

#from keras.applications.inception_resnet_v2 import preprocess_input

import numpy as np

import keras
import copy

In [None]:
class CGAN():
    """Label-conditioned GAN trained with a least-squares (MSE) objective.

    The generator maps a noise vector plus an integer label to an image of
    shape ``img_shape``; the discriminator scores an (image, label) pair with
    a single linear output. Both look the label up in ``self.embedding_layer``.

    NOTE(review): the same Embedding instance is used by the generator and the
    discriminator, so its weights belong to both models. Whether freezing the
    discriminator for the combined model also freezes this layer depends on
    the Keras version -- confirm that sharing is intended.
    """

    def __init__(self):
        """Build and compile the discriminator, the generator and the stacked model."""
        # Input shape
        self.img_rows = 192
        self.img_cols = 192
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.num_classes = 360000
        self.latent_dim  = 200
        self.embedding_layer = Embedding(self.num_classes, self.latent_dim)

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=['mse'],
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise and the target label as input
        # and generates an image conditioned on that label
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,))
        img = self.generator([noise, label])

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes the generated image and its label
        # and outputs a validity score for the pair
        valid = self.discriminator([img, label])

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator
        self.combined = Model([noise, label], valid)
        self.combined.compile(loss=['mse'],
            optimizer=optimizer)

    def build_generator(self):
        """Return a model mapping ``[noise, label]`` to an image in the tanh range.

        The label embedding (``latent_dim`` wide) is multiplied element-wise
        with the noise vector, and the product feeds a dense layer followed by
        a stack of same-padded convolutions. The two-input signature matches
        how the generator is called in ``__init__``, ``train`` and
        ``sample_images``.
        """
        model = Sequential()
        model.add(Dense(self.img_rows * self.img_cols * self.channels,
                        activation='relu', input_dim=self.latent_dim))
        model.add(Reshape(self.img_shape))
        model.add(Conv2D(256, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        model.add(Conv2D(128, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        model.add(Conv2D(64, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        model.add(Conv2D(self.channels, kernel_size=3, padding="same"))
        model.add(Activation("tanh"))

        noise           = Input(shape=(self.latent_dim,))
        label           = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(self.embedding_layer(label))

        # Condition the noise on the label; both vectors are latent_dim wide.
        model_input = multiply([noise, label_embedding])
        img         = model(model_input)

        return Model([noise, label], img)

    def build_discriminator(self):
        """Return a model mapping ``[img, label]`` to one unbounded validity score.

        A valid-padded convolutional stack reduces the image to a 100-wide
        feature vector, which is multiplied element-wise with a 100-wide
        projection of the label embedding before the final dense layers.
        """
        img   = Input(shape=self.img_shape)
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(self.embedding_layer(label))

        label_dense = Dense(100)(label_embedding)

        model = Sequential()
        model.add(Conv2D(64, (3, 3), input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(64,  (3, 3),  strides=(2, 2)))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(64, (3, 3)))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(64, (6, 6),  strides=(2, 2)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(64, (3, 3),  strides=(2, 2)))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(128, (3, 3)))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2D(128, (3, 3),  strides=(2, 2)))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(100))
        model.add(LeakyReLU(alpha=0.2))

        dense100    = model(img)
        conditioned = multiply([dense100, label_dense])
        dense32     = Dense(32)(conditioned)
        dense32l    = LeakyReLU(alpha=0.2)(dense32)

        # Least-squares GAN: linear output trained with MSE, no sigmoid.
        validity = Dense(1)(dense32l)

        return Model([img, label], validity)

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Run ``epochs`` training steps, one discriminator and one generator update each.

        Parameters
        ----------
        epochs : int
            Number of iterations; each one fetches data via ``get_train_data``
            and trains on a single batch.
        batch_size : int
            Number of samples drawn per update.
        sample_interval : int
            Every this many iterations (including iteration 0) a sample strip
            is written and both models' weights are saved.

        Relies on ``get_train_data`` and ``savepath``, which are not defined in
        the visible part of this notebook and must exist before this is called.
        """
        random.seed(10)

        for epoch in range(epochs):
            # Load the data for this iteration
            X_train, y_train = get_train_data(epoch, batch_size)

            # Scale pixels from [0, 255] to [-1, 1] to match the generator's tanh output
            X_train = (X_train.astype(np.float32) - 127.5) / 127.5
            y_train = y_train.reshape(-1, 1)

            # Adversarial ground truths
            valid = np.ones((batch_size, 1))
            fake  = np.zeros((batch_size, 1))

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images (sampled with replacement)
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs, labels = X_train[idx], y_train[idx]

            # Sample noise as generator input
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Generate a batch of new images for the same labels
            gen_imgs = self.generator.predict([noise, labels])

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch([imgs, labels], valid)
            d_loss_fake = self.discriminator.train_on_batch([gen_imgs, labels], fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # Condition on labels
            # NOTE(review): labels are drawn from [0, 7) although num_classes
            # is 360000 -- confirm this range is intended.
            sampled_labels = np.random.randint(0, 7, batch_size).reshape(-1, 1)

            # Train the generator
            g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)

            # Plot the progress
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)
                self.generator.save_weights(savepath+'weights/generator_weights_'+str(epoch)+'.h5')
                self.discriminator.save_weights(savepath+'weights/discriminator_weights_'+str(epoch)+'.h5')

    def sample_images(self, epoch):
        """Generate one image for each of labels 0..6 and save them side by side.

        The strip is written to ``savepath + "images/<epoch>.png"``. Its size
        is taken from the generator output, so it follows ``img_shape`` rather
        than a fixed width and height.
        """
        n_samples      = 7
        noise          = np.random.normal(0, 1, (n_samples, self.latent_dim))
        sampled_labels = np.arange(0, n_samples).reshape(-1, 1)

        gen_imgs = self.generator.predict([noise, sampled_labels])

        # Rescale from the tanh range [-1, 1] to 8-bit pixel values
        pixels = np.clip((0.5 * gen_imgs + 0.5) * 255, 0, 255).astype(np.uint8)

        # Lay the samples out left to right: (H, n_samples * W, C)
        combined = np.concatenate(list(pixels), axis=1)
        imsave(savepath+"images/"+str(epoch)+".png", combined)

In [11]:
# Label-conditioned image generator built at notebook level so that its
# parameter summary can be printed: embedding lookup -> dense -> conv stack.
embedding_layer = Embedding(360000, 200)

# Layers are handed to the Sequential constructor in execution order.
model = Sequential([
    Dense(192 * 192 * 3, activation='relu', input_dim=200),
    Reshape((192, 192, 3)),
    Conv2D(256, kernel_size=3, padding="same"),
    BatchNormalization(momentum=0.8),
    Activation("relu"),
    Conv2D(128, kernel_size=3, padding="same"),
    BatchNormalization(momentum=0.8),
    Activation("relu"),
    Conv2D(64, kernel_size=3, padding="same"),
    BatchNormalization(momentum=0.8),
    Activation("relu"),
    Conv2D(3, kernel_size=3, padding="same"),
    Activation("tanh"),
])

# Integer label -> 200-wide embedding -> image
label = Input(shape=(1,), dtype='int32')
label_embedding = Flatten()(embedding_layer(label))
img = model(label_embedding)

model2 = Model(label, img)

print(model2.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
embedding_4 (Embedding)      (None, 1, 200)            72000000  
_________________________________________________________________
flatten_5 (Flatten)          (None, 200)               0         
_________________________________________________________________
sequential_6 (Sequential)    (None, 192, 192, 3)       22608515  
Total params: 94,608,515
Trainable params: 94,607,619
Non-trainable params: 896
_________________________________________________________________
None
