In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in files]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Dog Image Generator - Module 5

For week five we have been asked to work through the Kaggle Generative Dog Images competition. The competition draws on knowledge of generative networks, specifically deep convolutional generative adversarial networks (DCGANs).

We will build a network that generates pictures of dogs. A library of training images is used to teach a classifier/discriminator what real dog images look like. A generator is trained alongside it: its output is fed to the discriminator, and the discriminator's verdict provides the feedback that pushes the generator to create increasingly dog-like images.

IMPORTANT NOTE: When I download my notebook from Kaggle, the cell outputs and images are not included with the notebook. To view the complete notebook, please use my Kaggle link below:

https://www.kaggle.com/code/guymcguy/arj-gan/notebook

#### Step 1 - Pull in data and perform cleaning/EDA
We will import the input data (dog training images). Various pre-processing steps will be covered: cropping, normalizing, and creating random noise inputs for the generator.

I relied strongly on the following notebook for image pre-processing:

https://www.kaggle.com/code/cmalla94/dcgan-generating-dog-images-with-tensorflow

In [4]:
import numpy as np, pandas as pd, os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt, zipfile 
from PIL import Image

import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Reshape, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras import optimizers

from IPython.display import FileLink, FileLinks
from PIL import Image # image processing library
import IPython.display as display # for inspecting the images by displaying them in the kernel

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

In [5]:
import zipfile

# Unpack both competition archives into the session scratch directory.
for archive_name in ("all-dogs.zip", "Annotation.zip"):
    archive_path = "../input/generative-dog-images/" + archive_name
    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall("/kaggle/temp/")

In [6]:
# Variables to be used for network attributes.

SEED = 42
np.random.seed(SEED)
# Seed TensorFlow as well: NumPy's seed does not cover tf.random, so without this
# the fixed preview noise created below would differ on every run.
tf.random.set_seed(SEED)
random_dim = 128

# Variables needed for the training part
# (these three are declared again, with a different EPOCHS, in the training-step cell)
EPOCHS = 280
noise_dim = 100
num_examples_to_generate = 16

# Fixed noise batch reused for every preview grid during training, so that
# successive previews show the same latent points.
seed = tf.random.normal([num_examples_to_generate, noise_dim])

# Directory the two archives were extracted into.
ROOT = '/kaggle/temp/'
breeds = os.listdir(ROOT + 'Annotation')
IMAGES = os.listdir(ROOT + 'all-dogs')

We will need to crop images to effectively use within the model. I have utilized an existing notebook found in the competition directory located here:
https://www.kaggle.com/code/aligsaoud/dog-image-generator

In [7]:
# Pre-allocated buffer with one 64x64 RGB slot per cropped image. Only the first
# `idxIn` slots are filled; the remaining rows stay all-zero.
idxIn = 0; namesIn = []
imagesIn = np.zeros((25000,64,64,3))
DogsOnly = False

# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under its
# current name.
RESAMPLE = Image.LANCZOS

# CROP WITH BOUNDING BOXES TO GET DOGS ONLY
if DogsOnly:
    for breed in breeds:
        # The annotations live in ROOT + 'Annotation', the same directory that
        # `breeds` was listed from.
        breed_dir = ROOT + 'Annotation/' + breed
        for dog in os.listdir(breed_dir):
            # Skip annotations whose image is missing or unreadable, but let any
            # other kind of error surface instead of swallowing it.
            try:
                img = Image.open(ROOT + 'all-dogs/' + dog + '.jpg').convert('RGB')
            except OSError:
                continue
            tree = ET.parse(breed_dir + '/' + dog)
            root = tree.getroot()
            # One crop per annotated object in the image.
            for o in root.findall('object'):
                bndbox = o.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)
                # Square crop anchored at the box's top-left corner, with the
                # side length of the box's shorter edge.
                w = min(xmax - xmin, ymax - ymin)
                img2 = img.crop((xmin, ymin, xmin+w, ymin+w))
                img2 = img2.resize((64,64), RESAMPLE)
                imagesIn[idxIn,:,:,:] = np.asarray(img2)
                namesIn.append(breed)
                idxIn += 1

# RANDOMLY CROP FULL IMAGES
else:
    # Draw 10,000 image indices (with replacement) from the whole listing rather
    # than from a hard-coded first 20,000 entries.
    x = np.random.choice(len(IMAGES), 10000)
    for k in range(len(x)):
        # Force three channels so the pixels always fit the (64, 64, 3) slot.
        img = Image.open(ROOT + 'all-dogs/' + IMAGES[x[k]]).convert('RGB')
        w, h = img.size
        if (k%2==0)|(k%3==0):
            # Resize to width 100 (keeping the aspect ratio), then take the
            # 64x64 window whose top-left corner is at (18, 0).
            w2 = 100; h2 = int(h/(w/100))
            a = 18; b = 0
        else:
            # Resize so the shorter side is 64, then crop the centre of the
            # longer side.
            a = 0; b = 0
            if w<h:
                w2 = 64; h2 = int((64/w)*h)
                b = (h2-64)//2
            else:
                h2 = 64; w2 = int((64/h)*w)
                a = (w2-64)//2
        img = img.resize((w2,h2), RESAMPLE)
        img = img.crop((0+a, 0+b, 64+a, 64+b))
        imagesIn[idxIn,:,:,:] = np.asarray(img)
        namesIn.append(IMAGES[x[k]])
        idxIn += 1

# DISPLAY CROPPED IMAGES
# 25 random filled slots, shown as 5 rows of 5.
x = np.random.randint(0,idxIn,25)
for k in range(5):
    plt.figure(figsize=(15,3))
    for j in range(5):
        plt.subplot(1,5,j+1)
        img = Image.fromarray( imagesIn[x[k*5+j],:,:,:].astype('uint8') )
        plt.axis('off')
        if not DogsOnly:
            plt.title(namesIn[x[k*5+j]],fontsize=11)
        else:
            plt.title(namesIn[x[k*5+j]].split('-')[1],fontsize=11)
        plt.imshow(img)
    plt.show()

In [8]:
# Shape of the pre-allocated image buffer (this includes slots that were never filled).
imagesIn.shape

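So we can see that the array has room for 25,000 (25K) images, each 64x64 pixels with three channels. Only the first 10,000 slots are filled by the random-crop branch above; the unused remainder is trimmed off in the next step.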

We can normalize the images to the range [-1, 1] (centered on 0), which matches the range of the generator's tanh output.

In [9]:
# Normalize the images.

# Keep only the filled slots and rescale the pixel values to [-1, 1].
# NOTE: this rebinds `imagesIn`, so running the cell a second time would rescale
# the already-normalized data again.
imagesIn = (imagesIn[:idxIn] - 127.5) / 127.5

In [10]:
# Plot normalized sample images

plt.figure(figsize=(8,8))
for image in range(4):
    plt.subplot(2,2, image+1)
    # imshow expects float RGB data in [0, 1]; the images were normalized to
    # [-1, 1], so map them back for display, otherwise the negative half is clipped.
    plt.imshow((imagesIn[image] + 1.) / 2.)
    plt.xlabel('shape: {}'.format(imagesIn[image].shape))

We will need a batched input pipeline (a `tf.data.Dataset`) to feed the images to the network. To make this work we will need to cast the images to float32.

In [11]:
# Convert the image array to a float32 tensor.
imagesIn = tf.cast(imagesIn, tf.float32)

buffer = 10000
batch_size = 64

# Slice the tensor into single images, shuffle them, then group them into batches.
ds = tf.data.Dataset.from_tensor_slices(imagesIn)
ds = ds.shuffle(buffer)
ds = ds.batch(batch_size)


### Step 2 - Build the various models (Generator and discriminator/classifier)

I will create two separate networks:

    A. Generator - takes random input noise and, through its learned network weights, produces images that are meant to look like dogs.
    B. Discriminator - takes real training images and the output of model A, and classifies whether each one is a real dog image.

We will have two separate loss functions, one for each model.

In [12]:
# function will return a generator model

# Weight initializer shared by the convolutional layers of both networks.
# NOTE(review): the DCGAN paper initializes weights with stddev=0.02; confirm that
# 0.2 is intentional here.
WEIGHT_INIT = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.2)


def make_generator(latent_dim=100):
    """Build the generator: a noise vector in, a 64x64x3 image in [-1, 1] out.

    Args:
        latent_dim: length of the input noise vector. The default of 100 matches
            the value that was previously hard-coded.

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    model = tf.keras.Sequential(
        [
            # first layer with 8*8*512 = 32,768 nodes, fed by the random noise vector
            tf.keras.layers.Dense(8*8*512, use_bias=False, input_shape=(latent_dim,)),
            # Normalize the activations of the previous layer at each batch
            tf.keras.layers.BatchNormalization(),
            # leaky relu: f(x) = x if x > 0 else slope*x (Keras' default slope is 0.3)
            tf.keras.layers.LeakyReLU(),
            # reshape the flat activations to an (8,8,512) feature map
            tf.keras.layers.Reshape((8, 8, 512)),

            # Each stride-2 Conv2DTranspose doubles the spatial size: 8 -> 16
            tf.keras.layers.Conv2DTranspose(256, (5,5), strides=(2,2), padding='same', use_bias=False,
                kernel_initializer=WEIGHT_INIT),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LeakyReLU(),
            tf.keras.layers.Dropout(0.3),

            # 16 -> 32
            tf.keras.layers.Conv2DTranspose(128, (5,5), strides=(2,2), padding='same', use_bias=False,
                kernel_initializer=WEIGHT_INIT),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LeakyReLU(),
            tf.keras.layers.Dropout(0.3),

            # 32 -> 64
            tf.keras.layers.Conv2DTranspose(64, (5,5), strides=(2,2), padding='same', use_bias=False,
                kernel_initializer=WEIGHT_INIT),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LeakyReLU(),

            # Dense acts on the last (channel) axis only: it maps the 64 feature
            # channels of every pixel to 3 colour channels, squashed to [-1, 1] by tanh.
            tf.keras.layers.Dense(3, activation='tanh', use_bias=False,
                kernel_initializer=WEIGHT_INIT)
        ]
    )
    return model
# create an instance of the generator model defined
generator = make_generator()
# summary() prints the table itself and returns None, so it is not wrapped in print()
generator.summary()

# random noise vector
noise = tf.random.normal([1,100])
# run the (still untrained) generator with the noise vector as input
generated_image = generator(noise, training=False)
# display output; the tanh output lies in [-1, 1], so map it to [0, 1] for imshow
plt.imshow((generated_image[0, :, :, :] + 1.) / 2.)
print(generated_image.shape)

In [13]:
# This is helpful for quickly downloading output data and samples:
# it shows links to the files under the current directory.

FileLinks('.')

In [14]:
# Create the Discriminator

def make_discriminator():
    """Build the discriminator: a 64x64x3 image in, one sigmoid score out.

    Three identical blocks (strided 4x4 convolution, batch normalization,
    LeakyReLU, 25% dropout) with 64, 128 and 256 filters, followed by a
    flatten and a single sigmoid unit.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential()

    for block_index, n_filters in enumerate((64, 128, 256)):
        conv_kwargs = dict(strides=(2,2), padding='same', kernel_initializer=WEIGHT_INIT)
        if block_index == 0:
            # only the first layer declares the input image shape
            conv_kwargs['input_shape'] = [64,64,3]
        model.add(layers.Conv2D(n_filters, (4,4), **conv_kwargs))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU())
        model.add(layers.Dropout(0.25))

    # flatten the feature map into 1-D and output a single number through a sigmoid
    model.add(layers.Flatten())
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

discriminator = make_discriminator()
# summary() prints the table itself and returns None, so it is not wrapped in print()
discriminator.summary()
# Smoke test: score the image produced by the untrained generator, in inference mode.
decision = discriminator(generated_image, training=False)
print (decision)


Loss and optimizer:

To train the network and individual models of the discriminator and generator we will leverage cross entropy as a starting point. Please see notes below for declaration of references for source material on creating noisy labels (From GAN hacks) and general GAN network architecture (TF tutorial).

In [15]:
# Please see cell 17 from: https://www.kaggle.com/code/cmalla94/dcgan-generating-dog-images-with-tensorflow
# to get a better understanding of whats going on

# The source also refers to GAN hacks: 
# https://github.com/soumith/ganhacks

# Label smoothing -- technique from GAN hacks: instead of assigning hard 1/0 class labels, we assign a random float in the range [0.7, 1.0] for the positive class
# and in the range [0.0, 0.5] for the negative class (the code below uses 0.5 here; the original description suggested 0.3)

def smooth_positive_labels(y):
    """Shift each label down by 0.3, then add uniform noise drawn from [0, 0.3).

    A label of 1 therefore ends up in [0.7, 1.0).
    """
    jitter = np.random.random(y.shape) * 0.3
    return y - 0.3 + jitter

def smooth_negative_labels(y):
    """Add uniform noise drawn from [0, 0.5) to each label.

    A label of 0 therefore ends up in [0.0, 0.5).
    """
    jitter = np.random.random(y.shape) * 0.5
    return y + jitter

# Recommended to introduce some noise to the labels, so out of 1000 real labels, 50 should be flipped to 0 (5%)
# randomly flip some labels
def noisy_labels(y, p_flip):
    """Flip a fraction `p_flip` of the labels (each flipped label becomes 1 - label).

    Args:
        y: labels, indexed along the first axis. A NumPy array is modified in
            place; anything else (e.g. a tensor, which cannot be assigned to)
            is first copied into a NumPy array.
        p_flip: fraction of labels to flip, between 0 and 1.

    Returns:
        The NumPy array holding the flipped labels (`y` itself when `y` was
        already a NumPy array).
    """
    if not isinstance(y, np.ndarray):
        y = np.array(y)
    n_labels = int(y.shape[0])
    # determine the number of labels to flip
    n_select = int(p_flip * n_labels)
    # choose distinct labels to flip, so exactly n_select of them change
    flip_ix = np.random.choice(n_labels, size=n_select, replace=False)
    # invert the labels in place
    y[flip_ix] = 1 - y[flip_ix]
    return y

Training loop

In [16]:
# Binary cross-entropy loss object used by both loss functions below.
# from_logits=False: the predictions passed in are treated as probabilities,
# not raw logits (the discriminator ends in a sigmoid).
# Adapted from the tf dcgan tutorial:
# https://www.tensorflow.org/tutorials/generative/dcgan
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

# The Discriminator loss function

def discriminator_loss(real_output, fake_output):
    """Sum of the cross-entropy on real images and on generated images.

    Args:
        real_output: discriminator scores for the real images.
        fake_output: discriminator scores for the generated images.

    Returns:
        real loss + fake loss. The targets are smoothed labels: ones pulled
        into [0.7, 1.0) for the real scores, zeros pushed into [0.0, 0.5)
        for the fake scores.
    """
    # tf.ones_like / tf.zeros_like build hard targets with the scores' shape;
    # the smoothing helpers then add noise to them
    real_targets = smooth_positive_labels(tf.ones_like(real_output))
    fake_targets = smooth_negative_labels(tf.zeros_like(fake_output))
    return cross_entropy(real_targets, real_output) + cross_entropy(fake_targets, fake_output)

# The Generator loss function

def generator_loss(fake_output):
    """Cross-entropy between the discriminator's scores for fakes and "real" targets.

    The generator wants its images to be scored as real, so the targets are
    ones, smoothed into [0.7, 1.0). (Applying the negative-label smoothing to
    ones, as was done before, produced targets in [1.0, 1.5), which are not
    valid for binary cross-entropy.)

    Args:
        fake_output: discriminator scores for the generated images.

    Returns:
        The scalar generator loss.
    """
    real_targets = smooth_positive_labels(tf.ones_like(fake_output))
    return cross_entropy(real_targets, fake_output)

# optimizers -- a separate Adam instance for each network,
# both with learning_rate=.0002 and beta_1=.5
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=.0002,beta_1=.5)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=.0002,beta_1=.5)

In [17]:
# code from tf dcgan tutorial
# Variables needed for the training part
# NOTE: these re-declare values already set in the configuration cell near the top.
# EPOCHS = 528 replaces the earlier EPOCHS = 280; the other two are unchanged.
EPOCHS = 528
noise_dim = 100
num_examples_to_generate = 16

def train_step(images, G_loss_list, D_loss_list):
    """Run one optimization step of both networks on a single batch.

    Args:
        images: a batch of real training images.
        G_loss_list: list the generator loss of this step is appended to.
        D_loss_list: list the discriminator loss of this step is appended to.
    """
    # Generate as many fakes as there are real images in this batch. A fixed 64
    # would not match the final, smaller batch of an epoch.
    n_images = tf.shape(images)[0]
    noise = tf.random.normal([n_images, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # the following are the operations recorded onto the "tape"
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    # record the losses for the per-epoch loss plot
    G_loss_list.append(gen_loss.numpy())
    D_loss_list.append(disc_loss.numpy())
    # take the derivatives of each loss with respect to its own network's variables ...
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    # ... and apply them with that network's Adam optimizer
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    


In [21]:
def train(dataset, epochs):
    """Train both networks for `epochs` passes over `dataset`.

    After every epoch the per-step losses of that epoch are plotted. On every
    10th epoch (starting with the first) the cell output is cleared and a
    preview grid is generated from the fixed `seed` noise and saved. A final
    preview is generated once training has finished.

    Args:
        dataset: iterable of image batches.
        epochs: number of passes over the dataset.
    """
    # Imported here so the function does not rely on a later cell having been run.
    import time

    G_loss = []
    D_loss = []
    for epoch in range(epochs):
        start = time.time()
        for image_batch in dataset:
            train_step(image_batch, G_loss, D_loss)

        plot_loss(G_loss, D_loss, epoch)
        # start the next epoch's loss curves from scratch
        G_loss = []
        D_loss = []
        if (epoch % 10 == 0):
            display.clear_output(wait=True)
            generate_and_save_images(generator,
                                 epoch + 1,
                                 seed)
        print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

    # Generate after the final epoch
    print("Final Epoch")
    generate_and_save_images(generator,
                           epochs,
                           seed)

In [22]:
import time
def generate_and_save_images(model, epoch, test_input):
    """Generate images from `test_input`, show them in a grid and save the grid.

    Args:
        model: the generator.
        epoch: 1-based epoch number, used as-is in the output file name. The
            callers already pass `epoch + 1`, so it is not incremented again.
        test_input: batch of noise vectors, one per image.
    """
    # Notice `training` is set to False.
    # This is so all layers run in inference mode (batchnorm).
    predictions = model(test_input, training=False)
    n_images = predictions.shape[0]
    # smallest square grid that holds every image (4x4 for the usual 16)
    grid = max(1, int(np.ceil(np.sqrt(n_images))))
    fig = plt.figure(figsize=(8,8))
    for i in range(n_images):
        plt.subplot(grid, grid, i+1)
        # map the [-1, 1] generator output to [0, 1] for display
        plt.imshow((predictions[i, :, :, :]+1.)/2.)
        plt.axis('off')
    plt.savefig('image_at_epoch_{}.png'.format(epoch))
    plt.show()
    # release the figure; this function is called repeatedly during training
    plt.close(fig)

In [23]:
# function by Nanashi
# https://www.kaggle.com/code/jesucristo/introducing-dcgan-dogs-images


def plot_loss (G_losses, D_losses, epoch):
    """Plot the generator and discriminator loss curves of one epoch.

    Args:
        G_losses: generator loss per training step.
        D_losses: discriminator loss per training step.
        epoch: zero-based epoch index; the title shows epoch + 1.
    """
    fig, ax = plt.subplots(figsize=(10,5))
    ax.set_title("Generator and Discriminator Loss - EPOCH {}".format(epoch+1))
    for losses, tag in ((G_losses, "G"), (D_losses, "D")):
        ax.plot(losses, label=tag)
    ax.set_xlabel("iterations")
    ax.set_ylabel("Loss")
    ax.legend()
    plt.show()
    

In [24]:
%%time
# Run the full training loop over the batched dataset for EPOCHS epochs.
print('Starting training')
train(ds, EPOCHS)


Save 10K photos to zip for submission

In [25]:
%%time
# SAVE TO ZIP FILE NAMED IMAGES.ZIP

z = zipfile.PyZipFile('images.zip', mode='w')

filename = 'gen_model.h5'
tf.keras.models.save_model(
    generator,
    filename,
    overwrite=True,
    include_optimizer=True,
    save_format=None
)

for k in range(10000):
    generated_image = generator(tf.random.normal([1, noise_dim]), training=False)
    f = str(k)+'.png'
    img = ((generated_image[0,:,:,:]+1.)/2.).numpy()
    tf.keras.preprocessing.image.save_img(
        f,
        img,
        scale=True
    )
    z.write(f); os.remove(f)
z.close()



In [27]:
# Show links to the files under the current directory (saved model, images.zip, preview PNGs).
FileLinks('.')


## Discussion

We've built two models that compete against one another. The end result is a network that can produce images of dogs (sort of...).

As epochs progressed, the intermediate dog images became increasingly distinctive and pronounced. The training losses of both networks also evolved over time, trending toward lower values.

Because the training time was very long, and my GPU quota on Kaggle was not sufficient, I cut my training epochs to 256 max. Further training would have helped produce better results. Additionally, I believe adding a 1024 neuron layer to the generator and discriminator would have been beneficial. Lastly, I am actually not sure how this would work, but a pre-trained dog classifier could have been used to fast track the progress.

## Conclusion

We have created a network that spawns hell-like images of anthropomorphically terrifying dogs. The network could certainly be improved to increase the realism of said hell dogs. Improvements would be additional layers to the discriminator and generator or using a pre-trained network like AlexNet. 