# CycleGAN for FER - Keras Version

By Fatemeh Ghezloo and Michelle Lin


## Usage
The purpose of this notebook is to train a CycleGAN for the facial expression recognition (FER) task on the FER2013 dataset. We use this network to generate additional sample images to supplement the minority classes in our dataset. The images generated by this model are saved and later loaded as input to our CNN model for the classification task.

To run this notebook, please make sure to first download the FER2013 Kaggle dataset. Because this dataset is part of a challenge, we are not allowed to share the data.


Most of the code was implemented by us, but it is based on this [GitHub repository](https://github.com/ivadym/FER).



## Initial setup

Run the following sections to make sure that you have imported all necessary packages, can read from/write to your Google Drive, and have copied all data to the GPU.

In [1]:
# Mount Google Drive under /gdrive so later cells can read and write files there.
from google.colab import drive
drive.mount('/gdrive')
# List the mount point to confirm the drive is visible.
!ls /gdrive

Mounted at /gdrive
MyDrive


In [17]:
import os
# Project directory on the mounted drive; it is created on first run.
BASE_PATH = '/gdrive/My Drive/colab_files/final_project/'
if not os.path.exists(BASE_PATH):
    os.makedirs(BASE_PATH)
# Local directory in which a copy of fer2013.csv is looked for.
DATA_PATH = '/content/'

# When the CSV is not present in DATA_PATH, switch the working directory to BASE_PATH.
if not os.path.exists(os.path.join(DATA_PATH, 'fer2013.csv')):
    os.chdir(BASE_PATH)
    # NOTE(review): the copy step below is disabled, so this branch only changes directory.
    # !cp fer2013.csv /content

# Show the contents of the current working directory.
!ls

fer2013.csv  sample_data


In [5]:
import os
import datetime
import numpy as np
import pandas as pd
import imageio

from torch.utils.data import DataLoader
from io import BytesIO
from tensorflow.python.lib.io import file_io
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import Input, Dropout, Concatenate, Activation, UpSampling2D, Conv2D, LeakyReLU
import multiprocessing
num_workers = multiprocessing.cpu_count()


## Helper Function

In [12]:
from keras.engine import Layer, InputSpec
from keras import initializers, regularizers, constraints
from keras import backend as K

class InstanceNormalization(Layer):
    """Keras layer that normalizes every sample of a batch on its own.

    For each sample the mean and standard deviation are computed over all
    axes except the batch axis (axis 0) and, when given, ``axis``. The input
    is shifted and scaled with these statistics, then optionally multiplied
    by a learned ``gamma`` and offset by a learned ``beta``.

    Arguments:
        axis: Axis that keeps its own statistics and its own gamma/beta
            entries. ``None`` normalizes over every non-batch axis and uses
            a single scalar gamma/beta. Must not be 0.
        epsilon: Constant added to the standard deviation before dividing.
        center: If True, add the learned offset ``beta``.
        scale: If True, multiply by the learned factor ``gamma``.
        beta_initializer: Initializer for ``beta``.
        gamma_initializer: Initializer for ``gamma``.
        beta_regularizer: Optional regularizer for ``beta``.
        gamma_regularizer: Optional regularizer for ``gamma``.
        beta_constraint: Optional constraint for ``beta``.
        gamma_constraint: Optional constraint for ``gamma``.
    """

    def __init__(self,
                 axis=None,
                 epsilon=1e-3,
                 center=True,
                 scale=True,
                 beta_initializer='zeros',
                 gamma_initializer='ones',
                 beta_regularizer=None,
                 gamma_regularizer=None,
                 beta_constraint=None,
                 gamma_constraint=None,
                 **kwargs):
        """Store the configuration; weights are created later in ``build``."""
        super(InstanceNormalization, self).__init__(**kwargs)
        self.supports_masking = True
        self.axis = axis
        self.epsilon = epsilon
        self.center = center
        self.scale = scale
        # Resolve string identifiers (e.g. 'zeros') into Keras objects.
        self.beta_initializer = initializers.get(beta_initializer)
        self.gamma_initializer = initializers.get(gamma_initializer)
        self.beta_regularizer = regularizers.get(beta_regularizer)
        self.gamma_regularizer = regularizers.get(gamma_regularizer)
        self.beta_constraint = constraints.get(beta_constraint)
        self.gamma_constraint = constraints.get(gamma_constraint)

    def build(self, input_shape):
        """Validate ``axis`` against the input rank and create gamma/beta.

        Raises:
            ValueError: If ``axis`` is 0, or if ``axis`` is set while the
                input has only two dimensions.
        """
        ndim = len(input_shape)
        if self.axis == 0:
            raise ValueError('Axis cannot be zero')

        if (self.axis is not None) and (ndim == 2):
            raise ValueError('Cannot specify axis for rank 1 tensor')

        # Pin the expected input rank to the one seen at build time.
        self.input_spec = InputSpec(ndim=ndim)

        # One scalar parameter without an axis, otherwise one entry per
        # position along the chosen axis.
        if self.axis is None:
            shape = (1,)
        else:
            shape = (input_shape[self.axis],)

        if self.scale:
            self.gamma = self.add_weight(shape=shape,
                                         name='gamma',
                                         initializer=self.gamma_initializer,
                                         regularizer=self.gamma_regularizer,
                                         constraint=self.gamma_constraint)
        else:
            self.gamma = None
        if self.center:
            self.beta = self.add_weight(shape=shape,
                                        name='beta',
                                        initializer=self.beta_initializer,
                                        regularizer=self.beta_regularizer,
                                        constraint=self.beta_constraint)
        else:
            self.beta = None
        self.built = True

    def call(self, inputs, training=None):
        """Normalize ``inputs`` per sample and apply gamma/beta if enabled.

        ``training`` is accepted but not used by this method.
        """
        input_shape = K.int_shape(inputs)
        reduction_axes = list(range(0, len(input_shape)))

        # Drop the feature axis first (while list positions still equal axis
        # numbers), then drop the batch axis, which is still at position 0
        # because build() rejects axis == 0.
        if (self.axis is not None):
            del reduction_axes[self.axis]

        del reduction_axes[0]

        mean = K.mean(inputs, reduction_axes, keepdims=True)
        # epsilon is added to the standard deviation itself, not the variance.
        stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
        normed = (inputs - mean) / stddev

        # Shape that lets gamma/beta broadcast against the input: all ones
        # except along the chosen axis.
        broadcast_shape = [1] * len(input_shape)
        if self.axis is not None:
            broadcast_shape[self.axis] = input_shape[self.axis]

        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            normed = normed * broadcast_gamma
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            normed = normed + broadcast_beta
        return normed

    def get_config(self):
        """Return the constructor arguments merged into the base layer config."""
        config = {
            'axis': self.axis,
            'epsilon': self.epsilon,
            'center': self.center,
            'scale': self.scale,
            'beta_initializer': initializers.serialize(self.beta_initializer),
            'gamma_initializer': initializers.serialize(self.gamma_initializer),
            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
            'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
            'beta_constraint': constraints.serialize(self.beta_constraint),
            'gamma_constraint': constraints.serialize(self.gamma_constraint)
        }
        base_config = super(InstanceNormalization, self).get_config()
        # Entries from this class override same-named base entries.
        return dict(list(base_config.items()) + list(config.items()))


## Input Preprocessing

In this section we split the fer2013.csv file into separate npy files holding the training and test sets of the reference class (neutral) and the target classes (disgust and sad).

This section also provides functions that preprocess the data and augment it with random transforms, to prevent the models from memorizing the input.

In [6]:
import csv
import os
import numpy as np
import h5py


# Emotion name <-> integer label lookup tables.
emotionsToInd = {"Angry":0, "Disgust":1, "Fear":2, "Happy":3, "Sad":4, "Surprise":5, "Neutral":6}
indToEmotions = {emotionsToInd[key]:key for key in emotionsToInd}
setSizes = {"Training":[], "PublicTest":[], "PrivateTest":[]}
file = BASE_PATH + 'fer2013.csv'

# One list of flat pixel vectors per (split, emotion) pair that we export.
I1 = []  # Training   / Neutral (6)
I2 = []  # Training   / Disgust (1)
I3 = []  # PublicTest / Neutral (6)
I4 = []  # PublicTest / Disgust (1)
I5 = []  # Training   / Sad (4)
I6 = []  # PublicTest / Sad (4)

# (last CSV column, integer label in the first column) -> destination list.
_buckets = {
    ('Training', 6): I1,
    ('Training', 1): I2,
    ('PublicTest', 6): I3,
    ('PublicTest', 1): I4,
    ('Training', 4): I5,
    ('PublicTest', 4): I6,
}

with open(file, 'r') as csvin:
    for row in csv.reader(csvin):
        usage = row[-1]
        # Rows of any other split (and the header row) are ignored before
        # the label column is parsed.
        if usage not in ('Training', 'PublicTest'):
            continue
        bucket = _buckets.get((usage, int(row[0])))
        if bucket is not None:
            # The second column holds the pixels as whitespace-separated ints.
            bucket.append(np.asarray([int(pixel) for pixel in row[1].split()]))

# Save by path so numpy opens AND closes each file; the previous version kept
# six file handles open, which could leave the data unflushed on disk.
for _name, _images in (('neutral_training.npy', I1),
                       ('disgust_training.npy', I2),
                       ('neutral_test.npy', I3),
                       ('disgust_test.npy', I4),
                       ('sad_training.npy', I5),
                       ('sad_test.npy', I6)):
    np.save(BASE_PATH + _name, _images)

In [18]:
# ---------------- PARAMETERS ----------------

# Image geometry (FER-2013): square 48x48 images with a single channel
img_height = img_width = 48
channels = 1
img_shape = (img_height, img_width, channels)

# Weight applied to the cycle-consistency losses
lambda_cycle = 10.0

# Optimizer instance shared by all models
# (positional arguments; presumably learning rate and beta_1 - confirm against the Keras version in use)
optimizer = Adam(0.0002, 0.5)

# Training schedule
epochs, batch_size, save_interval = 30000, 1, 50

# ---------------- DATASETS ----------------

# Logs, generated images and model weights are written below this folder
folder = BASE_PATH + "logs"

# Training data of domain A (neutral) and domain B (disgust)
dataset_A, dataset_B = (
    BASE_PATH + name for name in ("neutral_training.npy", "disgust_training.npy")
)

# Test data of both domains, used when saving sample translations
image_A, image_B = (
    BASE_PATH + name for name in ("neutral_test.npy", "disgust_test.npy")
)


# DATA PREPARATION 
def preprocess_input(x):
    """Rescale pixel values: divide by 127.5, then subtract 1 (0 -> -1, 255 -> 1)."""
    return x / 127.5 - 1

# Function that reads the images from an npy file
def load_data(dataset):
    """Load a saved image array and return it preprocessed.

    Arguments:
        dataset: Path of an ``.npy`` file; its contents are reshaped to one
            ``img_height`` x ``img_width`` x ``channels`` image per entry.

    Returns:
        The reshaped array after it has been passed through
        ``preprocess_input``.
    """
    loaded_images = np.load(dataset)
    # Use the configured channel count rather than a literal 1 so this stays
    # consistent with img_shape and augmented_data.
    loaded_images = loaded_images.reshape((len(loaded_images), img_height, img_width, channels))
    return preprocess_input(loaded_images)


def augmented_data(loaded_images):
    """Return a randomly flipped and shuffled copy of ``loaded_images``.

    Each image is mirrored with ``np.fliplr`` when a fresh uniform random
    draw exceeds 0.5; afterwards the images are reordered by a random
    permutation.
    """
    count = len(loaded_images)
    result = np.empty((count, img_height, img_width, channels))

    for position, picture in enumerate(loaded_images):
        # One random draw per image decides whether it gets mirrored.
        flip = np.random.random() > 0.5
        result[position, :, :, :] = np.fliplr(picture) if flip else picture

    order = np.random.permutation(len(result))
    return result[order]

## Generator and Discriminator

The generator and discriminator network structures are defined in this section.

In [19]:
# GENERATOR
def generator():
    """Build the U-Net generator: four downsampling stages, three upsampling
    stages with skip connections, and a final upsample + tanh convolution."""

    def _down(tensor, n_filters, kernel = 4):
        """Stride-2 convolution -> LeakyReLU -> instance normalization."""
        tensor = Conv2D(n_filters, kernel_size = kernel, strides = 2, padding = "same")(tensor)
        tensor = LeakyReLU(alpha=0.2)(tensor)
        return InstanceNormalization()(tensor)

    def _up(tensor, skip, n_filters, kernel = 4, drop = 0):
        """Upsample x2 -> ReLU convolution -> optional dropout -> instance
        normalization, then concatenate with the matching encoder output."""
        tensor = UpSampling2D(size = 2)(tensor)
        tensor = Conv2D(n_filters, kernel_size = kernel, strides = 1, padding = "same", activation = "relu")(tensor)
        if drop:
            tensor = Dropout(drop)(tensor)
        tensor = InstanceNormalization()(tensor)
        return Concatenate()([tensor, skip])

    base_filters = 32
    image_in = Input(shape = img_shape)

    # Encoder: keep every stage output so the decoder can reuse it
    encoder_outputs = []
    current = image_in
    for multiplier in (1, 2, 4, 8):
        current = _down(current, base_filters * multiplier)
        encoder_outputs.append(current)

    # Decoder: start from the deepest stage and consume the remaining
    # encoder outputs from deepest to shallowest as skip inputs
    current = encoder_outputs.pop()
    for multiplier in (4, 2, 1):
        current = _up(current, encoder_outputs.pop(), base_filters * multiplier)

    # Final upsampling followed by a tanh convolution with `channels` outputs
    current = UpSampling2D(size = 2)(current)
    image_out = Conv2D(channels, kernel_size = 4, strides = 1, padding = "same", activation = "tanh")(current)

    return Model(image_in, image_out)

# DISCRIMINATOR 

def discrimiantor():
    """Build the convolutional discriminator.

    Four stride-2 convolution stages (64, 128, 256 and 512 filters; the first
    one without instance normalization) are followed by a single-filter
    convolution whose output map is returned as the validity prediction.
    """
    image_in = Input(shape = img_shape)

    # (filters, apply instance normalization) for each downsampling stage
    stages = ((64, False), (64*2, True), (64*4, True), (64*8, True))

    features = image_in
    for n_filters, normalize in stages:
        features = Conv2D(n_filters, kernel_size = 4, strides = 2, padding = "same")(features)
        features = LeakyReLU(alpha = 0.2)(features)
        if normalize:
            features = InstanceNormalization()(features)

    validity_map = Conv2D(1, kernel_size = 4, strides = 1, padding = "same")(features)

    return Model(image_in, validity_map)

## Building and Compiling

This section provides the code to define, build, and compile the two generator models, the two discriminator models, and the loss functions of the combined model.

In [14]:
# --- Discriminators: one per domain, both trained with an MSE loss ---
d_A = discrimiantor()
d_B = discrimiantor()
for _disc in (d_A, d_B):
    _disc.compile(loss = "mse", optimizer = optimizer, metrics = ["accuracy"])

# --- Generators: A -> B and B -> A ---
g_AB = generator()
g_BA = generator()
for _gen in (g_AB, g_BA):
    _gen.compile(loss = "binary_crossentropy", optimizer = optimizer)

# --- Combined model graph ---
# Symbolic inputs for one image of each domain
img_A = Input(shape = img_shape)
img_B = Input(shape = img_shape)

# Forward translation into the other domain ...
fake_B = g_AB(img_A)
fake_A = g_BA(img_B)

# ... and back again into the original domain
reconstr_A = g_BA(fake_B)
reconstr_B = g_AB(fake_A)

# Freeze the discriminators before wiring them into the combined model, so
# that training the combined model only updates the generators
for _disc in (d_A, d_B):
    _disc.trainable = False

# Discriminator verdicts on the translated images
valid_A = d_A(fake_A)
valid_B = d_B(fake_B)

combined = Model(
    [img_A, img_B],
    [valid_A, valid_B, fake_B, fake_A, reconstr_A, reconstr_B])
# Losses are listed in the same order as the outputs above; only the two
# reconstruction outputs are weighted by lambda_cycle
combined.compile(
    loss = ["mse", "mse", "mae", "mae", "mae", "mae"],
    loss_weights = [1, 1, 1, 1, lambda_cycle, lambda_cycle],
    optimizer = optimizer)


## Save Generated Images

Every `save_interval` epochs (50 with the parameters above), we test our model and save the generated images. The function below saves the newly generated images and also writes the pixels of the translated images to an npy file, to be loaded by the CNN later.

In [20]:
from skimage import img_as_ubyte

def check_file(filepath):
    """Make sure the directory that will contain ``filepath`` exists.

    Arguments:
        filepath: Path of a file (or a directory path ending in a separator).
            Only its directory part is created; the file itself is not.

    A path without a directory component refers to the current directory,
    so nothing needs to be created in that case.
    """
    directory = os.path.dirname(filepath)
    if directory:
        # exist_ok avoids the check-then-create race of the previous version.
        os.makedirs(directory, exist_ok=True)

def save_imgs(epoch):
    """Translate the test images and store the results for ``epoch``.

    For every index available in both test sets, one image of each domain
    is translated to the other domain and back again. The six resulting
    images are written as PNG files under ``<folder>/epoch_<epoch>/``, and
    all A->B translations are collected in ``disgust_new.npy`` in the same
    directory.

    Arguments:
        epoch: Value inserted into the output directory name.
    """
    imges_A = load_data(image_A)
    imges_B = load_data(image_B)

    check_file(folder + "/epoch_%s/" % (epoch))

    titles = ["Original_A", "Translated_A", "Reconstructed_A", "Original_B", "Translated_B", "Reconstructed_B"]
    translated = []

    n_iterations = min(len(imges_A), len(imges_B))
    for idx in range(n_iterations):
        # Slices (not plain indexing) keep the leading batch axis of size 1.
        imgs_A = imges_A[idx : idx + 1]
        imgs_B = imges_B[idx : idx + 1]

        # Translate images to the other domain
        fake_B = g_AB.predict(imgs_A)
        fake_A = g_BA.predict(imgs_B)
        # Translate back to original domain
        reconstr_A = g_BA.predict(fake_B)
        reconstr_B = g_AB.predict(fake_A)

        gen_imgs = [np.squeeze(batch, axis = 0)
                    for batch in (imgs_A, fake_B, reconstr_A, imgs_B, fake_A, reconstr_B)]

        # The npy file keeps the raw generator output (second entry, A -> B).
        translated.append(gen_imgs[1])

        for title, img in zip(titles, gen_imgs):
            # img_as_ubyte clips negative floats to 0, which would erase the
            # dark half of an image stored in [-1, 1]; map to [0, 1] first.
            unit_range = np.clip((img + 1.0) / 2.0, 0.0, 1.0)
            imageio.imwrite(folder + "/epoch_%s/%d_%s.png" % (epoch, idx, title), img_as_ubyte(unit_range))

    # Saving by path lets numpy open and close the file itself; the previous
    # version left an open file handle behind.
    np.save(folder + "/epoch_%s/disgust_new.npy" % (epoch), translated)


## Training

Here, the input data is loaded, augmented, and then fed to the networks for training. Every `save_interval` epochs (50 with the parameters above), the losses, the model weights, and the outputs of the model are saved using the save function provided earlier.

In [None]:
start_time = datetime.datetime.now()

# Load the complete training sets of both domains
imgs_A = load_data(dataset_A)
imgs_B = load_data(dataset_B)

discriminator_loss = []
generator_loss = []

# The number of batches per epoch is fixed: augmentation keeps the set sizes.
n_iterations = min(len(imgs_A), len(imgs_B)) // batch_size
if n_iterations == 0:
    # Without this check the loss bookkeeping below fails with a NameError.
    raise ValueError("Not enough training images for a single batch of size %d" % batch_size)

# Discriminator targets, built once. Their shape follows the discriminator
# output (3 x 3 x 1 for the 48 x 48 inputs configured above).
patch_shape = tuple(d_A.output_shape[1:])
# The generators want the discriminators to label the translated images as real
valid = np.ones((batch_size,) + patch_shape)
fake = np.zeros((batch_size,) + patch_shape)

for epoch in range(epochs):

    imgs_A = augmented_data(imgs_A)
    imgs_B = augmented_data(imgs_B)

    # Translate images to opposite domain
    # NOTE(review): the translations are produced once per epoch, so the
    # discriminators see fakes from the generator state at epoch start.
    fake_B = g_AB.predict(imgs_A)
    fake_A = g_BA.predict(imgs_B)

    for it in range(n_iterations):
        batch = slice(it * batch_size, (it + 1) * batch_size)
        imgs_A_p = imgs_A[batch]
        fake_A_p = fake_A[batch]
        imgs_B_p = imgs_B[batch]
        fake_B_p = fake_B[batch]

        # TRAIN DISCRIMINATORS
        dA_loss_real = d_A.train_on_batch(imgs_A_p, valid)    # original images = real
        dA_loss_fake = d_A.train_on_batch(fake_A_p, fake)     # translated images = fake

        dB_loss_real = d_B.train_on_batch(imgs_B_p, valid)
        dB_loss_fake = d_B.train_on_batch(fake_B_p, fake)

        # TRAIN GENERATORS
        g_loss = combined.train_on_batch([imgs_A_p, imgs_B_p], [valid, valid, imgs_A_p, imgs_B_p, imgs_A_p, imgs_B_p])

    # Discriminator losses (values of the LAST batch of this epoch)
    dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)
    dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)
    d_loss = 0.5 * np.add(dA_loss, dB_loss)

    elapsed_time = datetime.datetime.now() - start_time

    # Print the progress
    print ("Epoch: %d Time: %s \n D-Loss %s \n G-Loss %s " % (epoch, elapsed_time, d_loss, g_loss))

    # Register the losses
    discriminator_loss.append(d_loss)
    generator_loss.append(g_loss)

    # If at save interval => save generated image samples
    if epoch % save_interval == 0:
        # save_imgs also creates the epoch directory used below
        save_imgs(epoch)

        np.save(folder + "/epoch_%s/d_loss_%s.npy" % (epoch, epoch), discriminator_loss)
        np.save(folder + "/epoch_%s/g_loss_%s.npy" % (epoch, epoch), generator_loss)

        d_A.save_weights(folder + "/epoch_%s/weights_d_A_%s.h5" % (epoch, epoch))
        d_B.save_weights(folder + "/epoch_%s/weights_d_B_%s.h5" % (epoch, epoch))
        g_AB.save_weights(folder + "/epoch_%s/weights_g_AB_%s.h5" % (epoch, epoch))
        g_BA.save_weights(folder + "/epoch_%s/weights_g_BA_%s.h5" % (epoch, epoch))

Epoch: 0 Time: 0:00:51.616839 
 D-Loss [0.01600469 1.        ] 
 G-Loss [5.38951301574707, 0.6061256527900696, 0.015671439468860626, 0.9643536806106567, 0.719403862953186, 0.11765946447849274, 0.19073641300201416] 
Epoch: 1 Time: 0:03:22.308271 
 D-Loss [0.11641598 1.        ] 
 G-Loss [4.914385795593262, 0.07189420610666275, 0.4109202027320862, 0.7062171101570129, 0.7726929187774658, 0.15506401658058167, 0.1402021199464798] 
Epoch: 2 Time: 0:03:56.972067 
 D-Loss [0.02078542 1.        ] 
 G-Loss [5.050339698791504, 0.007401918526738882, 0.0850697010755539, 1.0164365768432617, 0.6182152628898621, 0.19990263879299164, 0.1324189454317093] 
Epoch: 3 Time: 0:04:32.047419 
 D-Loss [0.02326371 1.        ] 
 G-Loss [4.114402770996094, 0.039583899080753326, 0.011753393337130547, 0.5270629525184631, 0.5846845507621765, 0.19258248805999756, 0.10254929214715958] 
Epoch: 4 Time: 0:05:06.598413 
 D-Loss [0.01210884 1.        ] 
 G-Loss [3.473181962966919, 0.007980958558619022, 0.10129909217357635, 