In [None]:
print()
import os
import json

# Read the shared path registry stored one directory above the notebook.
with open('../paths.json', 'r') as f:
    paths = json.load(f)

# The registry is keyed by cloud name; the "cloud" entry selects the active one.
cloud_paths = paths["paths"][paths["cloud"]]
ptdata = cloud_paths["training_data"]   # root of the training data tree
pdatasets = cloud_paths["datasets"]     # root of the datasets tree

# A "prod.flag" file in the parent directory marks a production run;
# without it the notebook runs with the development settings.
isProd = os.path.isfile("../prod.flag")
print("PROD environment" if isProd else "DEV environment")

# Get the number of the last finished epoch from the epoch counter file.
# The context manager closes the file even when the first line is missing or
# cannot be parsed as an integer (the previous open()/close() pair leaked the
# handle in that case).
with open(ptdata+"/TL1/last_epoch.num", "r") as epf:
    # int() tolerates surrounding whitespace, including the trailing newline.
    last_epoch = int(epf.readlines()[0])
# The epoch this run is going to train.
next_epoch = last_epoch + 1


print()
print("Last epoch : ", last_epoch)
print("Next epoch : ", next_epoch)

# Development runs use a small fixed budget; the production values are
# derived from the dataset size once the labels have been loaded.
if not isProd:
    my_batch_size, my_steps_per_epoch = 32, 10
    my_validation_steps, my_validation_split = 5, 0.02

# Loading libraries

In [None]:
# Enumerate the computing units (CPUs, GPUs) in PCI bus ID order.
os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID"})
# Uncomment the next line (value -1) to run on the CPU only.
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [None]:
import multiprocessing

In [None]:
import numpy as np
print('numpy version : ', np.version.version)

In [None]:
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
print('tensorflow version : ', tf.__version__)

In [None]:
import tensorflow.keras
print('tensorflow keras version : ', tensorflow.keras.__version__)
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.compat.v1 import placeholder

In [None]:
import math
from matplotlib import pyplot as plt
import itertools
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
import pandas as pd
#print('pandas version : ', pd.show_versions())

# Processing units info

In [None]:
from tensorflow.python.client import device_lib

# Show every local device reported by TensorFlow.
local_devices = device_lib.list_local_devices()
print(local_devices)

# Loading and processing training and validation Dataframe

In [None]:
# Load the training labels (JSON) of the AVA dataset into a DataFrame.
labels_path = pdatasets+'/ava/labels/train.json'
df = pd.read_json(labels_path)
#df

# Generating and augmenting training and validation images

In [None]:
# Production runs fix the batch size and validation split here; development
# runs keep the values chosen at the top of the notebook.
if isProd:
    my_batch_size = 64
    my_validation_split = 0.02

# Split sizes implied by the validation split (truncated to whole images).
database_size = df.shape[0]
training_size = int(database_size * (1 - my_validation_split))
validation_size = int(database_size * my_validation_split)

# In production the step counts cover each subset once per epoch,
# dropping the final incomplete batch.
if isProd:
    my_steps_per_epoch = int(training_size / my_batch_size)
    my_validation_steps = int(validation_size / my_batch_size)

print("database size :", database_size, "validation split :", my_validation_split,
      "training size :", training_size,
      "validation size :", validation_size)
print("training batch size :", my_batch_size, ", steps per epoch :", my_steps_per_epoch)
print("validation batch size :", my_batch_size, ", validation steps :", my_validation_steps)

In [None]:
# Report how many entries the image directory contains. The label is printed
# without a newline (end="") so the count produced by the shell pipeline below
# appears right after it.
# The second line is an IPython shell escape, not Python: `ls` lists the
# directory and `wc -l` counts the lines.
# NOTE(review): $pdatasets is substituted unquoted into the shell command, so
# a path containing spaces would break it — confirm paths have no whitespace.
print("Available data :", end="")
!ls $pdatasets/ava/images | wc -l

In [None]:
# Training and validation images live in the same directory; the split
# between them is made by the image generator, not by the file layout.
dataset_path = pdatasets+'/ava/images'
train_path = valid_path = dataset_path

In [None]:
# Image generator with real-time augmentation:
#   - pixel values are rescaled by 1/255,
#   - random horizontal flips are enabled,
#   - a fraction of the data is reserved for validation.
img_gen = ImageDataGenerator(
    rescale=1/255.,
    horizontal_flip=True,
    validation_split=my_validation_split,
)

In [None]:
# Training iterator: reads the file names from the "image_id" column, looks
# them up in the image directory and yields augmented/normalized batches.
# The targets are the ten columns named "0" .. "9", passed through unchanged
# (class_mode="raw").
train_score_columns = [str(column) for column in range(10)]
train_batches = img_gen.flow_from_dataframe(
    dataframe=df,
    directory=train_path,
    x_col="image_id",
    y_col=train_score_columns,
    class_mode="raw",
    target_size=(256, 256),
    batch_size=my_batch_size,
    subset="training",
    shuffle=True,
)

In [None]:
# Validation iterator: same DataFrame and generator as for training, but
# restricted to the "validation" subset reserved by the generator.
# The targets are the ten columns named "0" .. "9", passed through unchanged
# (class_mode="raw").
valid_score_columns = [str(column) for column in range(10)]
valid_batches = img_gen.flow_from_dataframe(
    dataframe=df,
    directory=valid_path,
    x_col="image_id",
    y_col=valid_score_columns,
    class_mode="raw",
    target_size=(256, 256),
    batch_size=my_batch_size,
    subset="validation",
    shuffle=True,
)

In [None]:
# Cuts a randomly positioned window out of an image
# (used in this notebook to take 224x224 crops of the 256x256 images).
def random_crop(img, random_crop_size):
    """Return a random crop of a channels-last, 3-channel image.

    Args:
      img: array indexed as (height, width, channel); the channel axis
        must have length 3.
      random_crop_size: (crop_height, crop_width) pair.
    Returns:
      The slice of `img` of shape (crop_height, crop_width, 3) whose top-left
      corner is drawn uniformly among all positions that keep the crop inside
      the image.
    """
    # Note: image_data_format is 'channel_last'
    img_height, img_width, channels = img.shape[0], img.shape[1], img.shape[2]
    assert channels == 3
    crop_height, crop_width = random_crop_size
    # The horizontal offset is drawn before the vertical one.
    left = np.random.randint(0, img_width - crop_width + 1)
    top = np.random.randint(0, img_height - crop_height + 1)
    return img[top:top + crop_height, left:left + crop_width, :]


def crop_generator(batches, crop_length):
    """Wrap a batch iterator and yield randomly cropped batches forever.

    Args:
      batches: iterator returning (images, labels) pairs, e.g. a Keras
        image iterator; images are indexed as (sample, height, width, channel).
      crop_length: side length of the square crop taken from every image.
    Yields:
      (crops, labels) where `crops` is a float64 array of shape
      (batch, crop_length, crop_length, 3) and `labels` is passed through
      unchanged.
    """
    crop_size = (crop_length, crop_length)
    while True:
        images, targets = next(batches)
        # One independently positioned crop per image of the batch.
        cropped = np.zeros((images.shape[0],) + crop_size + (3,))
        for position, image in enumerate(images):
            cropped[position] = random_crop(image, crop_size)
        yield (cropped, targets)

In [None]:
# Wrap both iterators so that the model receives random square crops.
crop_side = 224
train_crops = crop_generator(train_batches, crop_side)
valid_crops = crop_generator(valid_batches, crop_side)

In [None]:
# Plots images with labels within jupyter notebook
def plots(ims, figsize=(20,6), rows=1, interp=False, titles=None):
    """Display a sequence of images on a grid of subplots.

    Args:
      ims: non-empty sequence of images. When the first element is a numpy
        array the whole sequence is converted to one uint8 array and, if the
        last axis does not have length 3, transposed from
        (sample, channel, height, width) to (sample, height, width, channel).
      figsize: figure size passed to `plt.figure`.
      rows: number of grid rows.
      interp: when true, matplotlib's default interpolation is used;
        otherwise interpolation is disabled.
      titles: optional sequence with one title per image.

    NOTE(review): the cast to uint8 truncates values, so images that were
    already rescaled to [0, 1] would be shown (almost) black — confirm that
    callers pass 0-255 data.
    """
    if type(ims[0]) is np.ndarray:
        ims = np.array(ims).astype(np.uint8)
        if (ims.shape[-1] != 3):
            ims = ims.transpose((0,2,3,1))
    f = plt.figure(figsize=figsize)
    # Ceiling division: enough columns for every image to get a cell. The
    # previous test (`len(ims) % 2`) ignored `rows`, so e.g. 10 images on
    # 4 rows produced a 4x2 grid and add_subplot failed on the 9th image.
    cols = (len(ims) + rows - 1) // rows
    for i in range(len(ims)):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=10)
        # Draw on the subplot explicitly instead of relying on pyplot's
        # "current axes" state.
        sp.imshow(ims[i], interpolation=None if interp else 'none')

In [None]:
#imgs, labels = next(train_crops)
#plots(imgs, titles=labels, rows=4)

In [None]:
#imgs, labels = next(valid_crops)
#plots(imgs, titles=labels, rows=4)

# Loading last model weights

In [None]:
# Restore the model file written for the last finished epoch.
model_file = ptdata + '/TL1/model_weights/model-ep{}.hdf5'.format(last_epoch)
model = load_model(model_file)
print("loaded model model_weights/model-ep{}.hdf5".format(last_epoch))

# Once at least one epoch has been trained, also load the weights stored in
# the training checkpoint.
cp_path = ptdata+"/TL1/training_checkpoints/cp.ckpt"
if last_epoch > 0:
    model.load_weights(cp_path)
    print("loaded checkpoint ", cp_path)

# Checkpoint callback invoked automatically during training; it writes the
# model weights only (save_weights_only=True) to the checkpoint path.
cp_callback = tensorflow.keras.callbacks.ModelCheckpoint(
    filepath=cp_path,
    save_weights_only=True,
    verbose=1,
)

# Loss function

In [None]:
def tril_indices(n, k=0):
    """Return the indices for the lower-triangle of an (n, n) array.

    Works similarly to `np.tril_indices`.

    Args:
      n: size of the square arrays for which the returned indices will be
        valid.
      k: optional diagonal offset (see `np.tril` for details).
    Returns:
      A pair (row_indices, column_indices) of 1-D tensors listing, in
      row-major order, the positions with column <= row + k.
    """
    positions = tensorflow.range(n)
    # col_grid[i, j] == j and row_grid[i, j] == i
    col_grid = tensorflow.tile(tensorflow.expand_dims(positions, axis=0), [n, 1])
    row_grid = tensorflow.tile(tensorflow.expand_dims(positions, axis=1), [1, n])
    # (col - row) >= -k selects the upper triangle; transposing the mask
    # turns it into the lower-triangle selection.
    lower_mask = tensorflow.transpose((col_grid - row_grid) >= -k)
    row_indices = tensorflow.boolean_mask(row_grid, lower_mask)
    col_indices = tensorflow.boolean_mask(col_grid, lower_mask)
    return row_indices, col_indices

In [None]:
def ecdf(p):
    r"""Estimate the cumulative distribution function.

    The e.c.d.f. (empirical cumulative distribution function) F_n is a step
    function with jump 1/n at each observation (possibly with multiple jumps
    at one place if there are ties).
    For observations x= (x_1, x_2, ... x_n), F_n is the fraction of
    observations less or equal to t, i.e.,
    F_n(t) = #{x_i <= t} / n = 1/n \sum^{N}_{i=1} Indicator(x_i <= t).

    Here this is computed as the running sum of `p` along the class axis,
    by multiplying `p` with an upper-triangular matrix of ones.

    Args:
      p: a 2-D `Tensor` of observations of shape [batch_size, num_classes].
        Classes are assumed to be ordered.
    Returns:
      A 2-D float32 `Tensor` of estimated ECDFs with the same shape as `p`.
    """
    # The number of classes is read from the static shape.
    # NOTE(review): this is None when the class dimension is not statically
    # known; in that case replace the line with `n = 10`.
    n = p.get_shape().as_list()[1]
    indices = tril_indices(n)
    # Swap (row, column) into (column, row): the scattered ones then fill
    # the upper triangle, diagonal included.
    indices = tensorflow.transpose(tensorflow.stack([indices[1], indices[0]]))
    # Floor division keeps the shape entry an int; `/` produced a float
    # (e.g. 55.0), which is not a valid tensor dimension.
    ones = tensorflow.ones([n * (n + 1) // 2])
    triang = tensorflow.scatter_nd(indices, ones, [n, n])
    return tensorflow.linalg.matmul(tensorflow.cast(p, tensorflow.float32),
                                    tensorflow.cast(triang, tensorflow.float32))

In [None]:
def emd_loss(p, p_hat, r=2, scope=None):
    r"""Compute the Earth Mover's Distance loss.

    Hou, Le, Chen-Ping Yu, and Dimitris Samaras. "Squared Earth Mover's
    Distance-based Loss for Training Deep Neural Networks." arXiv preprint
    arXiv:1611.05916 (2016).

    Args:
      p: a 2-D `Tensor` of the ground truth probability mass functions.
      p_hat: a 2-D `Tensor` of the estimated p.m.f.-s
      r: a constant for the r-norm.
      scope: optional name scope; 'EmdLoss' is used when it is not given.
    `p` and `p_hat` are assumed to have equal mass as \sum^{N}_{i=1} p_i =
    \sum^{N}_{i=1} p_hat_i
    Returns:
      A 0-D `Tensor` of r-normed EMD loss.
    """
    # Honour the documented `scope` argument (it used to be ignored) and
    # fall back to the previous fixed name.
    with tensorflow.keras.backend.name_scope(scope or 'EmdLoss'):
        ecdf_p = ecdf(p)
        ecdf_p_hat = ecdf(p_hat)
        # Per-sample mean of |CDF difference|^r over the classes ...
        emd = tensorflow.reduce_mean(tensorflow.pow(tensorflow.abs(ecdf_p - ecdf_p_hat), r), axis=-1)
        # ... brought back to the r-norm scale ...
        emd = tensorflow.pow(emd, 1 / r)
        # ... and averaged over the batch.
        return tensorflow.reduce_mean(emd)

# SGD optimizer with an exponentially decaying learning rate

In [None]:
# SGD optimizer driven by an exponentially decaying learning rate.
#   initial learning rate : 10^-6
#   decay steps           : number of training steps in 10 epochs
#   decay rate            : 0.05
#   momentum              : 0.9
# NOTE(review): the schedule multiplies the learning rate by `decay_rate`
# over every `decay_steps` steps, so 0.05 leaves 5% of the rate after ten
# epochs. The earlier remark "Decay Rate = 1 - Decay Factor" suggests a decay
# factor of 0.95 was meant — confirm which value is intended.
initial_lr = 0.000001
steps_per_decay = my_steps_per_epoch*10
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=steps_per_decay,
    decay_rate=0.05,
)
opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)

# Compiling the model for training

In [None]:
# Attach the optimizer, the EMD loss and the accuracy metric to the model.
training_optimizer = tensorflow.keras.optimizers.get(opt)
model.compile(
    optimizer=training_optimizer,
    loss=emd_loss,
    metrics=["accuracy"],
)

# Starting training

In [None]:
# Train from `last_epoch` up to `next_epoch` (a single epoch per run) on the
# cropped batches; the checkpoint callback stores the weights along the way.
model.fit(
    train_crops,
    steps_per_epoch=my_steps_per_epoch,
    validation_data=valid_crops,
    validation_steps=my_validation_steps,
    initial_epoch=last_epoch,
    epochs=next_epoch,
    callbacks=[cp_callback],
    verbose=1,
)

In [None]:
# Announce the epoch that has just been trained.
completion_message = ("Epoch ", next_epoch, " Training complete")
print(*completion_message)

# Saving model

In [None]:
# Write the trained model, without the optimizer state, to the file named
# after the epoch that has just finished.
# NOTE(review): the epoch counter file read at the top of the notebook is not
# rewritten here — presumably an external step updates it; confirm.
trained_model_file = ptdata + '/TL1/model_weights/model-ep{}.hdf5'.format(next_epoch)
model.save(trained_model_file, include_optimizer=False)
print("model saved")