In [None]:
print()
import os
import json

# Read the shared path configuration. paths["cloud"] names the entry of
# paths["paths"] to use; that entry provides the training-data and dataset roots.
with open('../paths.json', 'r') as f:
    paths = json.load(f)
ptdata = paths["paths"][paths["cloud"]]["training_data"]
pdatasets = paths["paths"][paths["cloud"]]["datasets"]

# Check if code is running in production environment
# (signalled by the presence of the ../prod.flag file)
isProd = os.path.isfile("../prod.flag")
if isProd:
    print("PROD environment")
else:
    print("DEV environment")

# Get last finished epoch number: the first line of last_epoch.num holds it.
# The context manager closes the file even if the content is not a valid
# integer (int() then raises ValueError).
with open(ptdata + "/TL2/last_epoch.num", "r") as epf:
    last_epoch = int(epf.readline())
next_epoch = last_epoch + 1


print()
print("Last epoch : ", last_epoch)
print("Next epoch : ", next_epoch)

# DEV runs use small fixed settings; the PROD values are assigned later in
# the notebook, once the dataframe has been loaded.
if not isProd:
    my_batch_size = 32
    my_steps_per_epoch = 10
    my_validation_steps = 5
    my_validation_split = 0.02
# Loading libraries

In [None]:
# Have computing units (CPUs, GPUs) ordered by their bus ID
os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID"})
# For a CPU-only run, set CUDA_VISIBLE_DEVICES to -1 by uncommenting:
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [None]:
import multiprocessing

In [None]:
import numpy as np
print('numpy version : ', np.version.version)

In [None]:
import tensorflow as tf
# Turn eager execution off through the TF1 compatibility API. This is done
# directly after the import, before any model or tensor is created below.
tf.compat.v1.disable_eager_execution()
print('tensorflow version : ', tf.__version__)

In [None]:
import tensorflow.keras
print('tensorflow keras version : ', tensorflow.keras.__version__)
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.compat.v1 import placeholder

In [None]:
import math
from matplotlib import pyplot as plt
import itertools
# NOTE(review): this repeats the pyplot import two lines above; both bind `plt`.
import matplotlib.pyplot as plt
# IPython magic selecting the inline matplotlib backend for this notebook.
%matplotlib inline
from PIL import ImageFile
# Allow PIL to load image files that are truncated instead of rejecting them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
import pandas as pd
#print('pandas version : ', pd.show_versions())

# Processing units info

In [None]:
from tensorflow.python.client import device_lib

# Show every local device TensorFlow reports
local_devices = device_lib.list_local_devices()
print(local_devices)

# Loading and processing the training and validation DataFrame

In [None]:
# Label table; the same dataframe later feeds both the training and the
# validation generator.
df = pd.read_json(path_or_buf='yf labels/train.json')
#df

# Generating and augmenting training and validation images

In [None]:
# PROD hyper-parameters; DEV values were already assigned in the first cell.
if isProd:
    my_batch_size = 64
    my_validation_split = 0.1

# Subset sizes computed the way Keras splits a dataframe: the validation
# subset receives int(n * split) rows and the training subset all remaining
# rows. (int(n * (1 - split)) can be one row short of the real training size.)
total_size = df.shape[0]
valid_size = int(total_size * my_validation_split)
train_size = total_size - valid_size

if isProd:
    # Only whole batches are counted, hence the floor division.
    my_steps_per_epoch = train_size // my_batch_size
    my_validation_steps = valid_size // my_batch_size

print("database size :", total_size, "validation split :", my_validation_split,
      "training size :", train_size,
      "validation size :", valid_size)
print("training batch size :", my_batch_size, ", steps per epoch :", my_steps_per_epoch)
print("validation batch size :", my_batch_size, ", validation steps :", my_validation_steps)

In [None]:
# Print the number of entries listed in the ava image directory (ls | wc -l).
# NOTE(review): this counts $pdatasets/ava/images, whereas the generators
# further down read from pdatasets + '/yf/images' — confirm the intended folder.
print("Available data :", end="")
!ls $pdatasets/ava/images | wc -l

In [None]:
# Training and validation images come from one and the same directory; the
# split between the two subsets is made by the generator, not by folders.
train_path = valid_path = dataset_path = pdatasets + '/yf/images'

In [None]:
# Image generator shared by the training and validation subsets:
# pixel values are rescaled by 1/255, random horizontal flips are enabled,
# and my_validation_split of the data is reserved for validation.
img_gen = ImageDataGenerator(
    validation_split=my_validation_split,
    horizontal_flip=True,
    rescale=1/255.,
)

In [None]:
# Takes the dataframe and the path to a directory and generates batches of augmented/normalized data for training.
train_batches = img_gen.flow_from_dataframe(
    dataframe=df,
    directory=train_path,
    subset="training",
    x_col="image",
    # One raw target value per quality-defect column
    y_col=[
        "Bad Exposure",
        "Bad White Balance",
        "Bad Saturation",
        "Noise",
        "Haze",
        "Undesired Blur",
        "Bad Composition",
    ],
    class_mode="raw",
    target_size=(224, 224),
    batch_size=my_batch_size,
    shuffle=True,
)

In [None]:
# Takes the dataframe and the path to a directory and generates batches of augmented/normalized data for validation.
valid_batches = img_gen.flow_from_dataframe(
    dataframe=df,
    directory=valid_path,
    subset="validation",
    x_col="image",
    # Same target columns as the training generator
    y_col=[
        "Bad Exposure",
        "Bad White Balance",
        "Bad Saturation",
        "Noise",
        "Haze",
        "Undesired Blur",
        "Bad Composition",
    ],
    class_mode="raw",
    target_size=(224, 224),
    batch_size=my_batch_size,
    shuffle=True,
)

In [None]:
# Plots images with labels within jupyter notebook
def plots(ims, figsize=(20,6), rows=1, interp=False, titles=None):
    """Draw a sequence of images on a grid with `rows` rows.

    Args:
        ims: sequence of images. When the first element is a numpy array the
            images are stacked into one array; if its last axis is not of
            size 3 the array is transposed with (0, 2, 3, 1).
        figsize: size passed to `plt.figure`.
        rows: number of grid rows; must be at least 1.
        interp: if true, use matplotlib's default interpolation, otherwise
            disable interpolation ('none').
        titles: optional sequence with one title per image.

    Returns:
        None. Nothing is drawn when `ims` is empty.

    Raises:
        ValueError: if `rows` is smaller than 1.
    """
    if rows < 1:
        raise ValueError("rows must be a positive integer")
    n_images = len(ims)
    if n_images == 0:
        # Nothing to show; avoids indexing ims[0] on an empty sequence.
        return
    if type(ims[0]) is np.ndarray:
        ims = np.array(ims)
        # Float images already scaled to [0, 1] (e.g. after a 1/255 rescale)
        # would collapse to all-black under a uint8 cast; imshow accepts them
        # as they are. Everything else keeps the uint8 conversion.
        unit_floats = np.issubdtype(ims.dtype, np.floating) and ims.max() <= 1.0
        if not unit_floats:
            ims = ims.astype(np.uint8)
        if (ims.shape[-1] != 3):
            ims = ims.transpose((0,2,3,1))
    f = plt.figure(figsize=figsize)
    # Ceiling division: enough columns so that rows * cols >= n_images for
    # any image count, not only when the count is even.
    cols = -(-n_images // rows)
    for i in range(n_images):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=10)
        # Draw on the subplot just created rather than on the implicit current axes.
        sp.imshow(ims[i], interpolation=None if interp else 'none')

In [None]:
#imgs, labels = next(train_batches)
#plots(imgs, titles=labels, rows=4)

In [None]:
#imgs, labels = next(train_batches)
#plots(imgs, titles=labels, rows=4)

# Loading last model weights

In [None]:
# Restore the model file written for the last finished epoch
model = load_model('{}/TL2/model_weights/model-ep{}.hdf5'.format(ptdata, last_epoch))
print("loaded model model_weights/model-ep{}.hdf5".format(last_epoch))

cp_path = "{}/TL2/training_checkpoints/cp.ckpt".format(ptdata)
# The checkpoint weights are only loaded once at least one epoch has finished
# (presumably no checkpoint exists before that — confirm).
if last_epoch > 0:
    model.load_weights(cp_path)
    print("loaded checkpoint ", cp_path)

# Callback handed to fit() below; it writes weights only to cp_path.
cp_callback = tensorflow.keras.callbacks.ModelCheckpoint(
    filepath=cp_path,
    verbose=1,
    save_weights_only=True,
)

# SGD optimizer

In [None]:
# SGD with momentum. `learning_rate` is the current argument name; `lr` is
# only a deprecated alias in tf.keras optimizers.
sgd = SGD(learning_rate=0.001, momentum=0.9)

# Compiling the model for training

In [None]:
# Mean squared error against the raw target columns, optimised with `sgd`.
# NOTE(review): 'accuracy' is reported alongside an MSE loss — confirm this
# metric is meaningful for these targets.
model.compile(
    optimizer=sgd,
    loss='mean_squared_error',
    metrics=['accuracy'],
)

# Starting training

In [None]:
# Train exactly one epoch: from initial_epoch=last_epoch up to epochs=next_epoch.
# The checkpoint callback is passed so weights are saved during the run.
model.fit(
    train_batches,
    validation_data=valid_batches,
    steps_per_epoch=my_steps_per_epoch,
    validation_steps=my_validation_steps,
    initial_epoch=last_epoch,
    epochs=next_epoch,
    callbacks=[cp_callback],
    verbose=1,
)

In [None]:
# Report which epoch has just been trained
print("Epoch ", next_epoch, " Training complete", sep=" ")

# Saving model

In [None]:
# Write the trained model under the new epoch number, without optimizer state.
# NOTE(review): nothing in the visible cells writes next_epoch back to
# last_epoch.num — confirm that the counter is advanced elsewhere.
model.save(
    '{}/TL2/model_weights/model-ep{}.hdf5'.format(ptdata, next_epoch),
    include_optimizer=False,
)
print("model saved")