# Init

In [1]:
# Move into the dataset folder and list its contents.
%cd /kaggle/input/leaves
!ls

In [2]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from PIL import Image

from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2 as cv

import skimage as ski
import skimage.filters as skif
import skimage.color as skic


# Short aliases for the Keras API and its layers module, used by the model-building code
tfk = tf.keras
tfkl = tfk.layers
print(tf.__version__)

In [3]:
# Seed every random number generator the notebook touches so that runs are repeatable
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)   # exported for hash-seed-aware consumers
random.seed(seed)                          # Python standard-library RNG
np.random.seed(seed)                       # NumPy global RNG
tf.random.set_seed(seed)                   # TensorFlow global seed
tf.compat.v1.set_random_seed(seed)         # TF1-compatibility seed API

In [4]:
# Dataset folders
# dataset_dir is the dataset root; training_dir is the sub-folder that the
# directory iterators below read the class folders from.
dataset_dir = '/kaggle/input/leaves'
training_dir = os.path.join(dataset_dir, 'training')

# Data Augmentation + Preprocessing
## _(drop red, substitute with threshold)_

In [5]:
import skimage.filters as skif
# from PIL.Image import point

def preprocess(im):
    """Replace the red channel of an RGB image with an adaptive-threshold mask.

    Meant to be used as ``preprocessing_function`` of the image generators,
    which are configured with ``color_mode='rgb'`` and therefore hand over
    images in RGB channel order.

    Parameters
    ----------
    im : numpy.ndarray
        Three-channel image in RGB order. Values are assumed to lie in the
        0-255 range (TODO confirm for every augmentation setting), because the
        array is cast to uint8 before thresholding.

    Returns
    -------
    numpy.ndarray
        float64 array with the same shape as ``im``, scaled to [0, 1].
        Channel 0 holds the inverted adaptive-threshold mask; channels 1 and 2
        keep the (uint8-truncated) green and blue values.
    """
    im = im.astype('uint8')

    # The input is RGB, so the grayscale conversion must use RGB2GRAY;
    # BGR2GRAY would apply the red and blue luminance weights the wrong way round.
    gray = cv.cvtColor(im, cv.COLOR_RGB2GRAY)

    # Mean adaptive threshold over 11x11 neighbourhoods with a constant offset of 2
    mask = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                                cv.THRESH_BINARY, 11, 2)

    # Invert: pixels that did not exceed their local threshold become 255
    mask = 255 - mask

    out = im.astype('float64')
    out[:, :, 0] = mask   # channel 0 (red) is dropped in favour of the mask

    # Rescale here because the generators do not set `rescale`
    out /= 255
    return out

In [6]:
# from PIL.Image import point

def preprocess_skif(im):
    """Alternative preprocessing: channel 0 is replaced by an inverted Hessian filter response.

    Parameters
    ----------
    im : numpy.ndarray
        Three-channel image; it is cast to uint8 first, so values are assumed
        to lie in the 0-255 range (TODO confirm).

    Returns
    -------
    numpy.ndarray
        float64 array of the same shape. Channels 1 and 2 are the uint8 values
        divided by 255; channel 0 is ``1 - hessian(gray)``.
    """
    as_uint8 = im.astype('uint8')

    # Single-scale (sigma = 1) Hessian filter on the grayscale image, then inverted
    ridge_response = 1 - skif.hessian(skic.rgb2gray(as_uint8),
                                      mode='reflect', sigmas=[1], gamma=10)

    # Scale the colour channels before channel 0 is overwritten with the filter output
    result = as_uint8.astype('float64') / 255
    result[:, :, 0] = ridge_response
    return result

In [7]:
# Both generators use the same 20% validation split and the same preprocessing
# function; only the training generator adds random augmentation.
#
# No `dtype='uint8'` is passed: `preprocess` returns floats in [0, 1], and on
# Keras versions that use the generator dtype for the batch array, uint8 storage
# would truncate those values to 0/1. The default float dtype is safe everywhere.
# `rescale` is not set either, because `preprocess` already divides by 255
# after the adaptive thresholding.
tresh_data_gen = ImageDataGenerator(validation_split=0.2,
                                    rotation_range=30,
                                    zoom_range=0.3,
                                    horizontal_flip=True,
                                    vertical_flip=True,
                                    brightness_range=(0.7, 1.3),
                                    fill_mode='constant',
                                    preprocessing_function=preprocess)  # alternative: preprocess_skif
valid_data_gen = ImageDataGenerator(validation_split=0.2,
                                    preprocessing_function=preprocess)  # alternative: preprocess_skif


# Augmented training subset (shuffled)
train_gen2 = tresh_data_gen.flow_from_directory(directory=training_dir,
                                                subset='training',
                                                target_size=(256, 256),
                                                color_mode='rgb',
                                                classes=None,
                                                class_mode='categorical',
                                                batch_size=8,
                                                shuffle=True,
                                                seed=seed)
# Non-augmented validation subset (fixed order)
valid_gen = valid_data_gen.flow_from_directory(directory=training_dir,
                                               subset='validation',
                                               target_size=(256, 256),
                                               color_mode='rgb',
                                               classes=None,
                                               class_mode='categorical',
                                               batch_size=8,
                                               shuffle=False,
                                               seed=seed)


In [8]:
# Class-name -> index mapping exposed by the training iterator, plus its inverse
labels = train_gen2.class_indices
inv_labels = dict((index, name) for name, index in labels.items())
print(inv_labels)

In [9]:
# Spot check: class name mapped to index 3
print(inv_labels[3])

# Data Visualization
## _from generator_

In [17]:
# Draw the first image of each of the next num_row*num_col training batches,
# showing only channel 0 (the channel the preprocessing overwrites).
num_row = 2
num_col = 7
fig, axes = plt.subplots(num_row, num_col, figsize=(8*num_row,num_col))

for i in range(num_row*num_col):
    # Built-in next() drives the iterator without relying on its .next() method
    img_batch, label = next(train_gen2)
    img = img_batch[0]
    # Blank channels 1 and 2 so that only channel 0 is visible
    img[:,:,1]=0
    img[:,:,2]=0

    ax = axes[i//num_col, i%num_col]
    ax.imshow(np.array(img))

    # Decode the one-hot label into its class name for a readable title
    ax.set_title('{}'.format(inv_labels[int(np.argmax(label[0]))]))
plt.tight_layout()
plt.show()

## from Folder


In [None]:
# Read one image of class 0 straight from disk and compare the thresholding
# methods offered by skimage on its grayscale version.
class_dir = os.path.join(training_dir, inv_labels[0])
print(class_dir)
# Sort the file names so the picked image does not depend on directory listing order
class_imgs = sorted(next(os.walk(class_dir))[2])
class_img = class_imgs[0]
img_path = os.path.join(class_dir, class_img)
print(img_path)
img = cv.imread(img_path)
# cv.imread signals failure by returning None rather than raising
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(img_path))
# cv.imread yields BGR data, hence BGR2GRAY here
img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Single panel with the grayscale input; try_all_threshold opens its own figure
fig, ax = plt.subplots()
ax.imshow(img, 'gray')

skif.try_all_threshold(img)

In [None]:
# Draw the first image of each of the next num_row*num_col validation batches
# (preprocessed, not augmented) together with its class name and shape.
fig, axes = plt.subplots(num_row, num_col, figsize=(8*num_row,num_col))

for i in range(num_row*num_col):
    # Built-in next() drives the iterator without relying on its .next() method
    img_batch, label = next(valid_gen)
    img = img_batch[0]

    ax = axes[i//num_col, i%num_col]
    ax.imshow(np.array(img))

    # Title with the class of the displayed image (decoded from its one-hot
    # label), not with the class that happens to share the subplot index
    ax.set_title('{}--{}'.format(inv_labels[int(np.argmax(label[0]))], img.shape))
plt.tight_layout()
plt.show()

# __Model__

In [None]:
# Model input shape: matches the iterators' target_size=(256, 256) with color_mode='rgb'
input_shape = (256, 256, 3)
# Upper bound on training epochs passed to fit()
epochs = 100

In [None]:
# Model used for the exercise:
# (Conv + ReLU + MaxPool) x 5 + GlobalAveragePooling + FC x 2
def build_model(input_shape, num_classes=14, conv_filters=(8, 16, 32, 64, 128)):
    """Build and compile the convolutional classifier.

    The network stacks one ``Conv2D(3x3, same, relu) -> MaxPooling2D(2x2)``
    stage per entry of ``conv_filters``, followed by global average pooling,
    dropout, a 64-unit ReLU dense layer, dropout and a softmax output layer.
    All kernel initializers and dropout layers use the notebook-level ``seed``.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``tfkl.Input``.
    num_classes : int, optional
        Number of units of the softmax output layer (default 14).
    conv_filters : sequence of int, optional
        Number of filters of each convolutional stage, in order
        (default ``(8, 16, 32, 64, 128)``).

    Returns
    -------
    tf.keras.Model
        Model compiled with categorical cross-entropy, Adam and the
        ``accuracy`` metric.
    """
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Convolutional feature extractor: one conv + pool stage per filter count
    x = input_layer
    for filters in conv_filters:
        x = tfkl.Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            activation='relu',
            kernel_initializer=tfk.initializers.GlorotUniform(seed)
        )(x)
        x = tfkl.MaxPooling2D(pool_size=(2, 2))(x)

    # Classification head
    x = tfkl.GlobalAveragePooling2D()(x)
    x = tfkl.Dropout(0.3, seed=seed)(x)
    x = tfkl.Dense(
        units=64,
        name='Classifier',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        activation='relu'
    )(x)
    x = tfkl.Dropout(0.3, seed=seed)(x)
    output_layer = tfkl.Dense(
        units=num_classes,
        activation='softmax',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        name='Output'
    )(x)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model; metrics is given as a list, the form compile() documents
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['accuracy']
    )

    return model

# Training

In [None]:
# Utility function to create folders and callbacks for training
from datetime import datetime

def create_folders_and_callbacks(model_name):
    """Create a timestamped experiment folder and the callbacks that write into it.

    The folder layout, relative to the current working directory, is
    ``dataset_augmentation_experiment/<model_name>_<timestamp>/{ckpts,tb_logs}``.

    Parameters
    ----------
    model_name : str
        Prefix of the experiment folder name.

    Returns
    -------
    list
        ``[ModelCheckpoint, TensorBoard, EarlyStopping]`` callbacks, in that order.
    """
    exps_dir = os.path.join('dataset_augmentation_experiment')
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    tb_dir = os.path.join(exp_dir, 'tb_logs')
    # makedirs also creates the missing parents (exps_dir, exp_dir); exist_ok
    # removes the check-then-create race of a separate os.path.exists() test
    for folder in (ckpt_dir, tb_dir):
        os.makedirs(folder, exist_ok=True)

    callbacks = []

    # Model checkpoint
    # ----------------
    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
        save_weights_only=False,  # True to save only weights
        save_best_only=True)      # True to save only the best epoch
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard
    # ---------------------------------
    # By default shows losses and metrics for both training and validation
    tb_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tb_dir,
        profile_batch=0,
        histogram_freq=1)  # if > 0 (epochs) shows weights histograms
    callbacks.append(tb_callback)

    # Early Stopping
    # --------------
    # Stop after 4 epochs without val_loss improvement and restore the best weights
    es_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=4, restore_best_weights=True)
    callbacks.append(es_callback)

    return callbacks

In [None]:
# Build model (for augmentation training)
model = build_model(input_shape)
model.summary()

In [None]:
%cd /kaggle/working
%mkdir models
%cd models

In [None]:
# Raise the TensorFlow logger threshold to ERROR (hides both WARNINGs and INFOs)
# tf.get_logger().setLevel('WARNING')   # alternative: hide INFOs only
tf.get_logger().setLevel('ERROR')

# Experiment folders and callbacks for this run
aug_callbacks = create_folders_and_callbacks(model_name='CNN_Aug_2')

# Fit on the augmented training iterator and validate on the non-augmented one;
# only the `.history` attribute of the fit result is kept
history = model.fit(train_gen2,
                    epochs=epochs,
                    validation_data=valid_gen,
                    callbacks=aug_callbacks).history

In [None]:
# Save best epoch model
model.save("data_augmentation_experiments/CNN_Aug_Best")

# Evaluation

In [None]:
# Reload the model that was just saved so that this cell defines `model_aug`
# itself and runs on a fresh kernel (Restart & Run All).
model_aug = tfk.models.load_model("data_augmentation_experiments/CNN_Aug_Best")
# NOTE: valid_gen is the validation subset of the training folder, so the
# metrics printed below are validation metrics.
model_aug_test_metrics = model_aug.evaluate(valid_gen, return_dict=True)
print("Test metrics with data augmentation")
print(model_aug_test_metrics)

# Load Model & More Training

In [None]:
# Inspect /kaggle and its input folder, then copy the `submission` input into the working directory.
# NOTE(review): the next cell expects CNN_Aug_Best directly under /kaggle/working/model,
# which presumably requires that folder not to exist before the copy - confirm.
%cd /kaggle
!ls
%ls input
%cp -r /kaggle/input/submission /kaggle/working/model/

In [None]:
# Look inside the copied folder and its CNN_Aug_Best sub-folder, then rename CNN_Aug_Best to Model.
%cd /kaggle/working/model
!ls
%cd CNN_Aug_Best
!ls
%cd ..
%mv CNN_Aug_Best /kaggle/working/model/Model

In [None]:
# Check the result of the rename.
%cd /kaggle/working/model
!ls


In [None]:
# Load the saved model from the renamed folder to continue training it
model_aug = tfk.models.load_model("/kaggle/working/model/Model")


In [None]:
# Raise the TensorFlow logger threshold to ERROR (hides both WARNINGs and INFOs)
# tf.get_logger().setLevel('WARNING')   # alternative: hide INFOs only
tf.get_logger().setLevel('ERROR')

# Fresh experiment folders and callbacks for the continued-training run
aug_callbacks = create_folders_and_callbacks(model_name='CNN_more')

# Continue fitting the reloaded model: augmented training iterator,
# non-augmented validation iterator; only `.history` of the fit result is kept
history = model_aug.fit(train_gen2,
                        epochs=epochs,
                        validation_data=valid_gen,
                        callbacks=aug_callbacks).history

In [None]:
# List the current working directory
!ls

In [None]:
# List the current working directory again
!ls

In [None]:
# Save the further-trained model (relative path: resolved against the current working directory)
model_aug.save("data_augmentation_experiments/CNN_Aug_Best")

# Zip model to download

In [None]:
# Check that the saved model folder exists and see what it contains
!ls data_augmentation_experiments/CNN_Aug_Best

In [None]:
# Zip the saved model folder as Submission.zip and move the archive to /kaggle/working
!ls data_augmentation_experiments
%cd ./data_augmentation_experiments

!ls
!zip -r Submission.zip CNN_Aug_Best

%mv Submission.zip /kaggle/working

In [None]:
# Go back up one directory level
%cd ..