In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import tensorflow as tf
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Short aliases: `tfk` for the Keras API, `tfkl` for its layers namespace.
tfk = tf.keras
tfkl = tfk.layers

In [None]:
# Class names, one per entry, used for the preview grid titles.
labels = [
    'Apple',
    'Blueberry',
    'Cherry',
    'Corn',
    'Grape',
    'Orange',
    'Peach',
    'Pepper',
    'Potato',
    'Raspberry',
    'Soybean',
    'Squash',
    'Strawberry',
    'Tomato',
]

In [None]:
# Fix every random number generator used in the notebook to the same seed
# so that repeated runs are comparable.
seed = 42

# Hash seed exported through the environment.
os.environ['PYTHONHASHSEED'] = str(seed)

# Python and NumPy generators.
random.seed(seed)
np.random.seed(seed)

# TensorFlow: TF2 API plus the TF1-compat entry point.
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

In [None]:
# Root folder of the dataset; the train/validation sub-folders are derived from it.
dataset_dir = '../input/balanced/BalancedData-2'

# Show the working directory so the relative path above can be checked.
current_dir = os.getcwd()
print(current_dir)

In [None]:
# Preview the first training image (by sorted file name) of every class in a grid.
num_row = 5
num_col = 3
train_preview_dir = os.path.join(dataset_dir, 'train')

# figsize is (width, height) in inches: 10 x 18 for the 5 x 3 grid.
fig, axes = plt.subplots(num_row, num_col, figsize=(2*num_row, 6*num_col), squeeze=False)
grid_cells = list(axes.flat)  # row-major order, i.e. cell i is axes[i // num_col, i % num_col]

# zip() stops at the shorter sequence, so the loop follows `labels` instead of a magic count.
for ax, label in zip(grid_cells, labels):
  class_dir = os.path.join(train_preview_dir, label)
  # Sort the listing: directory order is arbitrary, sorting makes the preview reproducible.
  class_imgs = sorted(name for name in os.listdir(class_dir)
                      if os.path.isfile(os.path.join(class_dir, name)))
  ax.set_title('{}'.format(label))
  if not class_imgs:
    # Empty class folder: keep the titled, empty cell instead of raising IndexError.
    continue
  # Context manager closes the file handle once the pixels are copied into the array.
  with Image.open(os.path.join(class_dir, class_imgs[0])) as img:
    ax.imshow(np.array(img))

# Hide grid cells that have no class assigned.
for ax in grid_cells[len(labels):]:
  ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# ImageDataGenerator to read images from disk.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input

train_dir = dataset_dir + "/train"
valid_dir = dataset_dir + "/validation"

# Settings shared by the training and validation generators.
image_size = (256, 256)
batch_size = 8

# Training images: random augmentation followed by the VGG16 preprocessing function.
aug_train_data_gen = ImageDataGenerator(rotation_range=45,
                                        height_shift_range=30,
                                        width_shift_range=30,
                                        zoom_range=0.3,
                                        horizontal_flip=True,
                                        vertical_flip=True,
                                        fill_mode='nearest',
                                        brightness_range=[0.7, 1],
                                        shear_range=20,
                                        preprocessing_function=preprocess_input)

# Validation images: no augmentation, only the VGG16 preprocessing function.
valid_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Arguments common to both `flow_from_directory` calls.
flow_kwargs = dict(target_size=image_size,
                   color_mode='rgb',
                   classes=None,  # can be set to labels
                   class_mode='categorical',
                   batch_size=batch_size,
                   seed=seed)

# Training batches are reshuffled every epoch.
train_gen = aug_train_data_gen.flow_from_directory(directory=train_dir,
                                                    shuffle=True,
                                                    **flow_kwargs)

# Validation batches keep directory order so that predictions made from this
# generator line up with `valid_gen.classes`; validation metrics are unaffected.
valid_gen = valid_data_gen.flow_from_directory(directory=valid_dir,
                                               shuffle=False,
                                               **flow_kwargs)


In [None]:
# Training length (upper bound on the number of epochs) and the image shape
# (height, width, channels) matching the generators' 256x256 RGB target size.
epochs = 100
input_shape = (256, 256, 3)

In [None]:
# Utility function to create folders and callbacks for training
from datetime import datetime

def create_folders_and_callbacks(model_name, exps_dir='model_folder', patience=10):
  """Create a timestamped experiment folder and the Keras callbacks that use it.

  The folder layout is ``<exps_dir>/<model_name>_<timestamp>/{ckpts,tb_logs}``.

  Args:
    model_name: Prefix of the experiment folder name.
    exps_dir: Root folder holding all experiments (default ``'model_folder'``).
    patience: Number of epochs passed to ``EarlyStopping`` as ``patience``.

  Returns:
    A list ``[ModelCheckpoint, TensorBoard, EarlyStopping]`` ready to be passed
    to ``Model.fit(callbacks=...)``.
  """
  now = datetime.now().strftime('%b%d_%H-%M-%S')
  exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

  ckpt_dir = os.path.join(exp_dir, 'ckpts')
  tb_dir = os.path.join(exp_dir, 'tb_logs')
  # makedirs creates the missing parents (exps_dir, exp_dir) as well, and
  # exist_ok=True makes a repeated call within the same second harmless.
  for folder in (ckpt_dir, tb_dir):
    os.makedirs(folder, exist_ok=True)

  callbacks = []

  # Model checkpoint
  # ----------------
  # Written after every epoch to the same path, so the file always holds the latest epoch.
  ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                                     save_weights_only=False, # True to save only weights
                                                     save_best_only=False) # True to save only the best epoch
  callbacks.append(ckpt_callback)

  # Visualize Learning on Tensorboard
  # ---------------------------------
  # By default shows losses and metrics for both training and validation
  tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                               profile_batch=0,
                                               histogram_freq=1)  # if > 0 (epochs) shows weights histograms
  callbacks.append(tb_callback)

  # Early Stopping
  # --------------
  # Stops on stagnating validation loss and asks Keras to restore the best weights.
  es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                 patience=patience,
                                                 restore_best_weights=True)
  callbacks.append(es_callback)

  return callbacks

In [None]:
# Download the ImageNet-pretrained VGG16 convolutional base (no classifier head),
# then print its layer summary and plot its graph.
supernet = tfk.applications.VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(224,224,3)
)
supernet.summary()
tfk.utils.plot_model(supernet)

In [None]:
# Use the supernet as feature extractor: freeze it while the new head is attached.
supernet.trainable = False

# One output unit per class name.
num_classes = len(labels)

inputs = tfk.Input(shape=input_shape)
# NOTE(review): the generators apply VGG16 `preprocess_input` before this layer;
# a stddev of 0.07 may be negligible on that value range - confirm the intended scale.
x = tfkl.GaussianNoise(0.07)(inputs)
# Bring the 256x256 input down to the 224x224 resolution the supernet was built with.
x = tfkl.Resizing(224, 224, interpolation="bicubic")(x)

x = supernet(x)
x = tfkl.Flatten(name='Flattening')(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
x = tfkl.Dense(256, activation='relu',
               kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
outputs = tfkl.Dense(num_classes, activation='softmax',
                     kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)


# Connect input and output through the Model class
ft_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')


In [None]:
# Mark the whole VGG16 sub-model as trainable.
vgg_backbone = ft_model.get_layer('vgg16')
vgg_backbone.trainable = True

In [None]:
# Freeze the first N layers of the VGG16 sub-model (indices 0..13) and list
# every layer with its resulting trainable flag.
n_frozen_layers = 14
for i, layer in enumerate(ft_model.get_layer('vgg16').layers):
  if i < n_frozen_layers:
    layer.trainable = False
  print(i, layer.name, layer.trainable)
ft_model.summary()

In [None]:
# Compile the model: categorical cross-entropy loss, Adam with learning rate 1e-4.
# `metrics` is passed as a list, the form documented for Model.compile.
ft_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

In [None]:
# Compute per-class weights: w_j = n_samples / (n_classes * n_sample_j),
# so that rarer classes receive larger weights.
# NOTE(review): the counts are presumably listed in the same order as the class
# indices produced by the generators - confirm before using them.
n_sample_j = [988,467,583,1206,1458,1748,977,765,716,264,1616,574,673,5693]
n_classes = len(n_sample_j)
n_samples = sum(n_sample_j)  # 17728
weights = [n_samples / (n_classes * count) for count in n_sample_j]


In [None]:
# Map each class index (0..13) to its weight.
class_weight = {class_idx: weights[class_idx] for class_idx in range(14)}

In [None]:
# Prepare the experiment folders and the callbacks for this run.
callbacks = create_folders_and_callbacks(model_name='CNN_Leaves')

# Train the model and keep only the per-epoch metrics dictionary.
# NOTE(review): `class_weight` is not passed to fit() - presumably intentional
# because the "balanced" dataset is used; confirm.
fit_result = ft_model.fit(
    train_gen,
    epochs=epochs,
    validation_data=valid_gen,
    callbacks=callbacks,
)
history = fit_result.history


In [None]:
# Export the trained model to disk.
# NOTE(review): this is the "best epoch" only insofar as EarlyStopping restored
# the best weights at the end of training - confirm for runs that use all epochs.
export_dir = "savedModel/TransferVGG16Balanced"
ft_model.save(export_dir)

In [None]:
# Plot the training curves: loss first, then accuracy (training vs. validation).
plt.figure(figsize=(15,5))
plt.plot(history['loss'], label='Training', alpha=.8, color='#ff7f0e')
plt.plot(history['val_loss'], label='Validation', alpha=.8, color='#4D61E2')
plt.legend(loc='upper left')
# The model is compiled with categorical (not binary) cross-entropy.
plt.title('Categorical Crossentropy')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(alpha=.3)

plt.figure(figsize=(15,5))
plt.plot(history['accuracy'], label='Training', alpha=.8, color='#ff7f0e')
plt.plot(history['val_accuracy'], label='Validation', alpha=.8, color='#4D61E2')
plt.legend(loc='upper left')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid(alpha=.3)

plt.show()

In [None]:
#checkpoint_filepath = "data_augmentation_model_folder/CNN_Leaves_Nov15_23-45-01/ckpts/cp.ckpt"
#model.load_weights(checkpoint_filepath)

In [None]:
import shutil
# Zip the exported model directory into "TransferL_Balanced.zip" in the working directory.
model = "./savedModel/TransferVGG16Balanced"
shutil.make_archive(base_name="TransferL_Balanced", format='zip', root_dir=model)