In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# Imports
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers 
import tensorflow.keras.layers.experimental.preprocessing as preprocessing
import seaborn as sns
import os, cv2, json, warnings
warnings.simplefilter("ignore")
import matplotlib.pyplot as plt
import IPython.display as display
from matplotlib import gridspec
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.python.keras import optimizers
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dropout, Flatten, Dense, Activation
from tensorflow.python.keras.layers import  Convolution2D, MaxPooling2D, AveragePooling2D
from tensorflow.python.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.python.keras import backend as K

def display_training_curves(training, validation, title, subplot):
    """Draw one train-vs-validation panel of a multi-panel curve figure.

    Args:
        training: Sequence of metric values for the training data; plotted
            against its index (the x-axis is labelled 'epoch').
        validation: Sequence of the same metric for the validation data.
        title: Metric name. Used as the y-axis label and, prefixed with
            'model ', as the panel title.
        subplot: Subplot code handed to ``plt.subplot`` (e.g. 211). A code
            whose last digit is 1 is treated as the first panel and opens
            a new figure before drawing.
    """
    opens_new_figure = (subplot % 10 == 1)
    if opens_new_figure:
        # Only the first panel creates the figure; later calls draw into it.
        plt.subplots(figsize=(10,10), facecolor='#F0F0F0')
        plt.tight_layout()

    panel = plt.subplot(subplot)
    panel.set_facecolor('#F8F8F8')
    for curve in (training, validation):
        panel.plot(curve)
    panel.set_title('model '+ title)
    panel.set_ylabel(title)
    panel.set_xlabel('epoch')
    # Legend order matches the plotting order above: training first.
    panel.legend(['train', 'valid.'])
# Reset Keras' global state through the public API so that re-running the
# notebook starts from a clean graph.
keras.backend.clear_session()

# Root folder of the dataset; every later path is built from it.
WORK_DIR = '../input/cassava-leaf-disease-classification'
# Printed explicitly: a bare expression in the middle of a cell is not displayed.
print(sorted(os.listdir(WORK_DIR)))

# Input data files are available in the read-only "../input/" directory.
# Summarise each directory (file count plus a few sample names) instead of
# printing every single path, which floods the cell output for image folders.
MAX_LISTED_PER_DIR = 5
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(f'{dirname}: {len(filenames)} files')
    for filename in sorted(filenames)[:MAX_LISTED_PER_DIR]:
        print('    ' + os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Peek at the training-image folder: report how many entries it holds and show
# only the first few names rather than dumping the full listing into the output.
train_image_names = sorted(os.listdir(os.path.join(WORK_DIR, "train_images")))
print(f'{len(train_image_names)} files in train_images')
train_image_names[:10]

In [None]:
# Read train.csv from WORK_DIR into a DataFrame; the bare name on the last
# line renders it as the cell's output.
labels_csv_path = os.path.join(WORK_DIR, "train.csv")
train_labels = pd.read_csv(labels_csv_path)
train_labels

In [None]:
# Pretty-print the JSON file that maps label numbers to disease names.
label_map_path = os.path.join(WORK_DIR, "label_num_to_disease_map.json")
with open(label_map_path) as map_file:
    label_map = json.load(map_file)
print(json.dumps(label_map, indent=4))

In [None]:
"""
Parameters
"""
# Number of images per batch produced by both data generators.
BATCH_SIZE = 32
# Number of training epochs; also the divisor used to derive the optimizers'
# learning-rate decay (LR / EPOCHS).
EPOCHS = 20
# Side length in pixels: images are resized to TARGET_SIZE x TARGET_SIZE and the
# models take (TARGET_SIZE, TARGET_SIZE, 3) inputs.
TARGET_SIZE = 226
# Passed as steps_per_epoch: batches drawn from the training generator per epoch.
STEPS = 10
# Passed as validation_steps: batches drawn from the validation generator per epoch.
VALID_STEPS =5
# Initial learning rate for both optimizers (SGD and Adam).
LR = 0.005

# Reproducability
def set_seed(seed=32):
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
set_seed()

# Matplotlib defaults, written straight into rcParams: automatic layout, bold
# axis labels and titles, and 'magma' as the default image colormap.
plt.rcParams.update({
    'figure.autolayout': True,
    'axes.labelweight': 'bold',
    'axes.labelsize': 'large',
    'axes.titleweight': 'bold',
    'axes.titlesize': 18,
    'axes.titlepad': 10,
    'image.cmap': 'magma',
})
# Silence warnings to keep the output cells clean.
warnings.filterwarnings("ignore")


# The label column is cast to strings before it is handed to flow_from_dataframe.
train_labels["label"] = train_labels["label"].astype('str')

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    shear_range=0.2,
    height_shift_range=0.1,
    width_shift_range=0.1,
)

# Both generators rescale pixels by 1/255 and use the same 80/20 split of train_labels.
train_datagen = ImageDataGenerator(validation_split=0.2,
                                   rescale=1. / 255,
                                   **augmentation_options)
test_datagen = ImageDataGenerator(validation_split=0.2,
                                  rescale=1. / 255)

# Options shared by the training and validation flows.
shared_flow_options = dict(
    directory=os.path.join(WORK_DIR, "train_images"),
    x_col="image_id",
    y_col="label",
    target_size=(TARGET_SIZE, TARGET_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="sparse",
)

train_generator = train_datagen.flow_from_dataframe(
    train_labels, subset="training", **shared_flow_options)

# Despite its name, test_generator serves the "validation" subset of the training table.
test_generator = test_datagen.flow_from_dataframe(
    train_labels, subset="validation", **shared_flow_options)


In [None]:
# Bar chart of how many training rows carry each class label.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize = (10, 4))

# Keep only the bottom spine so the bars sit on a single baseline.
for side in ['top', 'right', 'left']:
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('black')

# Materialise the reversed palette as a list: a `reversed` object is a one-shot
# iterator, whereas a list can be measured and re-read safely.
class_palette = list(reversed(sns.color_palette("Spectral", 5)))

# The series is passed by keyword (`x=`): newer seaborn releases accept only
# `data` positionally. `ax=ax` draws explicitly on the axes styled above.
sns.countplot(x = train_labels.label, edgecolor = 'black',
              palette = class_palette, ax = ax)
ax.set_xlabel('Classes', fontfamily = 'serif', size = 15)
ax.set_ylabel('Count', fontfamily = 'serif', size = 15)
plt.xticks(fontfamily = 'serif', size = 12)
plt.yticks(fontfamily = 'serif', size = 12)
ax.grid(axis = 'y', linestyle = '--', alpha = 0.9)
plt.show()

In [None]:
# Transfer-learning model: a MobileNetV2 convolutional base with a small dense
# head on top. The base/head definitions are restored here because the rest of
# the cell uses `baseModel` and `headModel` and fails with NameError without them.

# Build the MobileNetV2 base without its fully-connected classification head.
MOBILENET_WEIGHTS = '../input/mobilenetv2weights/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5'
baseModel = MobileNetV2(weights=None, include_top=False,
                        input_shape=(TARGET_SIZE, TARGET_SIZE, 3))
# Load the weights only when the weights file is actually present, so the
# notebook still runs end to end without it.
if os.path.exists(MOBILENET_WEIGHTS):
    baseModel.load_weights(MOBILENET_WEIGHTS)
else:
    print('MobileNetV2 weights not found at ' + MOBILENET_WEIGHTS
          + '; the base model keeps its random initialisation.')

# Freeze every layer of the base model so that it is *not* updated during the
# first training process; only the head below is trainable.
for layer in baseModel.layers:
    layer.trainable = False

# Construct the head of the model that is placed on top of the base model.
# Layers come from the public `tensorflow.keras.layers` module (imported as
# `layers`), the same namespace MobileNetV2 and Model are imported from.
headModel = baseModel.output
headModel = layers.AveragePooling2D(pool_size=(3, 3))(headModel)
headModel = layers.Flatten(name="flatten")(headModel)
headModel = layers.Dense(991, activation="relu")(headModel)
headModel = layers.Dropout(0.3)(headModel)
headModel = layers.Dense(991, activation="relu")(headModel)
headModel = layers.Dropout(0.4)(headModel)
headModel = layers.Dense(5, activation="softmax")(headModel)

# Place the head on top of the base model (this becomes the model to train).
# NOTE(review): the next cell rebinds `model_net_2` to a Sequential CNN, so this
# model is only trained if that cell is skipped.
model_net_2 = Model(inputs=baseModel.input, outputs=headModel)


In [None]:
# Single-convolution baseline: Conv(128) -> MaxPool -> two Dense(991) layers
# with dropout -> 5-way softmax.
# Built from the public `keras` / `layers` modules imported at the top of the
# notebook, so the model and the `tf.keras` optimizer it is later compiled with
# come from the same Keras namespace. `tensorflow.python.keras` is a private
# module and is not guaranteed to interoperate with public `tf.keras` objects.
# NOTE(review): with TARGET_SIZE = 226 the Flatten layer should emit
# 75 * 75 * 128 features, which makes the first Dense layer very large
# (roughly 7e8 weights) - confirm with model.summary() that this is intended.
model_net_2 = keras.Sequential([
    layers.Conv2D(filters=128, kernel_size=3, padding="same", activation='relu',
                  input_shape=(TARGET_SIZE, TARGET_SIZE, 3)),
    layers.MaxPooling2D(pool_size=(3, 3)),
    layers.Flatten(),
    layers.Dense(991, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(991, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(5, activation='softmax'),
])

In [None]:
# Learning-rate decay expressed as a schedule instead of the legacy `decay=`
# optimizer argument. InverseTimeDecay with decay_steps=1 yields
# LR / (1 + (LR / EPOCHS) * step), the same per-step formula as the legacy argument.
sgd_lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=LR, decay_steps=1, decay_rate=LR / EPOCHS)

# Integer class labels (class_mode="sparse" generators) -> sparse loss and metric.
model_net_2.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=sgd_lr_schedule),
  loss='sparse_categorical_crossentropy',
  metrics=['sparse_categorical_accuracy'])

model_net_2.summary()

In [None]:
# Train the baseline. `Model.fit` accepts the generators directly;
# `fit_generator` is deprecated in TensorFlow 2.
# Each epoch draws STEPS training batches and VALID_STEPS validation batches.
history = model_net_2.fit(
    train_generator,
    steps_per_epoch=STEPS,
    epochs=EPOCHS,
    validation_data=test_generator,
    validation_steps=VALID_STEPS)

In [None]:
# Persist the trained baseline twice: once as a full model file and once as a
# weights-only file.
full_model_path = 'Model_1Cov_1.h5'
weights_only_path = 'W_Model_1Cov_1.h5'
model_net_2.save(full_model_path)
model_net_2.save_weights(weights_only_path)

In [None]:
# Plot loss (panel 211) and accuracy (panel 212) for the baseline run,
# training curve against validation curve in each panel.
baseline_log = history.history
display_training_curves(
    training=baseline_log['loss'],
    validation=baseline_log['val_loss'],
    title='loss',
    subplot=211,
)
display_training_curves(
    training=baseline_log['sparse_categorical_accuracy'],
    validation=baseline_log['val_sparse_categorical_accuracy'],
    title='accuracy',
    subplot=212,
)

# Custom train CNN

In [None]:
# Two-convolution CNN: Conv(32, 5x5) -> MaxPool -> Conv(128, 3x3) -> MaxPool ->
# two Dense(991) layers with dropout -> 5-way softmax.
# Built from the public `keras` / `layers` modules imported at the top of the
# notebook, so the model and the `tf.keras` optimizer it is later compiled with
# come from the same Keras namespace. `tensorflow.python.keras` is a private
# module and is not guaranteed to interoperate with public `tf.keras` objects.
cnn = keras.Sequential([
    layers.Conv2D(filters=32, kernel_size=5, padding="same", activation='relu',
                  input_shape=(TARGET_SIZE, TARGET_SIZE, 3)),
    layers.MaxPooling2D(pool_size=(3, 3)),
    layers.Conv2D(filters=128, kernel_size=3, padding="same", activation='relu'),
    layers.MaxPooling2D(pool_size=(3, 3)),
    layers.Flatten(),
    layers.Dense(991, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(991, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(5, activation='softmax'),
])

In [None]:
# Learning-rate decay expressed as a schedule instead of the legacy `decay=`
# optimizer argument. InverseTimeDecay with decay_steps=1 yields
# LR / (1 + (LR / EPOCHS) * step), the same per-step formula as the legacy argument.
adam_lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=LR, decay_steps=1, decay_rate=LR / EPOCHS)

# Integer class labels (class_mode="sparse" generators) -> sparse loss and metric.
cnn.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=adam_lr_schedule),
  loss='sparse_categorical_crossentropy',
  metrics=['sparse_categorical_accuracy'])

cnn.summary()

In [None]:
# Train the two-convolution CNN. `Model.fit` accepts the generators directly;
# `fit_generator` is deprecated in TensorFlow 2.
# Each epoch draws STEPS training batches and VALID_STEPS validation batches.
hist = cnn.fit(
    train_generator,
    steps_per_epoch=STEPS,
    epochs=EPOCHS,
    validation_data=test_generator,
    validation_steps=VALID_STEPS)
    

In [None]:
# Persist the trained CNN twice: once as a full model file and once as a
# weights-only file.
cnn_model_path = 'CNN_Model_3.h5'
cnn_weights_path = 'CNN_W_Model_3.h5'
cnn.save(cnn_model_path)
cnn.save_weights(cnn_weights_path)

In [None]:
# Plot loss (panel 211) and accuracy (panel 212) for the CNN run,
# training curve against validation curve in each panel.
cnn_log = hist.history
display_training_curves(
    training=cnn_log['loss'],
    validation=cnn_log['val_loss'],
    title='loss',
    subplot=211,
)
display_training_curves(
    training=cnn_log['sparse_categorical_accuracy'],
    validation=cnn_log['val_sparse_categorical_accuracy'],
    title='accuracy',
    subplot=212,
)