## Import Libraries

Since we will download the dataset from Kaggle, we have to provide our Kaggle API token (`kaggle.json`). You can create and download the token in your Kaggle account settings.

In [None]:
# Upload the Kaggle API token (kaggle.json) that was downloaded from the Kaggle account.
# This cell only uploads the file into the Colab session; it does not move or
# configure the token.
from google.colab import files
files.upload()

In [None]:
pip install -q tensorflow tensorflow-datasets

In [None]:
pip install mlflow

In [1]:
import os, sys 
sys.path.append(os.path.dirname(os.path.realpath('/Users/paulosgidyelew/Desktop/cassava-classification-capstone/src')))
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, InputLayer, Dense, Dropout, BatchNormalization, Conv2D, Activation, MaxPooling2D
from tensorflow.keras.optimizers import RMSprop, Adam, SGD, Adagrad
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.losses import SparseCategoricalCrossentropy

from keras.preprocessing.image import ImageDataGenerator
import tensorflow_hub as hub

import warnings
import mlflow

from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report, fbeta_score
from sklearn.utils import class_weight

import itertools, cv2

from src import confusion_matrix

# MLflow settings shared by every experiment in this notebook.
EXPERIMENT_NAME = "Classava_capstone"
TRACKING_URI = "https://hudsju377cddpoevnjdkfnvpwovniewnipcdsnkvn.mlflow.neuefische.de"

# NOTE: this silences every warning category, including deprecation notices.
warnings.filterwarnings('ignore')

# Seed TensorFlow *and* NumPy: the Keras image generators draw their shuffling
# and augmentation randomness from NumPy's global RNG, so seeding TensorFlow
# alone does not make the data pipeline repeatable.
RSEED = 42
tf.random.set_seed(RSEED)
np.random.seed(RSEED)



## Pre-trained Model

In order to save time on training we can use a pre-trained model. This model was already trained with images (imagenet-ilsvrc-2012-cls). It can be found here: <a href="https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2">https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2</a>

Let us set up MlFlow in order to track our parameters and results. 

In [None]:
# Point MLflow at the tracking server and experiment, then open the run that
# the logging cells of this section write to.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
# start_run() hands back the run it has just made active.
run = mlflow.start_run(run_name='pre-trained model')

## Feature extraction

### Freeze the convolutional base

It is important to freeze the convolutional base before you compile and train the model and use it as a feature extractor. Freezing (by setting `layer.trainable = False`) prevents the weights in a given layer from being updated during training. EfficientNet V2 has many layers, so setting the entire model's trainable flag to False will freeze all of them.

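`base_model.trainable = False`

In this notebook the convolutional base is a TensorFlow Hub layer, so the same effect is achieved by passing `trainable=False` to `hub.KerasLayer`.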

Now we will initialize the model and create its architecture. Afterwards the model gets compiled and is run. The process is stored in the history.

In [None]:
# EfficientNetV2-B0 (ImageNet) feature-vector model on TensorFlow Hub.
TFL_HUB_HANDLE = 'https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2'

# Frozen hub backbone followed by a small dense head with dropout after every
# hidden layer; the last layer emits one softmax probability per class (5).
model = Sequential([
    InputLayer(input_shape=(224, 224, 3)),
    hub.KerasLayer(TFL_HUB_HANDLE, trainable=False),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),
])


In [None]:
# The generators deliver one-hot labels (class_mode='categorical'), hence the
# categorical cross-entropy loss; Adam runs with its default settings.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
model.summary()

The model expects input images of size 224x224. Therefore, we specify these dimensions with the `target_size` argument of `flow_from_directory` when creating the data sets from the `ImageDataGenerator`.

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented.
image_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)
# Validation images are only rescaled. Evaluating on randomly rotated, sheared
# and flipped images would make the validation metrics, the confusion matrix
# and the F2 score noisy and different on every run. The same validation_split
# is used so that both generators select the same train/validation partition.
val_data_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_set = image_data_generator.flow_from_directory(
    '/content/train',
    subset='training',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    interpolation='nearest',
    color_mode="rgb",
)
# shuffle=False keeps the prediction order aligned with val_set.classes.
val_set = val_data_generator.flow_from_directory(
    '/content/train',
    subset='validation',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=False,
    interpolation='nearest',
    color_mode="rgb",
)

In [None]:
# Keep only the weights of the epoch with the lowest validation loss.
model_checkpoint_filepath = '../callbacks/pre-trained.ckpt'
model_check_point = ModelCheckpoint(model_checkpoint_filepath,
                                    verbose=1,
                                    save_weights_only=True,
                                    monitor='val_loss',
                                    save_best_only=True,
                                    mode='auto'
                                    )
# The generators already yield batches of 32, so no batch_size is passed to
# fit(): Keras documents that batch_size must not be specified for generator
# input. The per-epoch metrics are collected in `history`.
history = model.fit(train_set,
                    epochs=3,
                    verbose=1,
                    callbacks=[model_check_point],
                    validation_data=val_set,
                    steps_per_epoch=len(train_set),
                    validation_steps=len(val_set)
                    )

Let us plot the training progress:

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='training')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.title('Loss Function')
plt.legend(loc='upper right')
plt.show()

### Construction of the confusion matrix

In [None]:
# Predict class probabilities for the validation set; argmax over the class
# axis turns each probability row into the index of the most likely class.
results = model.predict(val_set).argmax(axis=1)

In [None]:
# Text summary of the predictions compared with the true validation labels.
report = classification_report(y_true=val_set.classes, y_pred=results)
print(report)

In [None]:
# The name `confusion_matrix` is rebound by `from src import confusion_matrix`
# after the scikit-learn import, so it no longer refers to the scikit-learn
# function (the call below uses it as a namespace for plot_confusion_matrix).
# Import the scikit-learn function under an alias to compute the matrix.
from sklearn.metrics import confusion_matrix as sk_confusion_matrix

cm = sk_confusion_matrix(val_set.classes, results)
# NOTE(review): the label list is assumed to follow the order of
# val_set.class_indices - confirm against the directory names.
confusion_matrix.plot_confusion_matrix(
    cm, classes=['CBB', 'CBSD','CGM','CMD','Healthy'],
    title='Pre-trained'
)

Calculation of the F2 score (description can be found in the simple model chapter)

In [None]:
# F-beta score with beta=2. 'macro' averaging is used because the dataset is
# imbalanced.
F2_score = fbeta_score(y_true=val_set.classes, y_pred=results, beta=2, average='macro')
print(F2_score)

In [None]:
# Parameters that are sent to MLflow for this run.
# TODO: find more meaningful parameters to log.
params = {
    # Derived from the training history so the logged value matches the number
    # of epochs that actually ran (this model trains for 3 epochs, not the
    # previously hard-coded 10).
    "number of epochs": len(history.history['loss']),
    # Shape of a single image taken from the first validation batch.
    "input_shape": val_set[0][0][0].shape,
    "confusion matrix": cm
}

In [None]:
# Send the run's parameters, a tag and the final-epoch metrics to MLflow.
mlflow.log_params(params)
mlflow.set_tag("colab", "True")
for metric_name, history_key in (
    ("train-accuracy", 'accuracy'),
    ("val-accuracy", 'val_accuracy'),
    ("train-loss", 'loss'),
    ("val-loss", 'val_loss'),
):
    # [-1] picks the value recorded for the last training epoch.
    mlflow.log_metric(metric_name, history.history[history_key][-1])
mlflow.log_metric("F2-score", F2_score)

# The model itself is not logged: per the original author's note this would
# need an AWS connection setup (it is possible when running MLflow locally).
mlflow.end_run()

## Cassava-specific pre-trained model

The pre-trained model gave us an accuracy of 0.63. That model was trained on a wide variety of images. Now we want to use a pre-trained model that was trained on cassava leaves. This model can be found here: <a href="https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1">https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1</a>

Let us set up MlFlow in order to track our parameters and results. 

In [None]:
# Point MLflow at the tracking server and experiment, then open the run that
# the logging cells of this section write to.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
# start_run() hands back the run it has just made active.
run = mlflow.start_run(run_name='pre-trained model cassava-specific')

Now we will initialize the model and create its architecture. Afterwards the model gets compiled and is run. The process is stored in the history.

In [None]:
# CropNet cassava-disease feature-vector model on TensorFlow Hub.
TFL_HUB_HANDLE = 'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1'

# Frozen hub backbone followed by a small dense head with dropout after every
# hidden layer; the output layer has 5 units, one softmax probability per class.
model = Sequential([
    InputLayer(input_shape=(224, 224, 3)),
    hub.KerasLayer(TFL_HUB_HANDLE, trainable=False),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),
])


In [None]:
# The generators deliver one-hot labels (class_mode='categorical'), hence the
# categorical cross-entropy loss; Adam runs with its default settings.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
model.summary()

The input images should be of the size 224x224. Therefore, we have to create the input data again, using the ImageDataGenerator

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented.
image_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)
# Validation images are only rescaled. Evaluating on randomly rotated, sheared
# and flipped images would make the validation metrics, the confusion matrix
# and the F2 score noisy and different on every run. The same validation_split
# is used so that both generators select the same train/validation partition.
val_data_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_set = image_data_generator.flow_from_directory(
    '/content/train',
    subset='training',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    interpolation='nearest',
    color_mode="rgb",
)
# shuffle=False keeps the prediction order aligned with val_set.classes.
val_set = val_data_generator.flow_from_directory(
    '/content/train',
    subset='validation',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=False,
    interpolation='nearest',
    color_mode="rgb",
)

In [None]:
# Checkpoint the weights whenever the validation loss reaches a new minimum.
model_checkpoint_filepath = '../callbacks/pre-trained_cassava.ckpt'
model_check_point = ModelCheckpoint(
    filepath=model_checkpoint_filepath,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# One full pass over each generator per epoch; the per-epoch metrics are
# collected in `history`.
history = model.fit(
    train_set,
    validation_data=val_set,
    epochs=10,
    steps_per_epoch=len(train_set),
    validation_steps=len(val_set),
    callbacks=[model_check_point],
    verbose=1,
)

We reached an accuracy of 0.84 and a loss of 0.55

Let us plot the training progress:

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='training')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.title('Loss Function')
plt.legend(loc='upper right')
plt.show()

We see that the model is underfitting, meaning that we could have gotten a higher accuracy on the training set.

### Construction of the confusion matrix

In [None]:
# Predict class probabilities for the validation set; argmax over the class
# axis turns each probability row into the index of the most likely class.
results = model.predict(val_set).argmax(axis=1)

In [None]:
# Text summary of the predictions compared with the true validation labels.
report = classification_report(y_true=val_set.classes, y_pred=results)
print(report)

In [None]:
# The name `confusion_matrix` is rebound by `from src import confusion_matrix`
# after the scikit-learn import, so it no longer refers to the scikit-learn
# function (the call below uses it as a namespace for plot_confusion_matrix).
# Import the scikit-learn function under an alias to compute the matrix.
from sklearn.metrics import confusion_matrix as sk_confusion_matrix

cm = sk_confusion_matrix(val_set.classes, results)
# NOTE(review): the label list is assumed to follow the order of
# val_set.class_indices - confirm against the directory names.
confusion_matrix.plot_confusion_matrix(
    cm, classes=['CBB', 'CBSD','CGM','CMD','Healthy'],
    title='Pre-trained Cassava'
)

Calculation of the F2 score (description can be found in the simple model chapter)

In [None]:
# F-beta score with beta=2. 'macro' averaging is used because the dataset is
# imbalanced.
F2_score = fbeta_score(y_true=val_set.classes, y_pred=results, beta=2, average='macro')
print(F2_score)

In [None]:
# Parameters that are sent to MLflow for this run.
# TODO: find more meaningful parameters to log.
params = {
    # Derived from the training history instead of a hard-coded 10, so the
    # logged value always matches the number of epochs that actually ran.
    "number of epochs": len(history.history['loss']),
    # Shape of a single image taken from the first validation batch.
    "input_shape": val_set[0][0][0].shape,
    "confusion matrix": cm
}

In [None]:
# Send the run's parameters, a tag and the final-epoch metrics to MLflow.
mlflow.log_params(params)
mlflow.set_tag("colab", "True")
for metric_name, history_key in (
    ("train-accuracy", 'accuracy'),
    ("val-accuracy", 'val_accuracy'),
    ("train-loss", 'loss'),
    ("val-loss", 'val_loss'),
):
    # [-1] picks the value recorded for the last training epoch.
    mlflow.log_metric(metric_name, history.history[history_key][-1])
mlflow.log_metric("F2-score", F2_score)

# The model itself is not logged: per the original author's note this would
# need an AWS connection setup (it is possible when running MLflow locally).
mlflow.end_run()

## Cassava-specific pre-trained model + pre-processing

The cassava-specific pre-trained model gave us an accuracy of 0.84. Now we want to test whether a preprocessing step can improve the model. We are using a preprocessing function that is built into Keras (`tf.keras.applications.vgg16.preprocess_input`). It is applied in the step where the image data is produced, via the `preprocessing_function` argument of the `ImageDataGenerator`.

Let us set up MlFlow in order to track our parameters and results. 

In [None]:
# Point MLflow at the tracking server and experiment, then open the run that
# the logging cells of this section write to.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
# start_run() hands back the run it has just made active.
run = mlflow.start_run(run_name='pre-trained model cassava-specific+pre-processing')

Now we will initialize the model and create its architecture. Afterwards the model gets compiled and is run. The process is stored in the history.

In [None]:
# CropNet cassava-disease feature-vector model on TensorFlow Hub.
TFL_HUB_HANDLE = 'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1'

# Frozen hub backbone followed by a small dense head with dropout after every
# hidden layer; the output layer has 5 units, one softmax probability per class.
model = Sequential([
    InputLayer(input_shape=(224, 224, 3)),
    hub.KerasLayer(TFL_HUB_HANDLE, trainable=False),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),
])


In [None]:
# `f2_micro` was listed as a metric here, but no such name is defined or
# imported in this notebook, so the cell failed with a NameError. Only accuracy
# is tracked during training, as in the other experiments; the F2 score is
# computed after training with sklearn's fbeta_score.
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

The input images should be of the size 224x224. Therefore, we have to create the input data again, using the ImageDataGenerator

In [None]:
# Training images: VGG16 preprocessing function, rescaling and random augmentation.
# NOTE(review): vgg16.preprocess_input is designed for VGG16 inputs; combined
# with rescale=1./255 the resulting value range presumably no longer matches
# what the TF Hub model expects - confirm before drawing conclusions from this
# experiment.
image_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
)
# Validation images get the same preprocessing function and rescaling but no
# random augmentation. Evaluating on randomly rotated, sheared and flipped
# images would make the validation metrics, the confusion matrix and the F2
# score noisy and different on every run. The same validation_split is used so
# that both generators select the same train/validation partition.
val_data_generator = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
)

train_set = image_data_generator.flow_from_directory(
    '/content/train',
    subset='training',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    interpolation='nearest',
    color_mode="rgb",
)
# shuffle=False keeps the prediction order aligned with val_set.classes.
val_set = val_data_generator.flow_from_directory(
    '/content/train',
    subset='validation',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=False,
    interpolation='nearest',
    color_mode="rgb",
)

In [None]:
# Checkpoint the weights whenever the validation loss reaches a new minimum.
model_checkpoint_filepath = '../callbacks/pre-trained_cassava_preprocessing.ckpt'
model_check_point = ModelCheckpoint(
    filepath=model_checkpoint_filepath,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# One full pass over each generator per epoch; the per-epoch metrics are
# collected in `history`.
history = model.fit(
    train_set,
    validation_data=val_set,
    epochs=10,
    steps_per_epoch=len(train_set),
    validation_steps=len(val_set),
    callbacks=[model_check_point],
    verbose=1,
)

Let us plot the training progress:

In [None]:
# Final-epoch training and validation accuracy, printed side by side.
print(*(history.history[metric][-1] for metric in ('accuracy', 'val_accuracy')))

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='training')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.title('Loss Function')
plt.legend(loc='upper right')
plt.show()

### Construction of the confusion matrix

In [None]:
# Predict class probabilities for the validation set; argmax over the class
# axis turns each probability row into the index of the most likely class.
results = model.predict(val_set).argmax(axis=1)

In [None]:
# Text summary of the predictions compared with the true validation labels.
report = classification_report(y_true=val_set.classes, y_pred=results)
print(report)

In [None]:
# The name `confusion_matrix` is rebound by `from src import confusion_matrix`
# after the scikit-learn import, so it no longer refers to the scikit-learn
# function (the call below uses it as a namespace for plot_confusion_matrix).
# Import the scikit-learn function under an alias to compute the matrix.
from sklearn.metrics import confusion_matrix as sk_confusion_matrix

cm = sk_confusion_matrix(val_set.classes, results)
# The title names this experiment so the plot is not confused with the one of
# the plain cassava-specific model.
# NOTE(review): the label list is assumed to follow the order of
# val_set.class_indices - confirm against the directory names.
confusion_matrix.plot_confusion_matrix(
    cm, classes=['CBB', 'CBSD','CGM','CMD','Healthy'],
    title='Pre-trained Cassava + pre-processing'
)

Calculation of the F2 score (description can be found in the simple model chapter)

In [None]:
# F-beta score with beta=2. 'macro' averaging is used because the dataset is
# imbalanced.
F2_score = fbeta_score(y_true=val_set.classes, y_pred=results, beta=2, average='macro')
print(F2_score)

In [None]:
# Parameters that are sent to MLflow for this run.
# TODO: find more meaningful parameters to log.
params = {
    # Derived from the training history instead of a hard-coded 10, so the
    # logged value always matches the number of epochs that actually ran.
    "number of epochs": len(history.history['loss']),
    # Shape of a single image taken from the first validation batch.
    "input_shape": val_set[0][0][0].shape,
    "confusion matrix": cm
}

In [None]:
# Send the run's parameters, a tag and the final-epoch metrics to MLflow.
mlflow.log_params(params)
mlflow.set_tag("colab", "True")
for metric_name, history_key in (
    ("train-accuracy", 'accuracy'),
    ("val-accuracy", 'val_accuracy'),
    ("train-loss", 'loss'),
    ("val-loss", 'val_loss'),
):
    # [-1] picks the value recorded for the last training epoch.
    mlflow.log_metric(metric_name, history.history[history_key][-1])
mlflow.log_metric("F2-score", F2_score)

# The model itself is not logged: per the original author's note this would
# need an AWS connection setup (it is possible when running MLflow locally).
mlflow.end_run()

The preprocessing step made the prediction worse.

## Cassava-specific pre-trained model + less dropout

Since the previous models showed a tendency to underfit, we will leave out some of the dropout layers. It is likely that the model was too strongly regularized, which made the training performance worse than the validation performance.
Adding more dropout layers decreases the effective complexity of the neural network during training: in every training step a random subset of units is switched off, so only a reduced part of the network is trained at a time. Removing some of the dropout layers therefore lets the dense layers fit the training data more closely.

Let us set up MlFlow in order to track our parameters and results. 

In [None]:
# Point MLflow at the tracking server and experiment, then open the run that
# the logging cells of this section write to.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
# The run name identifies the less-dropout variant; it previously reused the
# name of the plain cassava-specific run, which made the two runs
# indistinguishable by name in the MLflow UI.
mlflow.start_run(run_name='pre-trained model cassava-specific+less-dropout')
run = mlflow.active_run()

Now we will initialize the model and create its architecture. Afterwards the model gets compiled and is run. The process is stored in the history.

In [None]:
# CropNet cassava-disease feature-vector model on TensorFlow Hub.
TFL_HUB_HANDLE = 'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1'

# Same dense head as before, but with a single dropout layer left in front of
# the 5-unit softmax output.
model = Sequential([
    InputLayer(input_shape=(224, 224, 3)),
    hub.KerasLayer(TFL_HUB_HANDLE, trainable=False),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),
])

In [None]:
# The generators deliver one-hot labels (class_mode='categorical'), hence the
# categorical cross-entropy loss; Adam runs with its default settings.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
model.summary()

The input images should be of the size 224x224. Therefore, we have to create the input data again, using the ImageDataGenerator

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented.
image_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)
# Validation images are only rescaled. Evaluating on randomly rotated, sheared
# and flipped images would make the validation metrics, the confusion matrix
# and the F2 score noisy and different on every run. The same validation_split
# is used so that both generators select the same train/validation partition.
val_data_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_set = image_data_generator.flow_from_directory(
    '/content/train',
    subset='training',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    interpolation='nearest',
    color_mode="rgb",
)
# shuffle=False keeps the prediction order aligned with val_set.classes.
val_set = val_data_generator.flow_from_directory(
    '/content/train',
    subset='validation',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
    shuffle=False,
    interpolation='nearest',
    color_mode="rgb",
)

In [None]:
# Checkpoint the weights whenever the validation loss reaches a new minimum.
model_checkpoint_filepath = '../callbacks/pre-trained_cassava-less-dropout.ckpt'
model_check_point = ModelCheckpoint(
    filepath=model_checkpoint_filepath,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# One full pass over each generator per epoch; the per-epoch metrics are
# collected in `history`.
history = model.fit(
    train_set,
    validation_data=val_set,
    epochs=1,
    steps_per_epoch=len(train_set),
    validation_steps=len(val_set),
    callbacks=[model_check_point],
    verbose=1,
)

We reached an accuracy of 0.88 and a loss of 0.39

Let us plot the training progress:

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='training')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.title('Loss Function')
plt.legend(loc='upper right')
plt.show()

### Construction of the confusion matrix

In [None]:
# Predict class probabilities for the validation set; argmax over the class
# axis turns each probability row into the index of the most likely class.
results = model.predict(val_set).argmax(axis=1)

In [None]:
# Text summary of the predictions compared with the true validation labels.
report = classification_report(y_true=val_set.classes, y_pred=results)
print(report)

In [None]:
# The name `confusion_matrix` is rebound by `from src import confusion_matrix`
# after the scikit-learn import, so it no longer refers to the scikit-learn
# function (the call below uses it as a namespace for plot_confusion_matrix).
# Import the scikit-learn function under an alias to compute the matrix.
from sklearn.metrics import confusion_matrix as sk_confusion_matrix

cm = sk_confusion_matrix(val_set.classes, results)
# The title names this experiment so the plot is not confused with the one of
# the plain cassava-specific model.
# NOTE(review): the label list is assumed to follow the order of
# val_set.class_indices - confirm against the directory names.
confusion_matrix.plot_confusion_matrix(
    cm, classes=['CBB', 'CBSD','CGM','CMD','Healthy'],
    title='Pre-trained Cassava + less dropout'
)

Calculation of the F2 score (description can be found in the simple model chapter)

In [None]:
# F-beta score with beta=2. 'macro' averaging is used because the dataset is
# imbalanced.
F2_score = fbeta_score(y_true=val_set.classes, y_pred=results, beta=2, average='macro')
print(F2_score)

In [None]:
# Parameters that are sent to MLflow for this run.
# TODO: find more meaningful parameters to log.
params = {
    # Derived from the training history so the logged value matches the number
    # of epochs that actually ran (this model trains for 1 epoch, not the
    # previously hard-coded 10).
    "number of epochs": len(history.history['loss']),
    # Shape of a single image taken from the first validation batch.
    "input_shape": val_set[0][0][0].shape,
    "confusion matrix": cm
}

In [None]:
# Send the run's parameters, a tag and the final-epoch metrics to MLflow.
mlflow.log_params(params)
mlflow.set_tag("colab", "True")
for metric_name, history_key in (
    ("train-accuracy", 'accuracy'),
    ("val-accuracy", 'val_accuracy'),
    ("train-loss", 'loss'),
    ("val-loss", 'val_loss'),
):
    # [-1] picks the value recorded for the last training epoch.
    mlflow.log_metric(metric_name, history.history[history_key][-1])
mlflow.log_metric("F2-score", F2_score)

# The model itself is not logged: per the original author's note this would
# need an AWS connection setup (it is possible when running MLflow locally).
mlflow.end_run()

We want to save the model to use it later again.

In [None]:
# Persist the model that is currently in memory (the "less dropout" variant,
# which is the last one built and trained above).
# NOTE: these are the weights from the end of training; the best-val_loss
# checkpoint written by ModelCheckpoint is not reloaded anywhere in this notebook.
model.save('../saved_model/my_model')