In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files

# Prompt for the Kaggle API token file (kaggle.json).
# The result is assigned instead of being left as the cell's last expression:
# upload() returns a {filename: file-bytes} dict, and echoing it would write
# the credential contents into the saved notebook output.
uploaded_files = files.upload()
print("Uploaded:", ", ".join(uploaded_files))

In [None]:
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

In [None]:
# Make the API token readable/writable by the owner only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# PlantVillage dataset: download the `emmarex/plantdisease` archive with the
# Kaggle CLI (the unzip cell below expects it at /content/plantdisease.zip).
!kaggle datasets download -d emmarex/plantdisease

In [None]:
# Second leaf dataset, referred to as "Flavia" in this notebook.
# NOTE(review): the Kaggle slug below names the MalayaKew plant leaf dataset,
# not Flavia — confirm which dataset is actually intended.
!kaggle datasets download -d abdulhasibuddin/malayakew-plant-leaf-dataset

In [None]:
!unzip '/content/plantdisease.zip'

In [None]:
# Delete the lowercase `plantvillage` directory; the loader below reads from
# /content/PlantVillage instead.
# NOTE(review): presumably the archive ships a duplicate copy under this name — confirm.
!rm -rf plantvillage

In [None]:
!unzip '/content/malayakew-plant-leaf-dataset.zip'

In [None]:
# TF-Hub feature-vector handles for the two backbones.
# The trailing number is presumably each model's nominal input size in pixels.
mobileNet_V3_feat = 'https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/feature_vector/5'  # 224
efficientNet_V2_feat = 'https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/feature_vector/2'  # 300

# Handle and display name for each model; the names feed log directories and
# experiment names further down.
cnn_model_one, model_name_one = mobileNet_V3_feat, "MobileNet-V3-Large"
cnn_model_two, model_name_two = efficientNet_V2_feat, "EfficientNet"

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import os
import numpy as np
import matplotlib.pylab as plt
import datetime
from tensorflow import keras

%load_ext tensorboard

In [None]:
# Shared knobs: image resize target and batch size for the data pipeline, plus
# the labels that are baked into TensorBoard experiment names.
batch_size = 64
img_height = img_width = 224
transfer, dataset = "Y", "PlantVillage"

In [None]:
# Wrap each TF-Hub handle as a frozen (non-trainable) Keras layer that takes
# RGB images of the configured size.
cnn_model_layer_one, cnn_model_layer_two = (
    hub.KerasLayer(
        handle=hub_handle,
        input_shape=(img_height, img_width, 3),
        trainable=False,
    )
    for hub_handle in (cnn_model_one, cnn_model_two)
)

In [None]:
# Both splits are read from the same directory with the same split fraction,
# seed, image size and batch size; only `subset` differs.
_data_dir = str('/content/PlantVillage')
_split_options = dict(
    validation_split=0.25,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    _data_dir, subset="training", **_split_options
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    _data_dir, subset="validation", **_split_options
)

In [None]:
# Peek at a single batch to confirm tensor shapes. `image_batch` is kept in
# scope because later sanity-check cells feed it to the models.
for image_batch, labels_batch in train_ds:
  print(image_batch.shape)
  print(labels_batch.shape)
  break


def collect_labels(labelled_ds):
  """Return every label in `labelled_ds` as one 1-D NumPy array.

  Batches are gathered in a list and concatenated once, instead of growing an
  array with np.concatenate inside the loop (which re-copies all previous
  labels on every batch). Labels keep the integer dtype the dataset yields.
  """
  return np.concatenate([labels.numpy() for _, labels in labelled_ds])


# NOTE(review): the datasets above are built with the loader's default
# shuffling, so the order seen here is presumably not the order of any other
# pass over the same dataset (e.g. a later model.predict(train_ds)). Treat
# these arrays as label collections, not as row-aligned targets — confirm
# before pairing them with features extracted in a separate pass.
train_ds_labels = collect_labels(train_ds)
# Later cells reference `val_ds_labels`, which was never defined.
val_ds_labels = collect_labels(val_ds)

In [None]:
# Sanity check: run the sample batch through each frozen backbone and show the
# shape of the resulting feature tensors.
feature_one_batch, feature_two_batch = (
    backbone(image_batch)
    for backbone in (cnn_model_layer_one, cnn_model_layer_two)
)
print(feature_one_batch.shape)
print(feature_two_batch.shape)

In [None]:
# Class names as reported by the dataset loader, and how many there are.
class_names = np.array(train_ds.class_names)
num_classes = len(class_names)
print(class_names)
print(num_classes)

# Scale pixel values by 1/255 on both splits before they reach the models.
normalization_layer = tf.keras.layers.Rescaling(1./255)


def _rescale_images(images, labels):
  """Apply the rescaling layer to the images; labels pass through unchanged."""
  return normalization_layer(images), labels


train_ds = train_ds.map(_rescale_images)
val_ds = val_ds.map(_rescale_images)

# Caching + prefetching is left disabled: it crashed the Colab instance by
# exhausting RAM.
# AUTOTUNE = tf.data.AUTOTUNE
# train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

['Pepper__bell___Bacterial_spot' 'Pepper__bell___healthy'
 'Potato___Early_blight' 'Potato___Late_blight' 'Potato___healthy'
 'Tomato_Bacterial_spot' 'Tomato_Early_blight' 'Tomato_Late_blight'
 'Tomato_Leaf_Mold' 'Tomato_Septoria_leaf_spot'
 'Tomato_Spider_mites_Two_spotted_spider_mite' 'Tomato__Target_Spot'
 'Tomato__Tomato_YellowLeaf__Curl_Virus' 'Tomato__Tomato_mosaic_virus'
 'Tomato_healthy']
15


In [None]:
def _attach_softmax_head(backbone):
  """Stack a `num_classes`-way softmax Dense layer on top of `backbone`."""
  return tf.keras.Sequential([
    backbone,
    tf.keras.layers.Dense(num_classes, activation=tf.keras.activations.softmax),
  ])


# One classifier per frozen backbone.
model_one = _attach_softmax_head(cnn_model_layer_one)
model_two = _attach_softmax_head(cnn_model_layer_two)

In [None]:
# Shape check of the untrained classifiers on the sample batch.
# `image_batch` was captured before the 1/255 rescaling was mapped onto the
# datasets, so only the shapes (not the values) are meaningful here.
prediction_one = model_one(image_batch)
prediction_two = model_two(image_batch)

# Print both shapes explicitly: a bare `prediction_one.shape` in the middle of
# a cell is evaluated and discarded, only the last expression is displayed.
print(prediction_one.shape)
print(prediction_two.shape)

In [None]:
# Compile both classifiers identically: Adam, sparse categorical cross-entropy
# on probabilities (the models end in softmax), accuracy reported as 'acc'.
for _classifier in (model_one, model_two):
  _classifier.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['acc'])

# One timestamp shared by every run started from this session.
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Experiment name: <model>_<dataset>_<batch size>_<transfer flag>.
# Log directory:   logs/fit/<model>/<timestamp>/
exp_name_one = f"{model_name_one}_{dataset}_{batch_size}_{transfer}"
log_dir_one = f"logs/fit/{model_name_one}/{curr_time}/"

exp_name_two = f"{model_name_two}_{dataset}_{batch_size}_{transfer}"
log_dir_two = f"logs/fit/{model_name_two}/{curr_time}/"

# TensorBoard callbacks with histogram computation enabled for every epoch.
tensorboard_callback_one, tensorboard_callback_two = (
    tf.keras.callbacks.TensorBoard(log_dir=run_log_dir, histogram_freq=1)
    for run_log_dir in (log_dir_one, log_dir_two)
)

In [None]:
NUM_EPOCHS = 10

# Root directory for best-weights checkpoints of this session. The original
# cell referenced an undefined `checkpoint_filepath`.
checkpoint_root = os.path.join("checkpoints", curr_time)


def build_training_callbacks(run_name):
  """Return fresh EarlyStopping + ModelCheckpoint callbacks for one run.

  Each run gets its own callback instances and its own checkpoint file, so
  one model neither overwrites another model's weights nor inherits the
  best-score state of a callback object that was already used.

  Args:
    run_name: sub-directory name (under `checkpoint_root`) for this run.

  Returns:
    A flat list of Keras callbacks.
  """
  run_dir = os.path.join(checkpoint_root, run_name)
  os.makedirs(run_dir, exist_ok=True)
  return [
    keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, mode='max'),
    keras.callbacks.ModelCheckpoint(
      # "*.weights.h5" is used so the weights-only file name is acceptable to
      # both older and newer Keras releases (NOTE(review): confirm for the
      # installed version).
      filepath=os.path.join(run_dir, "best.weights.h5"),
      save_weights_only=True,
      # Select the best weights on the same validation metric that drives
      # early stopping (the original monitored training 'acc' here).
      monitor='val_acc',
      mode='max',
      save_best_only=True,
    ),
  ]


# Retained for backward compatibility with cells that reference `callbacks`.
callbacks = build_training_callbacks("final")

# Callback lists are passed flat (`*` unpacking) rather than nested.
history_one = model_one.fit(
  train_ds,
  validation_data=val_ds,
  epochs=NUM_EPOCHS,
  callbacks=[tensorboard_callback_one, *build_training_callbacks(model_name_one)]
)

history_two = model_two.fit(
  train_ds,
  validation_data=val_ds,
  epochs=NUM_EPOCHS,
  callbacks=[tensorboard_callback_two, *build_training_callbacks(model_name_two)]
)

In [None]:
# Open TensorBoard inline on the first model's log directory.
%tensorboard --logdir {log_dir_one} #MobileNetV3-Large model without FS logs

In [None]:
# Open TensorBoard inline on the second model's log directory.
%tensorboard --logdir {log_dir_two} #EfficientNetV2 model without FS logs

In [None]:
# Create the transfer-learning feature extractors: a frozen TF-Hub backbone
# followed by Flatten, one per backbone handle.
def _frozen_feature_extractor(hub_handle):
  """Build Sequential[frozen hub.KerasLayer(hub_handle), Flatten]."""
  return tf.keras.Sequential([
    hub.KerasLayer(
      handle=hub_handle,
      input_shape=(img_height, img_width, 3),
      trainable=False,
    ),
    tf.keras.layers.Flatten(),
  ])


feature_extractor_model_one = _frozen_feature_extractor(cnn_model_one)
feature_extractor_model_two = _frozen_feature_extractor(cnn_model_two)

# Same compile settings as the classifiers above.
for _extractor in (feature_extractor_model_one, feature_extractor_model_two):
  _extractor.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['acc'])

In [None]:
# Get the feature vectors
def extract_aligned_features(labelled_ds):
  """Run both extractors over `labelled_ds` in ONE pass.

  Each batch is fed to both feature extractors and its labels are recorded at
  the same time, so row i of every returned array refers to the same image.
  Separate `predict(ds)` calls each iterate the dataset on their own; with the
  loader's default shuffling those passes presumably see different orders,
  which would misalign the two feature matrices and the labels.

  Args:
    labelled_ds: dataset yielding (images, labels) batches.

  Returns:
    (features_one, features_two, labels) as NumPy arrays with equal length
    along axis 0.
  """
  features_one, features_two, labels = [], [], []
  for images, batch_labels in labelled_ds:
    features_one.append(feature_extractor_model_one.predict_on_batch(images))
    features_two.append(feature_extractor_model_two.predict_on_batch(images))
    labels.append(batch_labels.numpy())
  return (
    np.concatenate(features_one),
    np.concatenate(features_two),
    np.concatenate(labels),
  )


# The label arrays are (re)defined here so they are row-aligned with the
# feature matrices produced in the same pass.
(train_results_feature_vector_one,
 train_results_feature_vector_two,
 train_ds_labels) = extract_aligned_features(train_ds)

(val_results_feature_vector_one,
 val_results_feature_vector_two,
 val_ds_labels) = extract_aligned_features(val_ds)

# Merging the feature vectors: concatenate the two extractors' outputs along
# the feature axis.
merged_train_feature_vector = np.concatenate(
  [train_results_feature_vector_one, train_results_feature_vector_two], axis=1)
merged_val_feature_vector = np.concatenate(
  [val_results_feature_vector_one, val_results_feature_vector_two], axis=1)

assert merged_train_feature_vector.shape[0] == train_ds_labels.shape[0]
assert merged_val_feature_vector.shape[0] == val_ds_labels.shape[0]

In [None]:
# Feature Selection

In [None]:
!pip install Py-FS

In [None]:
# Hybrid Feature Selection Algorithm: a Relief filter narrows the merged
# feature vector, then a Genetic Algorithm wrapper selects from what is left.
from Py_FS.filter.Relief import Relief
from Py_FS.wrapper.nature_inspired import GA

# Work on plain NumPy arrays from here on.
train_features_np = np.asarray(merged_train_feature_vector)
val_features_np = np.asarray(merged_val_feature_vector)

# --- Stage 1: Relief on the TRAINING features ---------------------------------
# The original passed an undefined `train_results_feature_vector`; the ranks
# are used below to index `merged_train_feature_vector`, so that is the matrix
# Relief has to see.
relief_train_result = Relief(train_features_np, train_ds_labels).run()

# NOTE(review): `.run()` / `.ranks()` are kept exactly as the notebook called
# them. Confirm against the installed Py-FS release that `ranks` is callable
# and returns feature indices ordered best-first (not a per-feature rank).
relief_train_result_ranks = relief_train_result.ranks()
sorted_train_result_rank_subset = np.asarray(relief_train_result_ranks[:700], dtype=np.int32)

# This is the subset of the feature vector after ReliefF is used
subset_train_feature_vector = np.take(train_features_np, sorted_train_result_rank_subset, axis=1)

# --- Stage 2: Genetic Algorithm on the Relief subset --------------------------
ga_train_result = GA(10, 10, subset_train_feature_vector, train_ds_labels).run()

# NOTE(review): `Leader_agent[:300]` is used as a list of column indices, as in
# the original. If Py-FS returns a 0/1 selection mask here instead, this picks
# only columns 0 and 1 repeatedly — verify and convert the mask to indices if so.
final_train_feature_vector_indices = np.asarray(ga_train_result.Leader_agent[:300], dtype=np.int32)

# This is the subset of the feature vector after Genetic Algorithm is used
final_train_subset_feature_vector = np.take(
    subset_train_feature_vector, final_train_feature_vector_indices, axis=1)

# --- Validation split: reuse the columns chosen on the training split ---------
# Selection is NOT re-run on the validation data. Running Relief/GA a second
# time would pick a different set of columns, so the classifier would be
# validated on features it was not trained on (and the selection would be
# fitted on validation labels).
subset_val_feature_vector = np.take(val_features_np, sorted_train_result_rank_subset, axis=1)
final_val_subset_feature_vector = np.take(
    subset_val_feature_vector, final_train_feature_vector_indices, axis=1)

In [None]:
log_dir_final = "logs/fit/final/" + curr_time + "/"
final_model_after_fs = tf.keras.Sequential()
final_model_after_fs.add(tf.keras.layers.Dense(num_classes, activation=tf.keras.activations.softmax))

# Final model to get the dense layer extraction after the feature selection is done
final_model_after_fs.compile(
  optimizer=tf.keras.optimizers.Adam(),
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
  metrics=['acc'])

exp_name_final = "final" + "_"  + dataset + "_" + str(batch_size)

tensorboard_callback_final = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir_final,
    histogram_freq=1
) # Enable histogram computation for every epoch.

history_one = final_model_after_fs.fit(
  final_train_subset_feature_vector,
  validation_data=final_val_subset_feature_vector,
  epochs=NUM_EPOCHS,
  callbacks=[tensorboard_callback_final, callbacks]
)

%tensorboard --logdir {log_dir_final} #Final model after feature selection logs

In [None]:
# Upload the three runs' log directories (MobileNet, EfficientNet, final
# feature-selected model) with `tensorboard dev upload`, each under its
# experiment name.
# NOTE(review): the hosted TensorBoard.dev service has reportedly been shut
# down — confirm these commands still work before relying on them.
!tensorboard dev upload \
  --logdir {log_dir_one} \
  --name {exp_name_one} \
  --one_shot

!tensorboard dev upload \
  --logdir {log_dir_two} \
  --name {exp_name_two} \
  --one_shot

!tensorboard dev upload \
  --logdir {log_dir_final} \
  --name {exp_name_final} \
  --one_shot


# To run this notebook on the **Flavia dataset**, just change the dataset directory that the notebook uses to the Flavia one
## Change `img_height` and `img_width` to 300
## Change the directory that `train_ds` and `val_ds` refer to, to /content/MK/D2