<a href="https://colab.research.google.com/github/Gabriel-git24/Cacao_disease_classification/blob/main/project_ds2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install keras-tuner --quiet
import os
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import keras_tuner as kt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# need to sort images into folders based on their labels
# # DO NOT UNCOMMENT THIS,
# THIS IS TO DIVIDE THE IMAGES INTO FOLDERS BASED ON THEIR LABELS
# AND THEN USES SPLITFOLDER TO CREATE TRAIN TEST VALIDATION
'''
sana = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/Sana"
fito = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/Fito"
monilia = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/Monilia"

def filter_images(directory):
  image_extension = [".jpg", ".jpeg", ".png"]
  destination_dir = directory + "_images" # defining image destination path

  os.makedirs(destination_dir, exist_ok=True) # creating the destination

  for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    if os.path.isfile(file_path) and filename.lower().endswith(tuple(image_extension)):
      shutil.copy2(file_path, destination_dir)

filter_images(sana)
filter_images(fito)
filter_images(monilia)'''

'''!pip install split-folders --quiet

import splitfolders

input_folder = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification"
output_folder = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/Cacao_Splits"

splitfolders.ratio(
    input_folder,
    output=output_folder,
    seed=42,
    ratio = (0.7, 0.15, 0.15),
    group_prefix=None,
    move=False
)'''

'!pip install split-folders --quiet\n\nimport splitfolders\n\ninput_folder = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification"\noutput_folder = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/Cacao_Splits"\n\nsplitfolders.ratio(\n    input_folder,\n    output=output_folder,\n    seed=42,\n    ratio = (0.7, 0.15, 0.15),\n    group_prefix=None,\n    move=False\n)'

In [11]:
image_size = (224, 224)
batch_size = 32

split_dir = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/Cacao_Splits"
checkpoint_path = "/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/checkpoint/best_model.keras"

datagen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_directory(
    directory=f"{split_dir}/train",
    target_size=image_size,
    batch_size=batch_size,
    class_mode="binary"
)

validation_generator = datagen.flow_from_directory(
    directory=f"{split_dir}/val",
    target_size=image_size,
    batch_size=batch_size,
    class_mode="binary"
)

test_generator = datagen.flow_from_directory(
    directory=f"{split_dir}/test",
    target_size=image_size,
    batch_size=batch_size,
    class_mode="binary"
)

Found 218 images belonging to 2 classes.
Found 46 images belonging to 2 classes.
Found 48 images belonging to 2 classes.


In [12]:
#HYPERPARAMETER TUNING USING KERAS TUNER

def build_model(hp):
  base_model = MobileNetV2(
      weights = "imagenet",
      include_top = False,
      input_shape = image_size + (3,) # the 3 is for the color channel, in this case the three primary colors
  )
  base_model.trainable = False

  x = base_model.output
  x = GlobalAveragePooling2D()(x)

  #tuning the number of units in the dense layer
  hp_units = hp.Int("units", min_value=32, max_value=256, step=32)
  x = Dense(units=hp_units, activation="relu")(x)

  #tuning the dropout rate
  hp_dropout = hp.Float("dropout", min_value=0.2, max_value=0.5, step=0.1)
  x = Dropout(hp_dropout)(x)

  predictions = Dense(1, activation="sigmoid")(x)

  model = Model(
      inputs = base_model.input,
      outputs = predictions
  )

  #tuning the learning rate
  hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

  # compiling the model
  model.compile(
      optimizer = Adam(learning_rate=hp_learning_rate),
      loss = "binary_crossentropy",
      metrics = ["accuracy"]
  )

  return model

# tuner = kt.Hyperband(
#     build_model,
#     objective="val_accuracy",
#     max_epochs=10,
#     factor=3,
#     directory="my_dir",
#     project_name="my_project"
# )

early_stopping = EarlyStopping(
    monitor = "val_loss",
    patience = 5,
    restore_best_weights = True
)

tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=2,
    directory="/content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/hyperparameter_stuff",
    project_name="mobile_net_tuning"
)

tuner.search(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    callbacks=[early_stopping]
)

Reloading Tuner from /content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/hyperparameter_stuff/mobile_net_tuning/tuner0.json


In [13]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The optimal hyperparameters are:
- Units in Dense layer: {best_hps.get('units')}
- Dropout rate: {best_hps.get('dropout'):.2f}
- Learning rate for Adam: {best_hps.get('learning_rate'):.5f}
""")

# 2. Get the best model
best_model = tuner.get_best_models(num_models=1)[0]



The optimal hyperparameters are:
- Units in Dense layer: 128
- Dropout rate: 0.30
- Learning rate for Adam: 0.00100



  saveable.load_own_variables(weights_store.get(inner_path))


In [14]:
#creating a checkpointing to beat timeout on colab

model_checkpoint_callback = ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = False,
    monitor = "val_loss",
    save_best_only = True,
    verbose = 1
)

#final_model on training set
history = best_model.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=[early_stopping, model_checkpoint_callback]
)

Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.8070 - loss: 0.4416
Epoch 1: val_loss improved from inf to 0.64689, saving model to /content/drive/MyDrive/Data_Science_Project_fall2024/Data Science Project/Enfermedades Cacao/binary_classification/checkpoint/best_model.keras
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8s/step - accuracy: 0.8047 - loss: 0.4428 - val_accuracy: 0.6957 - val_loss: 0.6469
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.8608 - loss: 0.3215
Epoch 2: val_loss did not improve from 0.64689
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 7s/step - accuracy: 0.8593 - loss: 0.3250 - val_accuracy: 0.7391 - val_loss: 0.6556
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.8659 - loss: 0.3366
Epoch 3: val_loss improved from 0.64689 to 0.49479, saving model to /content/drive/MyDrive/Data_Science_Pro

In [15]:
import tensorflow as tf

best_model = tf.keras.models.load_model(checkpoint_path)
test_loss, test_accuracy = best_model.evaluate(test_generator)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.7222 - loss: 0.5974
Test Loss: 0.5907720923423767
Test Accuracy: 0.7083333134651184


In [16]:
'''# will be using transferred learning because of limitted data, computational efficient, and more accurate

# loading a pretrained base model

base_model = MobileNetV2(
    weights="imagenet",
    include_top = False,
    input_shape = image_size + (3,),
)

base_model.trainable = False

x = base_model.output

# Adding three layers to MobileNetV2
x = GlobalAveragePooling2D()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation="sigmoid")(x)

model = Model(
    inputs = base_model.input,
    outputs = predictions
)

model.compile(
    optimizer = Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

#stop training if validation loss doesn't improve for 5 consecutive epochs
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

#use this to enable checkpointing (save your progress)
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1
)

#Training the model
history = model.fit(
    train_generator,
    epochs=50,
    validation_data = validation_generator,
    callbacks=[early_stopping, model_checkpoint_callback]
)'''

'# will be using transferred learning because of limitted data, computational efficient, and more accurate\n\n# loading a pretrained base model\n\nbase_model = MobileNetV2(\n    weights="imagenet",\n    include_top = False,\n    input_shape = image_size + (3,),\n)\n\nbase_model.trainable = False\n\nx = base_model.output\n\n# Adding three layers to MobileNetV2\nx = GlobalAveragePooling2D()(x)\nx = Dense(64, activation=\'relu\')(x)\nx = Dropout(0.5)(x)\npredictions = Dense(1, activation="sigmoid")(x)\n\nmodel = Model(\n    inputs = base_model.input,\n    outputs = predictions\n)\n\nmodel.compile(\n    optimizer = Adam(learning_rate=0.0001),\n    loss="binary_crossentropy",\n    metrics=["accuracy"]\n)\n\n#stop training if validation loss doesn\'t improve for 5 consecutive epochs\nearly_stopping = EarlyStopping(\n    monitor="val_loss",\n    patience=5,\n    restore_best_weights=True\n)\n\n#use this to enable checkpointing (save your progress)\nmodel_checkpoint_callback = ModelCheckpoint(