In [None]:
import numpy as np # linear algebra
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
import itertools
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
from sklearn.metrics import accuracy_score

warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root folder of the preprocessed dataset; the three splits sit directly beneath it.
_dataset_root = '/kaggle/input/vgg16-model-laguna-dataset/laguna_dataset_preprocessed'

# Directories of the training, validation and test splits, in that order.
train_path, valid_path, test_path = (
    f'{_dataset_root}/{split}' for split in ('train', 'valid', 'test')
)

In [None]:
# Sanity check: print True/False for whether each split path is an existing
# directory, in the order train, validation, test.
for split_dir in (train_path, valid_path, test_path):
    print(os.path.isdir(split_dir))

In [None]:
# Random augmentation settings used for the training images only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=20.,
    horizontal_flip=True,
)

# Training generator: the augmentations above plus the VGG16 input preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
    **augmentation_options,
)

In [None]:
# Options every split must share so the three iterators agree on image size,
# class order (which fixes the label encoding) and batch size.
_vgg16_preprocess = tf.keras.applications.vgg16.preprocess_input
_flow_options = dict(
    target_size=(224, 224),
    classes=['Black sigatoka', 'Bunchy top', 'Healthy'],
    batch_size=10,
)

# Training set: drawn through the augmenting generator.
train_batches = train_datagen.flow_from_directory(directory=train_path, **_flow_options)

# Validation set: VGG16 preprocessing only, no augmentation.
valid_batches = ImageDataGenerator(
    preprocessing_function=_vgg16_preprocess,
).flow_from_directory(directory=valid_path, **_flow_options)

# Test set: VGG16 preprocessing only, with shuffling disabled.
test_batches = ImageDataGenerator(
    preprocessing_function=_vgg16_preprocess,
).flow_from_directory(directory=test_path, shuffle=False, **_flow_options)

In [None]:
# Guard against a changed or incomplete dataset: each split must contain
# exactly the expected number of images. The messages report the actual
# count so a failure is diagnosable without re-running anything.
assert train_batches.n == 627, f"expected 627 training images, found {train_batches.n}"
assert valid_batches.n == 177, f"expected 177 validation images, found {valid_batches.n}"
assert test_batches.n == 79, f"expected 79 test images, found {test_batches.n}"

# This is a three-class problem ('Black sigatoka', 'Bunchy top', 'Healthy'),
# and every split must expose the same three classes.
assert train_batches.num_classes == valid_batches.num_classes == test_batches.num_classes == 3, (
    "expected 3 classes in every split, found "
    f"train={train_batches.num_classes}, valid={valid_batches.num_classes}, test={test_batches.num_classes}"
)

In [None]:
# Pull one batch from the training iterator: `imgs` holds the images and
# `labels` the matching label rows (a batch size of 10 was requested when
# the iterator was created). Calling next() advances the iterator.
imgs, labels = next(train_batches)

In [None]:
def plotImages(images_arr, max_images=10):
    """
    Display images side by side in a single row with the axes hidden.

    Args:
    images_arr: Iterable of images, each in a form accepted by `imshow`.
    max_images (int): Upper bound on how many images are drawn. Defaults to 10.

    Returns:
    None. The figure is rendered with `plt.show()`.
    """
    # Materialise at most `max_images` items so the grid is sized to what is
    # actually drawn, instead of always creating ten subplots.
    images = list(itertools.islice(images_arr, max_images))
    if not images:
        # Nothing to draw; a zero-column subplot grid is not valid.
        return

    # squeeze=False keeps `axes` a 2-D array even when only one image is shown.
    fig, axes = plt.subplots(1, len(images), figsize=(20, 20), squeeze=False)

    # Draw each image on its own subplot and hide ticks and frame.
    for img, ax in zip(images, axes.flatten()):
        ax.imshow(img)
        ax.axis('off')

    # Adjust the layout and display the plot
    plt.tight_layout()
    plt.show()

In [None]:
# Show the sampled training batch as a single row of images
plotImages(imgs)

# Print the label array that accompanies those images
print(labels)

In [None]:
# Load VGG16 with its ImageNet-pretrained weights. A later cell freezes every
# layer except the two new Dense layers, so the backbone must already hold
# useful features; with weights=None it would stay a frozen, randomly
# initialised feature extractor. The generators also apply
# vgg16.preprocess_input, which is the preprocessing paired with these weights.
# NOTE(review): 'imagenet' downloads the weights on first use. On a kernel
# without internet access, pass the path of a local VGG16 weights file instead.
vgg16_model = tf.keras.applications.vgg16.VGG16(weights='imagenet')

In [None]:
# Print the layer-by-layer architecture and parameter counts of the VGG16 model
vgg16_model.summary()

In [None]:
# Record the Python class of `vgg16_model` and print it
model_type = type(vgg16_model)
print(model_type)

In [None]:
def count_params(model):
    """
    Calculate the number of trainable and non-trainable parameters in a given model.

    Args:
    model (tf.keras.Model): The model for which parameter count is calculated.
        Only its `trainable_weights` and `non_trainable_weights` lists are read.

    Returns:
    dict: A dictionary with the integer counts under the keys
        'non_trainable_params' and 'trainable_params'.
    """
    def _num_elements(weight):
        # Prefer the `.shape` attribute; fall back to the older
        # `get_shape().as_list()` accessor for weights that lack it.
        if hasattr(weight, 'shape'):
            shape = weight.shape
        else:
            shape = weight.get_shape().as_list()
        size = 1
        for dim in shape:
            size *= int(dim)
        return size

    def _total(weights):
        # Plain-int sum, so an empty weight list yields 0 rather than 0.0.
        return sum(_num_elements(w) for w in weights)

    return {
        'non_trainable_params': _total(model.non_trainable_weights),
        'trainable_params': _total(model.trainable_weights),
    }

In [None]:
# Count the parameters of the freshly loaded VGG16 model.
params = count_params(vgg16_model)

# Every VGG16 parameter is expected to be trainable at this point:
# no non-trainable parameters and 138,357,544 trainable ones.
expected_vgg16_params = {
    'non_trainable_params': 0,
    'trainable_params': 138357544,
}
for param_kind, expected_count in expected_vgg16_params.items():
    assert params[param_kind] == expected_count

In [None]:
# Keep everything up to VGG16's second-to-last layer, dropping its final layer.
truncated_vgg16 = tf.keras.Model(
    inputs=vgg16_model.input,
    outputs=vgg16_model.layers[-2].output,
)

# New classification head: a 125-unit ReLU layer feeding a 3-way softmax.
hidden_layer = Dense(units=125, activation='relu')
x = hidden_layer(truncated_vgg16.output)

output_layer = Dense(units=3, activation='softmax')
output = output_layer(x)

# Final model: the original VGG16 input through to the new softmax output.
model = tf.keras.Model(inputs=truncated_vgg16.input, outputs=output)

In [None]:
# Freeze every layer except the last two; the last two stay trainable.
first_trainable_index = len(model.layers) - 2
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable_index

In [None]:
# Print the layer-by-layer architecture and parameter counts of the new model
model.summary()

In [None]:
# Count the parameters of the new model after freezing.
params = count_params(model)

# The frozen layers should account for exactly 134,260,544 non-trainable parameters.
assert params['non_trainable_params'] == 134260544, (
    f"expected 134260544 non-trainable parameters, found {params['non_trainable_params']}"
)

# Only the two added Dense layers stay trainable:
# (4096 * 125 + 125) + (125 * 3 + 3) = 512,503 parameters.
assert params['trainable_params'] == 512503, (
    f"expected 512503 trainable parameters, found {params['trainable_params']}"
)

In [None]:
# Count the entries in each class folder of the test split.
_test_root = '/kaggle/input/vgg16-model-laguna-dataset/laguna_dataset_preprocessed/test'
num_Bunchy_top_samples = len(os.listdir(os.path.join(_test_root, 'Bunchy top')))
num_Black_sigatoka_samples = len(os.listdir(os.path.join(_test_root, 'Black sigatoka')))
num_Healthy_samples = len(os.listdir(os.path.join(_test_root, 'Healthy')))

# Report the per-class counts.
for class_label, sample_count in (
    ('Bunchy top', num_Bunchy_top_samples),
    ('Black sigatoka', num_Black_sigatoka_samples),
    ('Healthy', num_Healthy_samples),
):
    print(f"Number of {class_label} samples:", sample_count)

In [None]:
# Early-stopping callback: watches validation loss, stops after 5 epochs
# without improvement, and restores the weights from the best epoch.
# Adjust patience as needed.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Learning-rate schedule: start at 1e-4 and decay exponentially with a
# rate of 0.9 per 1000 optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=1000,
    decay_rate=0.9)
# Adam optimizer driven by the schedule above instead of a fixed learning rate.
optimizer = Adam(learning_rate=lr_schedule)

In [None]:
# Compile the model with the Adam optimizer defined earlier (its learning rate
# follows an exponential-decay schedule starting at 1e-4, not a fixed value),
# categorical cross-entropy loss, and accuracy as the evaluation metric
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for up to 16 epochs, running every training and validation batch each
# epoch; the early-stopping callback may end training sooner.
history = model.fit(
    x=train_batches,
    steps_per_epoch=len(train_batches),
    validation_data=valid_batches,
    validation_steps=len(valid_batches),
    epochs=16,
    verbose=2,
    callbacks=[early_stopping],
)

In [None]:
# Per-epoch accuracy curves recorded during training.
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# 1-based epoch numbers for the x-axis.
epochs = range(1, len(train_accuracy) + 1)

# Training accuracy in blue, validation accuracy in red.
for curve, line_format, legend_label in (
    (train_accuracy, 'b', 'Training Accuracy'),
    (val_accuracy, 'r', 'Validation Accuracy'),
):
    plt.plot(epochs, curve, line_format, label=legend_label)

# Title, axis labels and legend, then render.
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Per-epoch loss curves recorded during training.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# 1-based epoch numbers for the x-axis.
epochs = range(1, len(train_loss) + 1)

# Training loss in blue, validation loss in red.
for curve, line_format, legend_label in (
    (train_loss, 'b', 'Training Loss'),
    (val_loss, 'r', 'Validation Loss'),
):
    plt.plot(epochs, curve, line_format, label=legend_label)

# Title, axis labels and legend, then render.
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Load a batch of test images and their corresponding labels from your test dataset
test_imgs, test_labels = next(test_batches)

# Plot the test images in a grid for visualization
plotImages(test_imgs)

# Print the labels of the test images (these labels correspond to the ground truth)
print(test_labels)

In [None]:
# Run the model over the test iterator without progress output; the result
# is kept in `predictions` for the evaluation cells that follow
predictions = model.predict(x=test_batches, verbose=0)

In [None]:
# Display the 'classes' attribute of 'test_batches', which the evaluation
# cells use as the ground-truth class indices
test_batches.classes

In [None]:
# Compute the confusion matrix from the ground-truth classes and the predicted
# class indices (argmax over the last axis of the prediction array).
# NOTE(review): the next cell recomputes the same `cm`; this cell looks redundant.
cm = confusion_matrix(y_true=test_batches.classes, y_pred=np.argmax(predictions, axis=-1))

In [None]:
# `confusion_matrix` and `accuracy_score` are already imported in the first
# cell, so they are not re-imported here.

# Reduce each row of predicted class probabilities to a single class index.
# Computed once so the confusion matrix and the accuracy use the same labels.
predicted_labels = np.argmax(predictions, axis=-1)

# Confusion matrix of ground-truth classes against predicted classes
cm = confusion_matrix(y_true=test_batches.classes, y_pred=predicted_labels)

# Fraction of test images classified correctly
accuracy = accuracy_score(test_batches.classes, predicted_labels)

# Convert the accuracy to a percentage
accuracy_percentage = accuracy * 100.0

# Print the test accuracy (the confusion matrix is printed when it is plotted)
print("Test Accuracy: {:.2f}%".format(accuracy_percentage))

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Args:
    cm: Square confusion matrix (rows are true classes, columns are predicted classes).
    classes: Class names used as tick labels, in the same order as the matrix rows.
    normalize (bool): If True, each row is divided by its total so cells show
        per-true-class fractions. Rows with no samples are shown as zeros.
    title (str): Title of the plot.
    cmap: Matplotlib colormap used for the cells.

    Returns:
    None. The matrix is printed and drawn on the current matplotlib figure.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the colours, the colour bar and the cell
    # text all describe the same values.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Leave rows with zero samples at 0 instead of dividing by zero.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; fractions are rounded to two decimals.
    cell_format = '.2f' if np.issubdtype(cm.dtype, np.floating) else 'd'

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Display the 'class_indices' attribute of 'test_batches' to check which
# index each class name is encoded as
test_batches.class_indices

In [None]:
# Take the tick labels from the iterator's own name -> index mapping, ordered
# by index, so they cannot drift from the label encoding used to build `cm`.
# For this dataset that gives ['Black sigatoka', 'Bunchy top', 'Healthy'].
cm_plot_labels = sorted(test_batches.class_indices, key=test_batches.class_indices.get)
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')

In [None]:
# Ground-truth class indices and file names, as exposed by the test iterator.
test_classes = test_batches.classes
filenames = test_batches.filenames

# Predict on the test set and keep the highest-scoring class index per image.
predictions = model.predict(x=test_batches, verbose=0)
predicted_classes = np.argmax(predictions, axis=-1)

# One line per test file: true index, predicted index and file name.
for true_label, predicted_label, file_name in zip(test_classes, predicted_classes, filenames):
    print(f"True Label: {true_label}, Predicted Label: {predicted_label}, File Name: {file_name}")

In [None]:
# Save the trained model once. The existence check and the save must use the
# SAME path; previously the check looked for a different file name, so it
# never prevented an overwrite.
model_file = '/kaggle/working/models/125units-epoch16-83.54%-VGG16-model.h5'
if not os.path.isfile(model_file):
    # Make sure the target folder exists before writing the file.
    os.makedirs(os.path.dirname(model_file), exist_ok=True)
    model.save(model_file)