# Model Evaluation on PlantVillage Dataset

This notebook evaluates the trained model using the test set. It includes metrics, confusion matrix, ROC curves, and a classification report.

In [1]:
# Initial imports
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import sys
sys.path.append('./src')
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, classification_report


In [2]:
# Colab optional setup
IS_COLAB = "google.colab" in sys.modules
print("Running on Colab:", IS_COLAB)
if IS_COLAB:
  from google.colab import drive
  drive.mount('/content/drive/', force_remount=True)
  #Adapt the folder to your specific one where you have downloaded the code
  %cd /content/drive/Othercomputers/My Mac/exam-project


Running on Colab: True
Mounted at /content/drive/
/content/drive/Othercomputers/My Mac/exam-project


In [3]:
# setup constants

ARCH_CHOICE = 0  # Choose the architecture version

WEIGHTS_DIR = './weights'  # Directory to save model weights
CONFUSION_DIR = './conf-matrix'  # Directory to save confusion matrices
ROC_AUC_DIR = './roc-curves'  # Directory to save ROC AUC plots
REPORTS_DIR = './reports'  # Directory to save reports
os.makedirs(WEIGHTS_DIR, exist_ok=True)
os.makedirs(CONFUSION_DIR, exist_ok=True)
os.makedirs(ROC_AUC_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
WEIGHTS_FILE = f'{WEIGHTS_DIR}/best_model_v{ARCH_CHOICE}.h5'  # Path to save the best model weights
CONFUSION_FILE = f'{CONFUSION_DIR}/confusion_matrix_v{ARCH_CHOICE}.png'  # Path to save confusion matrix
ROC_AUC_FILE = f'{ROC_AUC_DIR}/roc_curve_v{ARCH_CHOICE}.png'  # Path to save ROC AUC plot
REPORT_TEXT_FILE = f'{REPORTS_DIR}/report_v{ARCH_CHOICE}.txt'  # Path to save classification report
REPORT_HEATMAP_FILE = f'{REPORTS_DIR}/report_heatmap_v{ARCH_CHOICE}.png'  # Path to save heatmap of classification report

IMG_SIZE = (128, 128)  # Image size for resizing
BATCH_SIZE = 64  # Batch size dataset loading



In [4]:
# Seed setting for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)
tf.keras.utils.set_random_seed(42)


## Load the trained model and the test dataset

In [5]:
import tensorflow_datasets as tfds
from preprocessing import preprocess

model = keras.models.load_model(WEIGHTS_FILE)

ds_test, ds_info = tfds.load(
    'plant_village',
    split='train[95%:]',
    shuffle_files=False,
    as_supervised=True,  # returns (image, label) pairs
    with_info=True
)

# Preprocess the dataset
test_ds = ds_test.map(lambda image, label: preprocess(image, label, ds_info, IMG_SIZE)).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
class_names = ds_info.features['label'].names
print(f"Number of test images: {ds_info.splits['train[95%:]'].num_examples}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


TypeError: <class 'keras.src.losses.losses.CategoricalCrossentropy'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'keras.losses', 'class_name': 'CategoricalCrossentropy', 'config': {'reduction': 'auto', 'name': 'categorical_crossentropy', 'from_logits': False, 'label_smoothing': 0.0, 'axis': -1, 'fn': 'categorical_crossentropy'}, 'registered_name': None}.

Exception encountered: CategoricalCrossentropy.__init__() got an unexpected keyword argument 'fn'

## Generate predictions on the test dataset using the loaded model

In [None]:
y_true = []
y_pred = []
y_score = []
for images, labels in test_ds:
    y_true.extend(np.argmax(labels.numpy(), axis=1))
    preds = model.predict(images)
    y_pred.extend(np.argmax(preds, axis=1))
    y_score.append(preds)
y_score = np.concatenate(y_score)


### With the calculated predictions, evaluate overall model metrics on the test dataset (weighted by class)

In [None]:
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
print(f"Accuracy: {accuracy:.6f}")
print(f"Precision: {precision:.6f}")
print(f"Recall: {recall:.6f}")
print(f"F1-score: {f1:.6f}")


### Plot the confusion matrix

In [None]:
import matplotlib.pyplot as plt
from plotting import plot_confusion_matrix

plot_confusion_matrix(
    np.array(y_true),
    np.array(y_pred),
    classes=class_names,
    normalize=True,
    title="Confusion Matrix (Test Set)",
    cmap="plasma"
)
plt.tight_layout()
plt.savefig(CONFUSION_FILE, dpi=400, bbox_inches='tight')
plt.show()


### Plot the ROC curves and evaluate the AUC metric for each class

In [None]:
# Plot ROC Curves for Each Class
n_classes = y_score.shape[1]
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(np.eye(n_classes)[y_true][:, i], y_score[:, i])
    roc_auc[i] = roc_auc_score(np.eye(n_classes)[y_true][:, i], y_score[:, i])
plt.figure(figsize=(12, 12))
auc_and_idx = sorted([(roc_auc[i], i) for i in range(n_classes)], reverse=True)
for auc, i in auc_and_idx:
    plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {auc:.4f})')
plt.plot([0, 1], [0, 1], 'r--', lw=2, label='Random Classifier (AUC = 0.5)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - One vs Rest (Test Set)')
plt.legend(fontsize='small', bbox_to_anchor=(1.05, 1), loc='best')
plt.savefig(ROC_AUC_FILE, dpi=400, bbox_inches='tight')
plt.show()


### Save and plot the per-class classification report

In [None]:
# Display Classification Report
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

# Save the classification report to a text file
with open(REPORT_TEXT_FILE, 'w') as f:
    f.write(report)


In [None]:
# create a sns heatmap of the classification report
import seaborn as sns
import pandas as pd
report_df = pd.DataFrame.from_dict(
    classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
).T

fig, ax = plt.subplots(figsize=(12, 8))
hm = sns.heatmap(
    report_df.iloc[:-3, :-1],
    annot=True,
    fmt='.2f',
    cmap='viridis',
    cbar=False,
    linewidths=0.5,
    ax=ax
)
cbar = fig.colorbar(hm.collections[0], ax=ax)
cbar.set_label('Score')

plt.title('Classification Report Heatmap')
plt.xlabel('Metrics')
plt.ylabel('Classes')
plt.savefig(REPORT_HEATMAP_FILE, dpi=400, bbox_inches='tight')
plt.show()
