In [None]:
# Import Required Libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, classification_report


# Model Evaluation on PlantVillage Dataset

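This notebook evaluates a trained model on a held-out test slice of the PlantVillage dataset (`train[90%:]`), with class labels collapsed to plant families. It reports accuracy, precision, recall, and F1-score, and shows a confusion matrix, one-vs-rest ROC curves, and a classification report.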

In [None]:
# Evaluation settings shared by the input pipelines below.
BATCH_SIZE = 32        # batch size given to Dataset.batch()
IMG_SIZE = (128, 128)  # forwarded to preprocess() when building test_ds


In [None]:
import tensorflow_datasets as tfds
from preprocessing import preprocess

# Load Trained Model and Test Data
# NOTE(review): later cells in this notebook assign `model`, `ds_info`,
# `ds_test` and `class_names` again (using a 'train[90%:]' test split and a
# different model file), and `test_ds` is not read by any cell visible here.
# Confirm whether this cell is still needed.
arch_choice = 1  # Choose the architecture version (1, 2 or 3)
if arch_choice not in (1, 2, 3):
    raise ValueError("Invalid architecture choice")

model = keras.models.load_model(f'best_model_v{arch_choice}.h5')

# Dataset metadata comes from the builder; the test data is the last 5% of the
# 'train' split, loaded as (image, label) pairs.
ds_info = tfds.builder('plant_village').info
ds_test = tfds.load('plant_village', split='train[95%:]', as_supervised=True)

# Preprocess every example, then batch and prefetch.
test_ds = ds_test.map(
    lambda img, lbl: preprocess(img, lbl, ds_info, IMG_SIZE)
)
test_ds = test_ds.batch(BATCH_SIZE)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)
class_names = ds_info.features['label'].names


In [None]:
# Slice the dataset's 'train' split 80/10/10 into train/validation/test.
# download=False: the dataset is expected to be prepared locally already.
(ds_train, ds_val, ds_test), ds_info = tfds.load(
    name='plant_village',
    split=[
        'train[:80%]',     # -> ds_train
        'train[80%:90%]',  # -> ds_val
        'train[90%:]',     # -> ds_test
    ],
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
    download=False,
)

In [None]:
# Collapse the dataset's class labels into plant families: the family is the
# part of each class name that precedes the '___' separator.
class_names = ds_info.features['label'].names
class_families = [name.split('___')[0] for name in class_names]
families = sorted(set(class_families))
NUM_FAM = len(families)

# family_map[i] is the position in `families` of the family of class i.
family_index = {family: idx for idx, family in enumerate(families)}
family_map = tf.constant(
    [family_index[family] for family in class_families],
    dtype=tf.int32,
)

In [None]:
def folder_to_fam_ohe(image, folder_label):
    """Replace a class label with a one-hot plant-family label.

    Args:
        image: Passed through unchanged.
        folder_label: Class index used to look up the family in `family_map`.

    Returns:
        Tuple `(image, fam_ohe)`, where `fam_ohe` is the one-hot encoding
        (depth `NUM_FAM`) of the family index gathered from `family_map`.
    """
    family_one_hot = tf.one_hot(tf.gather(family_map, folder_label), NUM_FAM)
    return image, family_one_hot

In [None]:
# Test pipeline with family-level labels:
# relabel -> preprocess -> batch -> prefetch.
# NOTE(review): preprocess is mapped here with only (image, label), whereas the
# test_ds cell calls it as preprocess(image, label, ds_info, IMG_SIZE). Confirm
# that preprocess supports both call forms and accepts one-hot labels.
ds_test_fam = ds_test.map(folder_to_fam_ohe)
ds_test_fam = ds_test_fam.map(preprocess)
ds_test_fam = ds_test_fam.batch(BATCH_SIZE)
ds_test_fam = ds_test_fam.prefetch(tf.data.AUTOTUNE)

In [None]:
from tensorflow.keras.models import load_model
# Load the saved model that the cells below evaluate. This rebinds `model`,
# replacing the one loaded from best_model_v{arch_choice}.h5 earlier.
model = keras.models.load_model('best_model_wo_augmentation.h5')

In [None]:
# Generate Predictions on Test Set
# Labels and predictions are collected in the same pass over the dataset, so
# their order always matches regardless of the order examples are yielded in.
y_true = []
y_pred = []
y_score = []
for images, labels in ds_test_fam:
    # argmax over axis 1 turns each label row back into a family index
    # (assumes preprocess leaves the one-hot labels from folder_to_fam_ohe intact).
    y_true.extend(np.argmax(labels.numpy(), axis=1))
    # predict_on_batch does one forward pass on this batch. model.predict() is
    # meant for whole datasets: calling it once per batch inside a loop repeats
    # its setup work and prints a progress line for every batch.
    preds = model.predict_on_batch(images)
    y_pred.extend(np.argmax(preds, axis=1))
    y_score.append(preds)
if not y_score:
    # np.concatenate([]) would fail with a much less helpful message.
    raise ValueError("ds_test_fam yielded no batches; nothing to evaluate")
# Stack the per-batch score arrays into one (n_samples, n_outputs) array.
y_score = np.concatenate(y_score)


In [None]:
# Calculate Evaluation Metrics (Accuracy, Precision, Recall, F1)
# Precision, recall and F1 use average='weighted' across classes.
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    score_fn(y_true, y_pred, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
)
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1-score", f1),
):
    print(f"{metric_name}: {metric_value:.6f}")

In [None]:
# Plot Confusion Matrix
# normalize='true' divides each row by the number of true samples of that
# family. Passing `labels` explicitly keeps the matrix the same size and order
# as `families`, even if some family never occurs in y_true or y_pred;
# otherwise the tick labels below could be misaligned.
family_ids = np.arange(len(families))
cm = confusion_matrix(y_true, y_pred, labels=family_ids, normalize='true')

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(14, 12))
sns.heatmap(
    cm,
    annot=False,
    fmt='.2f',  # cm holds floats; the integer format 'd' fails once annot=True
    cmap='viridis',
    xticklabels=families,
    yticklabels=families,
    ax=ax,
)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion Matrix (Test Set)')
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=0)
fig.tight_layout()
plt.show()


In [None]:
# Disabled alternative: per-class confusion matrix drawn with the project's
# plotting helper. Kept as `#` comments rather than a bare string literal so
# that the cell does not echo the text as its output.
# import matplotlib.pyplot as plt
# from plotting import plot_confusion_matrix
#
# plt.figure()  # Set a larger figure size for better readability
# plot_confusion_matrix(
#     np.array(y_true),
#     np.array(y_pred),
#     classes=class_names,
#     normalize=True,
#     title="Confusion Matrix (Test Set)",
#     cmap="plasma"
# )
# plt.tight_layout()  # Ensure labels and ticks are not cut off

In [None]:
# Plot ROC Curves for Each Class
n_classes = y_score.shape[1]
# One-hot encode the true labels once; column i is the one-vs-rest target for
# class i (previously this matrix was rebuilt twice per class inside the loop).
y_true_ohe = np.eye(n_classes)[np.asarray(y_true)]
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    target = y_true_ohe[:, i]
    # ROC/AUC need both positive and negative samples; roc_auc_score raises
    # ValueError otherwise, so such classes are reported and skipped.
    if target.min() == target.max():
        print(f"Skipping ROC for {families[i]}: only one label value present in y_true")
        continue
    fpr[i], tpr[i], _ = roc_curve(target, y_score[:, i])
    roc_auc[i] = roc_auc_score(target, y_score[:, i])

fig, ax = plt.subplots(figsize=(12, 12))
# Draw curves from highest to lowest AUC so the legend is ordered by AUC.
auc_and_idx = sorted(((score, i) for i, score in roc_auc.items()), reverse=True)
for class_auc, i in auc_and_idx:
    ax.plot(fpr[i], tpr[i], label=f'{families[i]} (AUC = {class_auc:.4f})')
ax.plot([0, 1], [0, 1], 'r--', lw=2, label='Random Classifier (AUC = 0.5)')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - One vs Rest (Test Set)')
# Anchor the legend's upper-left corner just outside the axes; 'best' is meant
# for automatic placement inside the axes and is unpredictable with a point anchor.
ax.legend(fontsize='small', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()


In [None]:
# Disabled alternative: ROC plot drawn with make_roc from the project's
# plotting module, labelled with class_names instead of families.
# from plotting import make_roc
# fig, ax = plt.subplots(figsize=(16, 8))
# make_roc(np.eye(len(class_names))[y_true], y_score, class_names)
# # Move legend to a dedicated column outside the plot
# plt.legend(loc="center left", bbox_to_anchor=(1.02, 0.5), fontsize="x-small", frameon=False)
# plt.subplots_adjust(right=0.75)  # Make space for the legend
# plt.show()


In [None]:
# Display Classification Report
# `labels` pins the report rows to every family index so that `target_names`
# always lines up, even when a family is missing from both y_true and y_pred
# (without it, classification_report raises on the length mismatch).
# zero_division=0 reports 0 without a warning for metrics that have no samples.
report = classification_report(
    y_true,
    y_pred,
    labels=np.arange(len(families)),
    target_names=families,
    zero_division=0,
)
print(report)
