In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# Adjust the paths below to match the location of the `src` folder in your Google Drive
#!mkdir -p src
#!cp /content/drive/MyDrive/MACHINE_LEARNING/progetto-daml/src/preprocessing.py src/
#!cp /content/drive/MyDrive/MACHINE_LEARNING/progetto-daml/src/plotting.py src/plotting.py

In [None]:
# Import Required Libraries
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
import sys
sys.path.append('/content/src')
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, classification_report


# Model Evaluation on PlantVillage Dataset

This notebook evaluates the trained model on the test set (the last 5% of the PlantVillage `train` split). It reports accuracy, precision, recall and F1-score, and produces a confusion matrix, ROC curves, and a classification report.

In [None]:
# setup

# Number of samples per class of the training run to evaluate:
#   'max'  -> # of samples in the largest class
#   'mean' -> mean # of samples
#   't'    -> 1000 samples
#   'min'  -> # of samples in the smallest class
samples_per_class = {'max': 4399, 'mean': 1143, 't': 1000, 'min': 122}

target_type = input("Choose the number of samples per class('max', 'mean', 't', 'min'): ").strip().lower()
if target_type not in samples_per_class:
    raise ValueError("Invalid choice! Please enter 'max', 'mean', 't', or 'min'.")

print(f"You chose: {target_type}")

# `target` is the numeric sample count; `samplesize` is its string form.
target = samples_per_class[target_type]
samplesize = str(target)

# Choose augmentation type:
#'comb': geometric and color augmentation
#'geo': geometric augmentation
#'color': color augmentation

augmentation_type = input("Choose augmentation type ('comb', 'geo', 'color'): ").strip().lower()
if augmentation_type not in ['comb', 'geo', 'color']:
    # The message lists exactly the options accepted by the check above.
    raise ValueError("Invalid choice! Please enter 'comb', 'geo', or 'color'.")

print(f"You chose: {augmentation_type}")


# Base name shared by the weights file and every artifact written by this
# notebook, e.g. 'geo_aug_1000'.
COMMON_FILENAME = f'{augmentation_type}_aug_{samplesize}'

# Working directories (created below if they do not exist yet).
WEIGHTS_DIR = '/content/weights'  # Directory holding the model weights
CONFUSION_DIR = '/content/conf-matrix'  # Directory to save confusion matrices
ROC_AUC_DIR = '/content/roc-curves'  # Directory to save ROC AUC plots
REPORTS_DIR = '/content/reports'  # Directory to save reports
for directory in (WEIGHTS_DIR, CONFUSION_DIR, ROC_AUC_DIR, REPORTS_DIR):
    os.makedirs(directory, exist_ok=True)

# Input/output files, all derived from COMMON_FILENAME.
WEIGHTS_FILE = f'{WEIGHTS_DIR}/{COMMON_FILENAME}.h5'  # Weights file loaded for evaluation
CONFUSION_FILE = f'{CONFUSION_DIR}/confusion_matrix_{COMMON_FILENAME}.png'  # Confusion matrix image
ROC_AUC_FILE = f'{ROC_AUC_DIR}/roc_curve_{COMMON_FILENAME}.png'  # ROC curve plot
REPORT_TEXT_FILE = f'{REPORTS_DIR}/report_{COMMON_FILENAME}.txt'  # Classification report (text)
REPORT_HEATMAP_FILE = f'{REPORTS_DIR}/report_heatmap_{COMMON_FILENAME}.png'  # Classification report (heatmap)

IMG_SIZE = (128, 128)  # Image size passed to preprocess()
BATCH_SIZE = 64  # Batch size used when batching the test dataset

# Copying the weights from the training of interest
weights_loc = '/content/weights/'+ augmentation_type +'_aug_'+ samplesize+'.h5'
print(weights_loc)
!cp "$weights_loc" /content/weights/  #expect 'cp: 'file #1' and 'file #2' are the same file'

In [None]:
# Seed setting for reproducibility: the same seed is handed to Python's
# `random`, NumPy and TensorFlow/Keras, in that order.
SEED = 42
for set_seed in (random.seed, np.random.seed, tf.random.set_seed, tf.keras.utils.set_random_seed):
    set_seed(SEED)

In [None]:
from preprocessing import preprocess

# Load the trained model; compile=False because it is only used for inference here.
model = keras.models.load_model(WEIGHTS_FILE, compile=False)

# Test data: the last 5% of the PlantVillage 'train' split, read in file order.
ds_test, ds_info = tfds.load(
    'plant_village',
    split='train[95%:]',
    shuffle_files=False,
    as_supervised=True,  # yields (image, label) pairs
    with_info=True,
)


def _prepare(image, label):
    """Apply the project's `preprocess` helper to one (image, label) pair."""
    return preprocess(image, label, ds_info, IMG_SIZE)


# Preprocess -> batch -> prefetch.
test_ds = ds_test.map(_prepare)
test_ds = test_ds.batch(BATCH_SIZE)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

class_names = ds_info.features['label'].names
print(f"Number of test images: {ds_info.splits['train[95%:]'].num_examples}")


In [None]:
# Generate Predictions on Test Set
# y_true : true class index per sample (labels are argmax-decoded, i.e. the
#          batches are expected to carry one score/indicator per class)
# y_score: model output per sample, one column per class
# y_pred : predicted class index per sample (argmax of y_score)
y_true = []
y_score = []
for images, labels in test_ds:
    y_true.extend(np.argmax(labels.numpy(), axis=1))
    # predict_on_batch runs a single forward pass on the batch; calling
    # model.predict() inside a Python loop adds per-call overhead and prints
    # a progress bar for every batch.
    y_score.append(model.predict_on_batch(images))
y_score = np.concatenate(y_score)
y_pred = list(np.argmax(y_score, axis=1))


In [None]:
# Calculate Evaluation Metrics: overall accuracy plus weighted-average
# precision, recall and F1 over the test predictions.
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    metric(y_true, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)
print(
    f"Accuracy: {accuracy:.6f}",
    f"Precision: {precision:.6f}",
    f"Recall: {recall:.6f}",
    f"F1-score: {f1:.6f}",
    sep="\n",
)


In [None]:
# Plot the normalized confusion matrix of the test predictions and save it to
# CONFUSION_FILE.
import matplotlib.pyplot as plt
from plotting import plot_confusion_matrix

# Open a new figure with the default size (no figsize is passed here).
# NOTE(review): whether plot_confusion_matrix draws on this figure or creates
# its own is not visible from this notebook — confirm in src/plotting.py.
plt.figure()
plot_confusion_matrix(
    np.array(y_true),
    np.array(y_pred),
    classes=class_names,
    normalize=True,
    title="Confusion Matrix (Test Set)",
    cmap="plasma"
)
plt.tight_layout()  # Ensure labels and ticks are not cut off
# Saves the current pyplot figure.
plt.savefig(CONFUSION_FILE, dpi=400, bbox_inches='tight')
plt.show()


In [None]:
# Plot ROC Curves for Each Class (one-vs-rest)
n_classes = y_score.shape[1]
# One-hot encode the true labels once; column i is the binary target of class i.
y_true_onehot = np.eye(n_classes)[y_true]
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    class_target = y_true_onehot[:, i]
    # ROC AUC is undefined when the binary target holds a single value (e.g. the
    # class never occurs in the test split); roc_auc_score raises ValueError in
    # that case, so such classes are reported and skipped.
    if class_target.min() == class_target.max():
        print(f"Skipping ROC curve for '{class_names[i]}': only one label value present in the test set")
        continue
    fpr[i], tpr[i], _ = roc_curve(class_target, y_score[:, i])
    roc_auc[i] = roc_auc_score(class_target, y_score[:, i])
plt.figure(figsize=(12, 12))
# Plot the curves from highest to lowest AUC so the legend is sorted.
auc_and_idx = sorted([(auc_value, i) for i, auc_value in roc_auc.items()], reverse=True)
for auc_value, i in auc_and_idx:
    plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {auc_value:.4f})')
plt.plot([0, 1], [0, 1], 'r--', lw=2, label='Random Classifier (AUC = 0.5)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - One vs Rest (Test Set)')
# NOTE(review): loc='best' is combined with bbox_to_anchor here; consider
# loc='upper left' if the legend should sit outside the axes.
plt.legend(fontsize='small', bbox_to_anchor=(1.05, 1), loc='best')
plt.savefig(ROC_AUC_FILE, dpi=400, bbox_inches='tight')
plt.show()


In [None]:
# Display Classification Report
# `labels` is passed explicitly so the report stays aligned with `class_names`
# even when a class occurs in neither y_true nor y_pred; without it sklearn
# raises a ValueError because the number of classes found differs from
# len(target_names).
report = classification_report(
    y_true,
    y_pred,
    labels=np.arange(len(class_names)),
    target_names=class_names,
)
print(report)

# Save the classification report to a text file
with open(REPORT_TEXT_FILE, 'w', encoding='utf-8') as f:
    f.write(report)


In [None]:
# Create a sns heatmap of the classification report
import seaborn as sns
import pandas as pd

# One row per report entry, one column per metric. `labels` is passed explicitly
# so the report does not fail when a class occurs in neither y_true nor y_pred.
report_df = pd.DataFrame.from_dict(
    classification_report(
        y_true,
        y_pred,
        labels=np.arange(len(class_names)),
        target_names=class_names,
        output_dict=True,
    )
).T

fig, ax = plt.subplots(figsize=(12, 8))
hm = sns.heatmap(
    # Keep only the per-class rows and the score columns: the last three rows
    # are the report's summary rows and the last column is `support`.
    report_df.iloc[:-3, :-1],
    annot=True,
    fmt='.2f',
    cmap='viridis',
    cbar=False,  # the colorbar is added manually below so it can be labelled
    linewidths=0.5,
    ax=ax
)
cbar = fig.colorbar(hm.collections[0], ax=ax)
cbar.set_label('Score')

# Label the heatmap axes explicitly instead of relying on pyplot's current axes.
ax.set_title('Classification Report Heatmap')
ax.set_xlabel('Metrics')
ax.set_ylabel('Classes')
fig.savefig(REPORT_HEATMAP_FILE, dpi=400, bbox_inches='tight')
plt.show()
