In [None]:
# Import Required Libraries
import os
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, classification_report
from tensorflow import keras

# Extend the module search path with ./src so later cells can import from it.
sys.path.append('./src')


# Model Evaluation on PlantVillage Dataset

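This notebook evaluates a trained model on the held-out test split (the last 5% of the PlantVillage `train` split). It reports accuracy, precision, recall and F1-score, plots a normalized confusion matrix and one-vs-rest ROC curves, and saves a per-class classification report.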

In [None]:
from src.config_par import IMG_SIZE, BATCH_SIZE


In [None]:
# Seed setting for reproducibility
# One shared seed value is pushed into every seeding entry point used by this
# notebook: Python's `random`, NumPy, and the two TensorFlow/Keras helpers.
SEED = 42
for seed_fn in (
    random.seed,
    np.random.seed,
    tf.random.set_seed,
    tf.keras.utils.set_random_seed,
):
    seed_fn(SEED)


In [None]:
import tensorflow_datasets as tfds
from preprocessing import preprocess

# Load Trained Model and Test Data

# Index of the architecture variant under evaluation. It selects both the
# weights file that is loaded and the entry of BATCH_SIZE used for batching.
arch_choice = 2
if arch_choice not in (0, 1, 2, 3, 4):
    raise ValueError("Invalid architecture choice")

weights_path = f'./weights/best_model_v{arch_choice}.h5'
model = keras.models.load_model(weights_path)

# Dataset metadata (provides the class names and is forwarded to `preprocess`).
ds_info = tfds.builder('plant_village').info
class_names = ds_info.features['label'].names

# The evaluation data is the last 5% of the 'train' split, as (image, label) pairs.
ds_test = tfds.load('plant_village', split='train[95%:]', as_supervised=True)


def _prepare_example(image, label):
    """Run the project's `preprocess` on one (image, label) pair."""
    return preprocess(image, label, ds_info, IMG_SIZE)


# Build the input pipeline step by step: preprocess -> batch -> prefetch.
batch_size = BATCH_SIZE[arch_choice]
test_ds = ds_test.map(_prepare_example)
test_ds = test_ds.batch(batch_size)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)


In [None]:
# Generate Predictions on Test Set
# Walks the test pipeline once and collects, per batch:
#   y_true  - integer class index of every sample (list)
#   y_score - the model's raw output rows, later stacked into one 2-D array
# y_pred (list of predicted class indices) is derived from y_score afterwards.
y_true = []
y_score = []
for images, labels in test_ds:
    # Labels are reduced with argmax over axis 1, so each label is expected to
    # be a vector per sample.
    # NOTE(review): presumably `preprocess` one-hot encodes labels - confirm.
    y_true.extend(np.argmax(labels.numpy(), axis=1))
    # `predict_on_batch` does a single forward pass on an already-batched
    # input. `model.predict()` is meant for whole datasets: calling it inside
    # a Python loop adds per-call overhead and prints one progress bar per batch.
    y_score.append(model.predict_on_batch(images))
y_score = np.concatenate(y_score)

# Predicted class = index of the highest score in each row.
y_pred = list(np.argmax(y_score, axis=1))


In [None]:
# Calculate Evaluation Metrics (Accuracy, Precision, Recall, F1)
# Precision, recall and F1 all use the same 'weighted' averaging mode, so the
# shared keyword argument is defined once and reused.
weighted_avg = {'average': 'weighted'}

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, **weighted_avg)
recall = recall_score(y_true, y_pred, **weighted_avg)
f1 = f1_score(y_true, y_pred, **weighted_avg)

# Print every metric with six decimal places, one per line.
for metric_label, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1-score", f1),
):
    print(f"{metric_label}: {metric_value:.6f}")


In [None]:
import matplotlib.pyplot as plt
from plotting import plot_confusion_matrix

# Plot and save the normalized confusion matrix for the test set.

# savefig does not create missing folders, so make sure the target exists.
os.makedirs('./conf-matrix', exist_ok=True)

# Set a larger figure size for better readability (one row/column per class).
# NOTE(review): this only takes effect if `plot_confusion_matrix` draws on the
# current figure rather than creating its own - confirm against its source.
plt.figure(figsize=(12, 12))
plot_confusion_matrix(
    np.array(y_true),
    np.array(y_pred),
    classes=class_names,
    normalize=True,
    title="Confusion Matrix (Test Set)",
    cmap="plasma"
)
plt.tight_layout()  # Ensure labels and ticks are not cut off
plt.savefig(f'./conf-matrix/confusion_matrix_v{arch_choice}.png', dpi=400, bbox_inches='tight')
plt.show()


In [None]:
# Plot ROC Curves for Each Class
# One-vs-rest ROC: for every class, the samples of that class are the positives
# and the model's score column for that class is the ranking signal.
n_classes = y_score.shape[1]

# One-hot encode the true labels once; column i is the binary target for class i.
# (Previously this matrix was rebuilt twice per class inside the loop.)
y_true_onehot = np.eye(n_classes)[y_true]

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_onehot[:, i], y_score[:, i])
    roc_auc[i] = roc_auc_score(y_true_onehot[:, i], y_score[:, i])

plt.figure(figsize=(12, 12))
# Draw curves in descending AUC order so the legend is sorted best-to-worst.
auc_and_idx = sorted([(roc_auc[i], i) for i in range(n_classes)], reverse=True)
for class_auc, i in auc_and_idx:
    plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {class_auc:.4f})')
plt.plot([0, 1], [0, 1], 'r--', lw=2, label='Random Classifier (AUC = 0.5)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - One vs Rest (Test Set)')
# Anchor the legend's upper-left corner just right of the axes so it sits
# outside the plot; 'best' with a point anchor gives an unpredictable placement.
plt.legend(fontsize='small', bbox_to_anchor=(1.05, 1), loc='upper left')

# savefig does not create missing folders, so make sure the target exists.
os.makedirs('./roc-curves', exist_ok=True)
plt.savefig(f'./roc-curves/roc_curve_v{arch_choice}.png', dpi=400, bbox_inches='tight')
plt.show()


In [None]:
# Display Classification Report
# Passing the full list of label indices keeps the report rows aligned with
# `class_names` even when a class is absent from both y_true and y_pred;
# without it, scikit-learn raises because the number of observed labels no
# longer matches the number of target names.
report = classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
)
print(report)

# Save the classification report to a text file
os.makedirs('./reports', exist_ok=True)  # open() does not create missing folders
with open(f'./reports/report_v{arch_choice}.txt', 'w', encoding='utf-8') as f:
    f.write(report)
