In [None]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score
from sklearn.metrics import auc, roc_curve
from sklearn.preprocessing import LabelBinarizer

In [None]:
# Add the parent directory (presumably the repository root — confirm) to the
# import path so the project's `src` package can be imported from this notebook.
project_root = os.path.abspath('..')
sys.path.append(project_root)
from src.data_loader import get_data_generators

In [None]:
# Evaluation settings consumed by the cells below.
BATCH_SIZE = 32                                # batch_size handed to the data generators
IMG_SIZE = (224, 224)                          # img_size handed to the data generators
DATA_DIR = '../data/raw'                       # dataset location handed to the data loader
MODEL_PATH = '../models/cloud_model_best.h5'   # saved Keras model that gets evaluated

In [None]:
# Build the validation generator and fix its iteration order for evaluation.
print("--- 1. Preparing validation environment ---")

# Only the second object returned by the project loader is used; the first
# (presumably the training generator — confirm in src.data_loader) is discarded.
_, val_gen = get_data_generators(DATA_DIR, img_size=IMG_SIZE, batch_size=BATCH_SIZE)
# Shuffling is switched off because later cells compare the predictions with
# val_gen.classes position by position, which needs a fixed sample order.
# NOTE(review): this relies on the generator honouring `shuffle` when it is
# changed after construction — confirm against the loader's generator type.
val_gen.shuffle = False
# Restart iteration from the first batch before predicting.
val_gen.reset()

In [None]:
# Restore the trained classifier from the path set in the configuration cell.
print(f"--- 2. Model loading: {MODEL_PATH} ---")
model = tf.keras.models.load_model(filepath=MODEL_PATH)

In [None]:
# Run inference over the validation generator and collect, in matching order,
# the class probabilities, the predicted class indices and the true indices.
print("--- 3. Making predictions ---")

Y_pred_probs = model.predict(val_gen, verbose=1)   # one row of class scores per sample
y_pred = np.argmax(Y_pred_probs, axis=1)           # highest-scoring class index per sample
y_true = val_gen.classes                           # ground-truth class indices from the generator

# Every metric below pairs row i of the predictions with label i, so a length
# mismatch (for example a truncated prediction pass) has to fail loudly here
# instead of surfacing later as a confusing shape error.
assert len(y_true) == len(Y_pred_probs), (
    f"Got {len(Y_pred_probs)} predictions for {len(y_true)} validation labels"
)

# Order the class names by their integer index rather than by dict insertion
# order, so class_names[i] is guaranteed to describe output column i.
class_names = sorted(val_gen.class_indices, key=val_gen.class_indices.get)

Quantitative analysis

In [None]:
# Quantitative evaluation: accuracy, top-2 accuracy, per-class
# precision/recall/F1 (also written to CSV) and the weighted one-vs-rest ROC AUC.
print("\n" + "="*50)
print("Evaluation raport on cloud classification model:")
print("="*50)

# The width of the model output is the authoritative class count. Inferring it
# from the labels breaks as soon as a class is missing from the validation set.
n_classes = Y_pred_probs.shape[1]
class_ids = np.arange(n_classes)

# Basic metrics
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.2%}")

# top-2 accuracy
# num_classes is passed explicitly so the one-hot matrix always has the same
# number of columns as Y_pred_probs.
top2_acc = tf.keras.metrics.top_k_categorical_accuracy(
    tf.keras.utils.to_categorical(y_true, num_classes=n_classes), Y_pred_probs, k=2
)
print(f"Top-2 Accuracy: {np.mean(top2_acc):.2%}")

# Classification report
# `labels` pins one report row per class so it always lines up with target_names.
print("\n Classification Report (Precision, Recall, F1-Score):")
report = classification_report(
    y_true, y_pred, labels=class_ids, target_names=class_names, output_dict=True
)
df_report = pd.DataFrame(report).transpose()
print(df_report.round(2))
df_report.to_csv('evaluation_metrics.csv')

# ROC AUC Score (Multiclass)
# Fit the binarizer on every class index (not only on the labels that happen to
# occur) so column i of y_true_binary always corresponds to output column i.
lb = LabelBinarizer()
lb.fit(class_ids)
y_true_binary = lb.transform(y_true)
if n_classes == 2:
    # LabelBinarizer emits a single column for two-class problems; expand it so
    # the indicator matrix has one column per model output.
    y_true_binary = np.hstack([1 - y_true_binary, y_true_binary])

# A class without any positive sample has an undefined AUC and carries zero
# weight in the support-weighted average, so only classes with support are scored.
has_support = y_true_binary.sum(axis=0) > 0
roc_auc = roc_auc_score(
    y_true_binary[:, has_support], Y_pred_probs[:, has_support],
    multi_class='ovr', average='weighted'
)
print(f"\n Weighted ROC AUC Score: {roc_auc:.4f} (Ideally = 1.0)")
print("-" * 50)



Statistical visualization

In [None]:
# Plot 1: Confusion Matrix (Heatmap)
# Rows are the ground-truth classes, columns are the predicted classes.
fig, ax = plt.subplots(figsize=(10, 8))
# Pin the label set to every class index so the matrix always has exactly one
# row and column per entry of class_names, even when a class never occurs in
# y_true or y_pred (otherwise the tick labels would no longer match the cells).
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Ground Truth')
ax.set_xlabel('Predicted Label')
plt.show()

In [None]:
# Plot 2: ROC Curves for each class
# Draws one one-vs-rest ROC curve per class with its AUC in the legend.
# `roc_curve` and `auc` come from the import cell at the top of the notebook.
fig, ax = plt.subplots(figsize=(12, 8))

# Assumes y_true holds integer class indices that match the model's output
# columns (it is taken from the generator's `classes` attribute above).
y_true_ids = np.asarray(y_true)

for i, class_name in enumerate(class_names):
    # Build the one-vs-rest target directly from the class index so output
    # column i is always scored against class i, independent of how many
    # columns a label binarizer happened to produce.
    is_class = (y_true_ids == i).astype(int)
    if is_class.all() or not is_class.any():
        # The ROC curve is undefined without both positive and negative samples.
        continue

    fpr, tpr, _ = roc_curve(is_class, Y_pred_probs[:, i])
    roc_auc_class = auc(fpr, tpr)

    ax.plot(fpr, tpr, label=f'{class_name} (AUC = {roc_auc_class:.2f})')

# Diagonal reference line for a classifier that guesses at random.
ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Guess')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Multiclass ROC Curves (Receiver Operating Characteristic)')
ax.legend(loc="lower right")
plt.show()