In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import tensorflow as tf
from tqdm import tqdm # For progress bar during bootstrap
from pathlib import Path

# --- Configuration & Class List (Crucial for Correct Mapping) ---
# File locations for the trained network and the keypoint dataset.
MODEL_PATH = 'model.h5'
DATA_PATH = 'keypoint.csv'

# Evaluation knobs.
TIME_STEPS = 30            # sequence length expected by the model
NOISE_LEVEL = 0.005        # std-dev of the Gaussian noise used in the robustness test
N_BOOTSTRAP_SAMPLES = 100  # number of resampling rounds for the confidence interval

# **DETERMINISTIC CLASS ORDER (A)**
# Index -> label mapping: '1'..'9' first, then 'A'..'Z' (9 + 26 = 35 classes).
class_names = list("123456789") + [chr(ord('A') + offset) for offset in range(26)]
N_CLASSES = len(class_names)

# Fixed NumPy seed so the noise draw and the bootstrap resamples are repeatable;
# seaborn theme for every figure drawn afterwards.
np.random.seed(42)
sns.set_style("whitegrid")

# Folder that receives the report CSV and the confusion-matrix PNG.
out_dir = Path("analysis_outputs")
out_dir.mkdir(parents=True, exist_ok=True)

# --- 1. Preconditions (data and model are loaded by an earlier cell) ---
# This cell loads nothing itself: it expects the reshaped test inputs, the
# integer test labels and the trained model to already exist as globals.
# `model` is part of the check because the robustness, bootstrap and final
# evaluation sections all call model.predict; without it the failure would
# be a bare NameError several lines further down.
_required_names = ('X_test_reshaped', 'y_test_indices', 'model')
_missing_names = [name for name in _required_names if name not in globals()]
if _missing_names:
    # Name exactly what is missing so the user knows which loading cell to run.
    raise RuntimeError(
        f"{', '.join(_missing_names)} must be defined before running this cell. "
        "Run data/model loading cells first."
    )

# --- 3. Perturbation Robustness Test (B) ---
# Add zero-mean Gaussian noise (std = NOISE_LEVEL) to every test input and
# measure the accuracy the model still reaches on the noisy copy.
print("\n--- Running Robustness Test ---")
X_test_perturbed = X_test_reshaped + np.random.normal(
    loc=0,
    scale=NOISE_LEVEL,
    size=X_test_reshaped.shape,
)
preds_pert = model.predict(X_test_perturbed, verbose=0)

# 1-D or single-column output: round the score to the nearest integer label.
# Otherwise there is one score per class: pick the highest-scoring column.
if preds_pert.ndim <= 1 or preds_pert.shape[1] <= 1:
    y_pred_perturbed = np.rint(preds_pert).astype(int).ravel()
else:
    y_pred_perturbed = np.argmax(preds_pert, axis=1)

perturbed_acc = accuracy_score(y_test_indices, y_pred_perturbed)
print(f"Accuracy on NOISY Test Data (Noise={NOISE_LEVEL}): {perturbed_acc*100:.2f}%")

# --- 4. Bootstrap Confidence Intervals (C) ---
# Percentile bootstrap of the test accuracy: draw N_BOOTSTRAP_SAMPLES resamples
# of the test set (same size, with replacement), score each one, and report the
# mean plus the 2.5th / 97.5th percentiles as a 95% interval.
print("\n--- Running Bootstrap Confidence Interval (95% CI) ---")

# The prediction for a given example does not depend on which resample it
# lands in (predict runs the model in inference mode), so inference is done
# ONCE here and the loop only resamples the cached (label, prediction) pairs.
# This replaces N_BOOTSTRAP_SAMPLES full forward passes with a single one.
preds_full = model.predict(X_test_reshaped, verbose=0)
if preds_full.ndim > 1 and preds_full.shape[1] > 1:
    # One score per class -> highest-scoring column is the predicted label.
    y_pred_full = np.argmax(preds_full, axis=1)
else:
    # Single score per example -> round to the nearest integer label.
    y_pred_full = np.rint(preds_full).astype(int).ravel()

# Plain ndarray so the fancy indexing below is positional even if the labels
# were handed over as a pandas Series with a non-default index.
y_true_full = np.asarray(y_test_indices)

accuracies = []
indices = np.arange(len(y_true_full))

for _ in tqdm(range(N_BOOTSTRAP_SAMPLES)):
    # Sample with replacement (same RNG call as before, so a fixed seed
    # yields the same resamples).
    sample_indices = np.random.choice(indices, size=len(indices), replace=True)
    y_true_sample = y_true_full[sample_indices]
    y_pred_sample = y_pred_full[sample_indices]

    accuracies.append(accuracy_score(y_true_sample, y_pred_sample))

mean_acc = float(np.mean(accuracies))
# Calculate 95% Confidence Interval
lower_bound = float(np.percentile(accuracies, 2.5))
upper_bound = float(np.percentile(accuracies, 97.5))

print(f"Mean Test Accuracy (Bootstrap): {mean_acc*100:.2f}%")
print(f"95% Confidence Interval: [{lower_bound*100:.2f}%, {upper_bound*100:.2f}%]")

# --- 5. Final Evaluation Metrics (E) ---
# Standard per-class report on the clean (unperturbed) test data. The headline
# number quoted elsewhere is the bootstrap mean; this section gives the
# per-class precision / recall / F1 breakdown and stores it as CSV.
preds_unp = model.predict(X_test_reshaped, verbose=0)
if preds_unp.ndim > 1 and preds_unp.shape[1] > 1:
    # One score per class -> highest-scoring column is the predicted label.
    y_pred_unperturbed = np.argmax(preds_unp, axis=1)
else:
    # Single score per example -> round to the nearest integer label.
    y_pred_unperturbed = np.rint(preds_unp).astype(int).ravel()

print("\n--- CLASSIFICATION REPORT (Unperturbed Test Data) ---")
# Choose display names that cover the highest label index actually in use.
max_label = int(max(np.max(y_test_indices), np.max(y_pred_unperturbed)))
if max_label + 1 > len(class_names):
    # fallback: numeric names covering every label index up to max_label
    report_names = [str(i) for i in range(max_label + 1)]
    print("Warning: class_names shorter than label indices. Using numeric labels in report.")
else:
    report_names = class_names

# Pass `labels` explicitly. Without it scikit-learn infers the label set from
# the data and raises ValueError whenever a class is absent from both y_true
# and y_pred (len(target_names) then no longer matches the inferred labels).
# Absent classes now appear with support 0 instead of aborting the report.
report_labels = list(range(len(report_names)))

print(classification_report(
    y_test_indices,
    y_pred_unperturbed,
    labels=report_labels,
    target_names=report_names,
    zero_division=0,
))

# Save classification report CSV, using the same labels/names as the printed one.
report_dict = classification_report(
    y_test_indices,
    y_pred_unperturbed,
    labels=report_labels,
    target_names=report_names,
    output_dict=True,
    zero_division=0,
)
report_df = pd.DataFrame(report_dict).transpose()
report_csv = out_dir / "classification_report_unperturbed.csv"
try:
    # Only the file write is best-effort: a read-only or missing directory
    # should not abort the rest of the analysis.
    report_df.to_csv(report_csv)
except OSError as e:
    print("Could not save classification report:", e)
else:
    print("Saved classification report to", report_csv)

# --- 6. Normalized Confusion Matrix Graph (E) ---
# Row-normalised confusion matrix (each row sums to 100% of that true class),
# drawn as an annotated heatmap and saved as PNG.

# Fix the label set explicitly so that row/column k always means class index k.
# Without `labels`, confusion_matrix drops classes that never occur, and the
# tick names would then be shifted onto the wrong rows/columns.
n_cm_classes = max(
    len(class_names),
    int(max(np.max(y_test_indices), np.max(y_pred_unperturbed))) + 1,
)
cm_labels = list(range(n_cm_classes))
# Known names first; any label index beyond class_names is shown numerically.
cm_tick_names = list(class_names) + [str(i) for i in range(len(class_names), n_cm_classes)]

cm = confusion_matrix(y_test_indices, y_pred_unperturbed, labels=cm_labels)

# Handle division by zero rows (classes with zero support).
# `out` must be pre-filled: with `where=` NumPy leaves the skipped positions
# untouched, so without `out` they would contain uninitialised memory rather
# than NaN, and nan_to_num could not clean them up.
row_sums = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm.astype(float),
    row_sums,
    out=np.zeros(cm.shape, dtype=float),
    where=row_sums != 0,
)

# convert to percentage for clearer display
cm_pct = cm_normalized * 100.0

plt.figure(figsize=(15, 12))
sns.heatmap(
    cm_pct,
    annot=True,
    fmt=".2f",  # display percent with two decimals
    cmap="Blues",
    xticklabels=cm_tick_names,
    yticklabels=cm_tick_names,
    cbar_kws={'label': 'Row-normalized recall (%)'}
)

# Use the bootstrap mean when it exists; otherwise fall back to the noisy-data
# accuracy and label it as such so the title never misreports its source.
if 'mean_acc' in globals():
    title_label, title_acc = 'Bootstrap mean', mean_acc * 100
else:
    title_label, title_acc = 'Noisy-data accuracy', perturbed_acc * 100
plt.title(f'Normalized Confusion Matrix on UNSEEN ISL Test Data ({title_label}: {title_acc:.2f}%)')
plt.ylabel('True Sign')
plt.xlabel('Predicted Sign')
plt.tight_layout()

# Save figure (before show(), which may clear the current figure)
png_path = out_dir / "confusion_matrix_normalized_percent.png"
plt.savefig(png_path, dpi=160)
plt.show()
print("Saved confusion matrix heatmap to", png_path)