**Evaluation_Metrics** This code first extracts the ground-truth labels from all four datasets, then computes the confusion matrix, plots the ROC curve, and computes AUC, precision, recall, F1-score, and specificity.

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc

# Ground-truth annotation source for each dataset, keyed by dataset name.
# The first three entries are files; the ShanghaiTech entry is a directory.
gt_files = dict(
    UCSD_Ped1="UCSD_Ped1/TestAnnotation.mat",
    UCSD_Ped2="UCSD_Ped2/TestAnnotation.mat",
    Avenue="Avenue/Test.txt",
    ShanghaiTech="ShanghaiTech/test_frame_mask/",
)

# Extracted label arrays are collected here, keyed by dataset name
gt_data_dict = dict()

# Process UCSD Ped1 and Ped2: build one fixed-length 0/1 label vector per
# entry of the "gt" variable, then join them into a single flat array.
for dataset, file_path in gt_files.items():
    if "UCSD" not in dataset:
        continue  # only the UCSD entries are handled in this loop

    gt_data = scipy.io.loadmat(file_path)
    gt_frames = gt_data["gt"]

    per_video_labels = []
    for video_ranges in gt_frames:
        # NOTE(review): every video is assumed to be exactly 200 frames long;
        # confirm this holds for both Ped1 and Ped2.
        video_labels = np.zeros(200)
        for anomaly_range in video_ranges:
            start, end = anomaly_range[0], anomaly_range[1]
            # NOTE(review): start/end are used directly as 0-based, inclusive
            # indices; confirm the annotation file is not 1-based (MATLAB).
            video_labels[start:end + 1] = 1
        per_video_labels.append(video_labels)

    actual_labels = np.concatenate(per_video_labels)  # Flatten across videos
    gt_data_dict[dataset] = actual_labels  # Store labels under the dataset name

# Process Avenue dataset
#
# The annotation file holds one "start end" pair per line (inclusive frame
# range marked anomalous) and a "-1" line after each video's annotations.
#
# The previous version counted the "-1" lines and then sliced the label array
# to that count, which kept only about one label per video instead of one per
# frame. The labels are now kept at full length; the evaluation step trims
# them to the number of predicted frames.
#
# NOTE(review): frame numbers are used as given, as positions in one shared
# label array. If Test.txt stores per-video frame numbers, a per-video offset
# must be added here -- confirm against the annotation file.
anomaly_ranges = []
with open(gt_files["Avenue"], "r") as f:
    for line in f:
        line = line.strip()
        if not line or line == "-1":
            # Blank lines and end-of-video markers carry no frame range
            continue
        start, end = map(int, line.split())
        anomaly_ranges.append((start, end))

# Keep the original 10,000-frame buffer as a minimum, but grow it when an
# annotation reaches further so that no anomalous frame is silently dropped.
required_len = max((end for _, end in anomaly_ranges), default=-1) + 1
actual_labels = np.zeros(max(10000, required_len))

for start, end in anomaly_ranges:
    actual_labels[start:end + 1] = 1  # Mark anomalies (end is inclusive)

gt_data_dict["Avenue"] = actual_labels  # Store labels

# Process ShanghaiTech dataset: every file in the mask directory (visited in
# sorted name order) contributes a single label, 1 when its "volLabel" data
# sums to a positive value and 0 otherwise.
# NOTE(review): this yields one label per mask file, not one per frame;
# confirm that each file really corresponds to a single frame.
mask_dir = gt_files["ShanghaiTech"]
mask_files = sorted(os.listdir(mask_dir))

actual_labels = np.array([
    1 if np.sum(scipy.io.loadmat(os.path.join(mask_dir, mask_file))["volLabel"]) > 0 else 0
    for mask_file in mask_files
])
gt_data_dict["ShanghaiTech"] = actual_labels  # Store labels

# Write all label arrays to one Excel workbook, one sheet per dataset.
# Each sheet is named after its dataset key and has two columns: a running
# frame index starting at 0 and the ground-truth label.
file_path = "ground_truth_labels.xlsx"

sheets = {
    name: pd.DataFrame({"Frame Index": np.arange(len(values)), "Ground Truth": values})
    for name, values in gt_data_dict.items()
}

with pd.ExcelWriter(file_path, engine="openpyxl") as writer:
    for name, sheet_df in sheets.items():
        sheet_df.to_excel(writer, sheet_name=name, index=False)

print(f"Ground truth labels saved in '{file_path}' with sheets: {list(gt_data_dict.keys())}")


In [None]:
# Load predicted frame scores
df_pred = pd.read_excel("frame_scores_test.xlsx")
Predicted = df_pred["Frame Score"].values

# Load ground truth labels.
# The workbook's sheets are named after the dataset keys ("UCSD_Ped1",
# "UCSD_Ped2", "Avenue", "ShanghaiTech"); the former "Avenue_GT" name does not
# exist in the file and made read_excel fail. Change sheet_name to evaluate a
# different dataset.
df_gt = pd.read_excel("ground_truth_labels.xlsx", sheet_name="Avenue")
Actual = df_gt["Ground Truth"].values

# Ensure Actual and Predicted are of the same length. Report a mismatch
# instead of trimming silently, since it usually means the score file and the
# selected ground-truth sheet do not belong to the same dataset.
if len(Actual) != len(Predicted):
    print(f"Warning: {len(Actual)} ground-truth labels vs {len(Predicted)} predicted scores; "
          f"truncating both to the shorter length.")
min_len = min(len(Actual), len(Predicted))
Actual = Actual[:min_len]
Predicted = Predicted[:min_len]

# Compute ROC Curve and AUC from the continuous frame scores
fpr, tpr, thresholds = roc_curve(Actual, Predicted)
auc_score = auc(fpr, tpr)

# Determine optimal threshold as the ROC point maximizing (TPR - FPR),
# i.e. Youden's J statistic (You can use a different method if needed)
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]

# Apply threshold to get binary predictions
y_pred = (Predicted >= optimal_threshold).astype(int)

# Compute Confusion Matrix.
# labels=[0, 1] forces a 2x2 matrix even when one class never occurs in the
# data; without it the four-way unpacking below fails on a 1x1 matrix.
cm = confusion_matrix(Actual, y_pred, labels=[0, 1])
TN, FP, FN, TP = cm.ravel()

# Compute metrics. zero_division=0 makes the "no positive predictions/labels"
# case an explicit 0.0, and specificity follows the same convention when
# there are no negative frames (instead of dividing by zero).
precision = precision_score(Actual, y_pred, zero_division=0)
recall = recall_score(Actual, y_pred, zero_division=0)
f1 = f1_score(Actual, y_pred, zero_division=0)
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Print metrics
print(f"Optimal Threshold: {optimal_threshold:.4f}")
print(f"True Positives (TP): {TP}")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"AUC Score: {auc_score:.4f}")

# Confusion matrix rendered as an annotated heatmap
# (rows: actual class, columns: predicted class)
class_names = ['Normal (0)', 'Anomalous (1)']
_, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap="Purples", cbar=True,
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted Event', color='black')
ax.set_ylabel('Actual Event', color='black')
ax.tick_params(axis='both', colors='black')
ax.set_title('Confusion Matrix')
plt.show()

# ROC curve with the chance diagonal drawn for reference
_, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr, tpr, linestyle='--', marker='o', color='darkorange', lw=2,
            label=f'AUC = {auc_score:.4f}')
roc_ax.plot([0, 1], [0, 1], color='navy', linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.0])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve')
roc_ax.legend(loc="lower right")
plt.show()
