In [None]:
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

In [None]:
# Make the locally modified HarDNet checkout importable. The path is relative
# to the notebook's working directory and must be appended BEFORE the import
# below, otherwise `hardnet` cannot be resolved.
sys.path.append('./Pytorch-HarDNet_new_SE_resi')
from hardnet import HarDNet

# Output size of the replacement classifier layer built in get_model().
NUM_CLASSES = 3

def get_model():
    """Build a HarDNet (arch=85, no pretrained weights) with a NUM_CLASSES-way head.

    The layer at index 3 of the last entry of ``model.base`` is swapped for a
    fresh ``nn.Linear`` with the same input width and ``NUM_CLASSES`` outputs.
    NOTE(review): presumably that layer is the final classifier of the
    modified HarDNet -- confirm against the `hardnet` module.

    Returns:
        The network, moved to the notebook-level ``device``.
    """
    net = HarDNet(arch=85, pretrained=False)

    # Replace the existing linear layer, keeping its input dimensionality.
    tail = net.base[-1]
    previous_fc = tail[3]
    tail[3] = nn.Linear(previous_fc.in_features, NUM_CLASSES)

    # nn.Module.to() moves the module in place and returns the same object.
    return net.to(device)

# Single shared network instance; the following cells load weights into it and evaluate it.
model = get_model()

In [None]:
# Checkpoint holding the trained state_dict for the model built by get_model().
MODEL_PATH = "/app/.pth"

# SECURITY: torch.load unpickles the file. weights_only=True restricts the
# unpickler to tensors and plain containers, so a tampered checkpoint cannot
# execute arbitrary code while being loaded. map_location remaps the stored
# tensors onto the current device (e.g. GPU-trained weights on a CPU machine).
model.load_state_dict(
    torch.load(MODEL_PATH, map_location=device, weights_only=True)
)
# Switch to inference mode before any evaluation.
model.eval()

print(" Model weights loaded successfully.")

In [None]:
# Dataset root; the evaluation images are read from its "test" subfolder.
DATA_DIR = "/app//"
TEST_DIR = os.path.join(DATA_DIR, "test")

# Deterministic preprocessing only (no augmentation): resize to 224x224,
# convert to a tensor, then normalize each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics --
# confirm they match what was used at training time.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

test_dataset = datasets.ImageFolder(TEST_DIR, transform=test_transform)

test_loader = DataLoader(
    test_dataset,
    batch_size=16,
    # Order must be preserved: evaluate_and_save() maps each prediction back to
    # its file path by position in test_dataset.samples.
    shuffle=False,
    num_workers=4,
    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine does useless work and triggers a warning.
    pin_memory=torch.cuda.is_available()
)

# Class labels as exposed by the dataset; index i corresponds to label i.
class_names = test_dataset.classes

print("Test samples:", len(test_dataset))
print("Classes:", class_names)

In [None]:
import torch.nn.functional as F
import pandas as pd
import numpy as np

def evaluate_and_save(model, loader, class_names,
                      save_path="/app/.csv",
                      overconf_threshold=0.9):
    """Evaluate `model` on `loader`, save per-sample results to CSV, print a summary.

    For every sample the softmax probabilities, predicted class, confidence
    (maximum probability), correctness, and an "overconfident error" flag
    (misclassified with confidence >= `overconf_threshold`) are recorded.

    Args:
        model: Network to evaluate. It is put into eval mode and inputs are
            moved to the device of its first parameter (CPU if it has none).
        loader: Iterable of ``(inputs, labels)`` batches. If ``loader.dataset``
            exposes ``samples`` (as ``ImageFolder`` does), the first element of
            each entry is written to the ``path`` column; otherwise that column
            is filled with ``None``.
        class_names: Sequence mapping class index -> class name.
        save_path: Destination CSV file.
        overconf_threshold: Confidence at or above which an error counts as
            overconfident.

    Returns:
        pandas.DataFrame with columns ``path, y_true, y_true_name, y_pred,
        y_pred_name, confidence, correct, overconfident_error`` plus one
        ``prob_<class>`` column per entry of `class_names`.

    Raises:
        ValueError: If the loader yields no batches, or if file paths are
            available but the loader does not iterate the dataset sequentially
            (paths are matched to predictions by position, so a shuffled loader
            would silently attach the wrong path to every row).
    """
    # Local import: only needed for the ordering guard below.
    from torch.utils.data import SequentialSampler

    samples = getattr(loader.dataset, "samples", None)
    if samples is not None and not isinstance(
            getattr(loader, "sampler", None), SequentialSampler):
        raise ValueError(
            "evaluate_and_save needs a loader that iterates the dataset in "
            "order (shuffle=False); otherwise file paths would be misaligned "
            "with the predictions."
        )

    model.eval()

    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    model_device = (first_param.device if first_param is not None
                    else torch.device("cpu"))

    prob_batches = []
    label_batches = []

    with torch.no_grad():
        for inputs, labels in loader:
            logits = model(inputs.to(model_device))
            prob_batches.append(F.softmax(logits, dim=1).cpu().numpy())
            # Labels are only used on the host, so they never need to visit the device.
            label_batches.append(labels.cpu().numpy())

    if not prob_batches:
        raise ValueError("loader produced no batches; nothing to evaluate.")

    all_probs = np.concatenate(prob_batches)
    all_labels = np.concatenate(label_batches)

    # Derive prediction and confidence from the same array so they always agree.
    all_preds = all_probs.argmax(axis=1)
    confidences = all_probs.max(axis=1)
    correctness = (all_preds == all_labels)

    # Overconfidence condition: wrong AND confident.
    overconfident_errors = (~correctness) & (confidences >= overconf_threshold)

    # With sequential iteration the i-th prediction belongs to the i-th sample.
    n_seen = len(all_labels)
    if samples is not None:
        all_paths = [entry[0] for entry in samples[:n_seen]]
    else:
        all_paths = [None] * n_seen

    # Create dataframe
    df = pd.DataFrame({
        "path": all_paths,
        "y_true": all_labels,
        "y_true_name": [class_names[i] for i in all_labels],
        "y_pred": all_preds,
        "y_pred_name": [class_names[i] for i in all_preds],
        "confidence": confidences,
        "correct": correctness,
        "overconfident_error": overconfident_errors
    })

    # Add class probabilities
    for k, cname in enumerate(class_names):
        df[f"prob_{cname}"] = all_probs[:, k]

    df.to_csv(save_path, index=False)
    print(f" CSV saved to: {save_path}")

    # Print summary statistics
    total_samples = len(df)
    total_errors = (~correctness).sum()
    total_overconf_errors = overconfident_errors.sum()

    print("\n Test Confidence Statistics")
    print("--------------------------------------------------")
    print(f"Total test samples: {total_samples}")
    print(f"Total misclassifications: {total_errors}")
    print(f"Overconfident misclassifications (≥ {overconf_threshold}): {total_overconf_errors}")

    # Guard the ratio: a perfect model has no errors to divide by.
    if total_errors > 0:
        print(f"Percentage of errors that are overconfident: "
              f"{(total_overconf_errors / total_errors) * 100:.2f}%")

    # total_samples is > 0 here because at least one batch was processed.
    print(f"Percentage of total test samples that are overconfident errors: "
          f"{(total_overconf_errors / total_samples) * 100:.2f}%")

    return df


# Run the evaluation with the default CSV path and 0.9 threshold; df_test feeds the breakdown cells below.
df_test = evaluate_and_save(model, test_loader, class_names)

In [None]:
print("\n Overconfident Errors by True Class")
print("--------------------------------------------------")

# Keep only the flagged rows once, then count them per ground-truth class.
flagged_rows = df_test[df_test["overconfident_error"]]
for cname in class_names:
    n_flagged = len(flagged_rows[flagged_rows["y_true_name"] == cname])
    print(f"{cname}: {n_flagged}")

In [None]:
# Confidence values of the misclassified rows only; each threshold is then a
# single comparison against this series.
wrong_confidences = df_test.loc[~df_test["correct"], "confidence"]
for threshold in (0.8, 0.85, 0.9, 0.95):
    n_over = (wrong_confidences >= threshold).sum()
    print(f"Overconfident errors (≥ {threshold}): {n_over}")