In [5]:
# cuBLAS workspace setting that PyTorch's reproducibility notes ask for when
# deterministic algorithms are enabled on CUDA (set_seed below turns them on).
# It is exported here, in its own cell, so it is in the environment before the
# seeding cell runs.
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

env: CUBLAS_WORKSPACE_CONFIG=:4096:8


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, random_split, DataLoader
from torchvision import models, transforms
from PIL import Image
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchinfo import summary
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import glob
from torch.utils.data import DataLoader
import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader,Subset
from tqdm import tqdm
import json
import random
import math
import re
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,confusion_matrix
import time
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve, auc

In [7]:
def set_seed(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs and turn on deterministic mode.

    Args:
        seed: Value handed to every random number generator.
    """
    # Same seed for every generator, in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)

    # cuDNN: request deterministic kernels and disable the autotuner
    # (this may slow things down).
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    # Not every torch build exposes the global determinism switch.
    enable_determinism = getattr(torch, "use_deterministic_algorithms", None)
    if enable_determinism is not None:
        enable_determinism(True)
# CUBLAS_WORKSPACE_CONFIG is already exported by the %env magic in an earlier
# cell, so it is not set through os.environ here.

# Seed used for this run; seed_list holds the seeds to choose from.
SEED = 3
seed_list = [3, 5, 11, 1344, 2506]
set_seed(SEED)

In [8]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
device

device(type='cuda')

# Load Test Data

In [9]:
# Test split; the `image_path` and `label` columns are the ones read by
# ImageDataset further down.
TEST_CSV = 'test_df_encoded_80_20.csv'
test_df = pd.read_csv(TEST_CSV)
test_df

Unnamed: 0,index,image_path,label
0,611,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0
1,2325,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,1
2,99,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0
3,144,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0
4,612,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0
...,...,...,...
489,1216,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0
490,2165,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,1
491,398,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0
492,668,C:\Users\Anwesh\Desktop\Anwesh\Oralpathology D...,0


In [10]:
# Class balance of the test split: label 1 is counted as abnormal, label 0 as normal.
_labels = test_df['label']
total_abnormal_count = _labels.eq(1).sum()
total_normal_count = _labels.eq(0).sum()
total_samples = len(test_df)

# Report the three counts, one per line.
for _caption, _count in (
    ("Total abnormal images:", total_abnormal_count),
    ("Total normal images:", total_normal_count),
    ("Total samples:", total_samples),
):
    print(_caption, _count)

Total abnormal images: 64
Total normal images: 430
Total samples: 494


In [11]:
# ImageNet channel statistics used to normalise the input tensors.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every test image, in order.
_preprocessing_steps = [
    transforms.Resize((224, 224)),                             # fixed 224x224 input size
    transforms.ToTensor(),                                     # PIL image -> tensor
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(_preprocessing_steps)

class ImageDataset(Dataset):
    """Map-style dataset that loads the RGB images listed in a DataFrame.

    Args:
        df: DataFrame with an ``image_path`` column (a path PIL can open) and a
            ``label`` column (integer class id). The index is reset, so items
            are addressed by position ``0 .. len(df) - 1``.
        transform: Optional callable applied to the PIL image before it is
            returned.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``image`` is the RGB image after ``transform`` (the raw PIL image when
        no transform was given); ``label`` is a 0-d ``torch.long`` tensor.
        """
        img_path = self.df.loc[idx, 'image_path']
        label = self.df.loc[idx, 'label']

        # Image.open is lazy and keeps the file handle open. The context manager
        # releases it deterministically once convert() has decoded the pixels
        # into a new in-memory image.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        # Compare against None so a transform object that happens to be falsy
        # (e.g. an empty container) is not silently skipped.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.long)


# Test set in file order (no shuffling), loaded in the main process.
test_dataset = ImageDataset(df=test_df, transform=transform)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
)

# Test Model

In [12]:
def _freeze_all_but_layer4_and_fc(model):
    """Disable gradients everywhere, then re-enable them for ``layer4`` and ``fc``."""
    for param in model.parameters():
        param.requires_grad = False
    for trainable_part in (model.layer4, model.fc):
        for param in trainable_part.parameters():
            param.requires_grad = True


def get_model_by_name(name: str, num_classes=2, device='cuda'):
    """Build the ResNet-18 variant encoded in ``name`` and move it to ``device``.

    The classifier head is selected by substrings of ``name``:

    * ``"mod2"``: ``Linear(in, 256) -> ReLU -> Dropout -> Linear(256, num_classes)``;
      only ``layer4`` and the head keep ``requires_grad=True``.
    * ``"mod1"``: ``Linear(in, in) -> ReLU -> Dropout -> Linear(in, num_classes)``;
      every parameter keeps ``requires_grad=True``.
    * anything else: a single ``Linear(in, num_classes)``; only ``layer4`` and
      the head keep ``requires_grad=True``.

    A ``dp(<p>)`` fragment in ``name`` (e.g. ``"..._dp(0.2)"``) sets the dropout
    probability; without it the probability is 0.5.

    Args:
        name: Model/checkpoint name that encodes the head variant.
        num_classes: Size of the final linear layer's output.
        device: Target device. If a CUDA device is requested but CUDA is not
            available, a ``RuntimeWarning`` is issued and the model is placed on
            the CPU instead of raising.

    Returns:
        The randomly initialised model (``weights=None``) on the target device.
    """
    # NOTE(review): the backbone is always resnet18, whatever architecture the
    # name mentions (e.g. "ResNet50_..."); confirm that is intended.
    model = models.resnet18(weights=None)
    in_features = model.fc.in_features

    dropout_match = re.search(r'dp\(([\d.]+)\)', name)
    dropout_p = float(dropout_match.group(1)) if dropout_match else 0.5

    # "mod2" is tested first, so a name containing both markers resolves to mod2.
    if "mod2" in name:
        # Mod 2: two-layer MLP head with a 256-unit hidden layer and dropout.
        model.fc = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_p),
            nn.Linear(256, num_classes),
        )
        _freeze_all_but_layer4_and_fc(model)
    elif "mod1" in name:
        # Mod 1: full-width hidden layer with dropout before the final layer.
        # NOTE(review): unlike the other variants nothing is frozen here;
        # confirm that is intended.
        model.fc = nn.Sequential(
            nn.Linear(in_features, in_features),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_p),
            nn.Linear(in_features, num_classes),
        )
    else:
        # Default: plain linear classifier.
        model.fc = nn.Linear(in_features, num_classes)
        _freeze_all_but_layer4_and_fc(model)

    # The default device is 'cuda'; degrade gracefully on hosts without a GPU
    # instead of failing inside model.to().
    target_device = torch.device(device)
    if target_device.type == 'cuda' and not torch.cuda.is_available():
        import warnings

        warnings.warn(
            "CUDA was requested but is not available; placing the model on the CPU.",
            RuntimeWarning,
        )
        target_device = torch.device('cpu')

    return model.to(target_device)

In [13]:
import os
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, confusion_matrix, roc_auc_score, roc_curve
)
from tqdm import tqdm


def _run_test_epoch(model, loader, criterion, device):
    """Run one evaluation pass over ``loader`` and collect predictions.

    Returns:
        ``(avg_loss, labels, preds, probs, seconds_per_image)`` where ``probs``
        holds the softmax probability of class 1 and ``seconds_per_image`` is
        the summed forward-pass time divided by the number of samples.
    """
    model.eval()
    # CUDA kernels are launched asynchronously, so the device has to be
    # synchronised around the forward pass; otherwise the wall clock only
    # measures the kernel launch, not the computation.
    timing_device = torch.device(device)
    sync_cuda = timing_device.type == "cuda"

    total_loss, total_samples, forward_seconds = 0.0, 0, 0.0
    all_preds, all_labels, all_probs = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc='Test', leave=False):
            images, labels = images.to(device), labels.to(device).long()

            if sync_cuda:
                torch.cuda.synchronize(timing_device)
            start_time = time.perf_counter()
            outputs = model(images)
            if sync_cuda:
                torch.cuda.synchronize(timing_device)
            forward_seconds += time.perf_counter() - start_time

            loss = criterion(outputs, labels)
            preds = outputs.argmax(dim=1)
            probs = torch.softmax(outputs, dim=1)[:, 1]

            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            # criterion returns the batch mean; weight it by the batch size so
            # the final average is per sample.
            total_loss += loss.item() * labels.size(0)
            total_samples += labels.size(0)

    avg_loss = total_loss / total_samples
    seconds_per_image = forward_seconds / total_samples
    return avg_loss, all_labels, all_preds, all_probs, seconds_per_image


def _save_confusion_matrix(cm, path):
    """Render the 2x2 confusion matrix as a heatmap and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=["Normal", "Lesion"], yticklabels=["Normal", "Lesion"],
                ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title("Confusion Matrix")
    fig.tight_layout()
    fig.savefig(path, dpi=300)
    plt.close(fig)


def _save_roc_curve(y_true, y_scores, roc_auc, path):
    """Plot the ROC curve with the chance diagonal and save it to ``path``."""
    fpr, tpr, _ = roc_curve(y_true, y_scores)
    fig, ax = plt.subplots(figsize=(7, 6))
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'AUC = {roc_auc:.4f}')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([-0.02, 1.0])
    ax.set_ylim([0.0, 1.02])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc="lower right")
    ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(path, dpi=300)
    plt.close(fig)


def evaluate_model_across_folds(model_name_prefix,
                                get_model_fn = get_model_by_name,
                                test_loader = test_loader,
                                device='cuda',
                                folds=range(1, 6)):
    """Evaluate one saved checkpoint on the test loader and write its report.

    The checkpoint is read from
    ``<cwd>/<model_name_prefix>/<model_name_prefix>.pth``. A confusion-matrix
    PNG, a ROC-curve PNG and a one-row results CSV are written into the same
    directory.

    Args:
        model_name_prefix: Checkpoint name; also the output directory name.
        get_model_fn: Factory called as ``get_model_fn(name, device=device)``
            that returns a model whose ``state_dict`` matches the checkpoint.
        test_loader: Loader yielding ``(images, labels)`` batches. The default
            is bound to the notebook's ``test_loader`` when this cell is run.
        device: Device used for the model, the checkpoint and the batches.
        folds: Unused; kept so existing calls keep working.

    Returns:
        ``(csv_path, df)``: the path of the results CSV and the one-row
        DataFrame of metrics that was written to it.
    """
    model_dir = os.path.join(os.getcwd(), model_name_prefix)
    os.makedirs(model_dir, exist_ok=True)

    print("🔍 Testing:", model_name_prefix)
    print("📂 Saving results in:", model_dir)

    criterion = torch.nn.CrossEntropyLoss()
    model_path_final = os.path.join(model_dir, f"{model_name_prefix}.pth")

    # Forward the requested device so the factory does not fall back to its own default.
    model = get_model_fn(model_name_prefix, device=device)
    # NOTE(review): torch.load unpickles the file; only load trusted
    # checkpoints (or pass weights_only=True where the torch version supports it).
    model.load_state_dict(torch.load(model_path_final, map_location=device))
    model.to(device)

    test_loss, y_true, y_pred, y_scores, avg_infer_time = _run_test_epoch(
        model, test_loader, criterion, device
    )

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    sensitivity = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    roc_auc = roc_auc_score(y_true, y_scores)
    # Fix the label order so the matrix is always 2x2, even when one class is
    # missing from both the labels and the predictions.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0.0

    df = pd.DataFrame([{
        "Loss": test_loss,
        "Accuracy (%)": accuracy * 100,
        "Precision (%)": precision * 100,
        "Sensitivity (%)": sensitivity * 100,
        "Specificity (%)": specificity * 100,
        "F1 Score (%)": f1 * 100,
        "ROC AUC": roc_auc,
        "Avg Inference Time (s)": avg_infer_time
    }])

    # Save the figures next to the checkpoint.
    _save_confusion_matrix(
        cm, os.path.join(model_dir, f"{model_name_prefix}_confusion_matrix.png")
    )
    _save_roc_curve(
        y_true, y_scores, roc_auc,
        os.path.join(model_dir, f"{model_name_prefix}_roc_curve.png")
    )

    # Save the one-row metrics table.
    csv_path = os.path.join(model_dir, f"{model_name_prefix}_results.csv")
    df.to_csv(csv_path, index=False)

    print(f"\n✅ Evaluation complete. All results saved to: {csv_path}")
    return csv_path, df

## Model Config

In [16]:
# Checkpoints to evaluate. Each name must be a sub-directory of the working
# directory that contains "<name>.pth" (see evaluate_model_across_folds).
#
# Disabled alternative model_list, kept for reference:
#   ResNet18_scratch, ResNet18_weighted_scratch, ResNet18, ResNet18_weighted,
#   ResNet18_mod1, ResNet18_mod1_weighted, ResNet18_mod2, ResNet18_mod2_weighted,
#   ResNet18_mod1_weighted_dp(0.7), ResNet18_mod1_weighted_dp(0.2),
#   ResNet18_mod1_weighted_test, ResNet18_mod2_weighted_aug,
#   ResNet18_mod1_weighted_aug, ResNet18_mod1_weighted_dp(0),
#   ResNet50_mod1, ResNet50_mod1_weighted, ResNet50_mod1_weighted_dp(0.2),
#   ResNet34_mod1, ResNet34_mod1_weighted, ResNet34_mod1_weighted_dp(0.2)

# Only the uncommented entries are evaluated in this run.
model_list = [
    # "ResNet18_scratch",
    # "ResNet18_weighted_scratch",
    "ResNet_18_clf",
    "ResNet_18_clf_weighted",
    # "ResNet18_mod1",
    # "ResNet18_mod1_weighted",
    # "ResNet18_mod1_test",
    # "ResNet18_mod1_weighted_test",
    # "ResNet18_mod2",
    # "ResNet18_mod2_weighted",
    # "ResNet18_mod1_weighted_dp(0.2)",
    # "ResNet18_mod1_weighted_dp(0.4)",
    # "ResNet18_mod1_weighted_dp(0.6)"
]

In [17]:
# Evaluate every selected checkpoint on the device detected earlier in the
# notebook and keep each model's metrics row.
csv_paths = []
result_frames = []
for model_name in model_list:
    path, model_df = evaluate_model_across_folds(model_name, device=device)
    csv_paths.append(path)
    result_frames.append(model_df)

# Combine the per-model rows so `df` covers every evaluated model, not just the
# last one; the model name becomes the first column.
if result_frames:
    df = (
        pd.concat(result_frames, keys=model_list, names=["Model"])
        .reset_index(level="Model")
        .reset_index(drop=True)
    )
else:
    df = pd.DataFrame()

🔍 Testing: ResNet_18_clf
📂 Saving results in: C:\Users\Anwesh\Desktop\Anwesh\Oralpathology Dataset Technical Validation\Model\Final Code No Val\ResNet_18_clf


                                                                                                                                                                                                                


✅ Evaluation complete. All results saved to: C:\Users\Anwesh\Desktop\Anwesh\Oralpathology Dataset Technical Validation\Model\Final Code No Val\ResNet_18_clf\ResNet_18_clf_results.csv
🔍 Testing: ResNet_18_clf_weighted
📂 Saving results in: C:\Users\Anwesh\Desktop\Anwesh\Oralpathology Dataset Technical Validation\Model\Final Code No Val\ResNet_18_clf_weighted


                                                                                                                                                                                                                


✅ Evaluation complete. All results saved to: C:\Users\Anwesh\Desktop\Anwesh\Oralpathology Dataset Technical Validation\Model\Final Code No Val\ResNet_18_clf_weighted\ResNet_18_clf_weighted_results.csv


In [None]:
# Display the results table left in `df` by the evaluation cell above.
df