In [8]:
import os, random
from collections import Counter
import numpy as np
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import transforms, datasets
import matplotlib.pyplot as plt

In [9]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

train = r'E:\MEDICAL PROJECT\data\train_split'
test = r'E:\MEDICAL PROJECT\data\test'
val = r'E:\MEDICAL PROJECT\data\val_split'

mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(8),
    transforms.ColorJitter(brightness=0.08, contrast=0.08),
    transforms.Grayscale(num_output_channels=3),
    transforms.Normalize(mean=mean, std=std)
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

train_ds = datasets.ImageFolder(train, transform=train_transform)
val_ds   = datasets.ImageFolder(val, transform=val_transform)
test_ds  = datasets.ImageFolder(test, transform=val_transform)

targets = np.array(train_ds.targets)
unique, counts = np.unique(targets, return_counts=True)
class_weights = 1.0 / counts
sample_weights = class_weights[targets]
sample_weights = torch.from_numpy(sample_weights).double()
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

train_loader = DataLoader(train_ds,batch_size=32, sampler= sampler, num_workers= 4,pin_memory= True )
val_loader = DataLoader(val_ds,batch_size=32, shuffle= False, num_workers= 4,pin_memory= True )
test_loader = DataLoader(test_ds,batch_size=32, shuffle= False, num_workers= 4,pin_memory= True )

# Device info
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
if device.type == "cuda":
    print("CUDA device:", torch.cuda.get_device_name(0))


Device: cuda
CUDA device: NVIDIA GeForce GTX 1650 Ti


In [6]:
import os
import time
import copy
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score
from torchvision import models

In [None]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
OUT_DIR = "checkpoints"
os.makedirs(OUT_DIR, exist_ok=True)

HEAD_EPOCHS = 5
FT_EPOCHS = 12
BATCH_SIZE = None
LR_HEAD = 1e-3
LR_FT = 1e-4
WEIGHT_DECAY = 1e-4
PATIENCE = 3
MIN_LR = 1e-7

print("Device:", DEVICE)
print("Torch:", torch.__version__)

def build_model(num_classes = 2):
    model = models.resnet18(pretrained = True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_classes)
    return model

model = build_model(2).to(DEVICE)

criterion = nn.CrossEntropyLoss()  ### LOSS

# Stage 1: freeze backbone except final fc
for name, param in model.named_parameters():
    param.requires_grad = False
for name, param in model.fc.named_parameters():
    param.requires_grad = True

opt_head = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR_HEAD, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    opt_head, mode='min', factor=0.5, patience=PATIENCE, min_lr=MIN_LR
)

def run_epoch(model, loader, optimizer=None, train=False, device=DEVICE, use_amp=False):
    if train:
        model.train()
    else:
        model.eval()
    losses = []
    all_labels = []
    all_probs = []
    scaler = torch.cuda.amp.GradScaler() if use_amp and device.type == 'cuda' else None

    loop = tqdm(loader, desc='Train' if train else 'EVAL', leave= False)
    for imgs, labels in loop:
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        with torch.set_grad_enabled(train):
            if scaler:
                with torch.cuda.amp.autocast():
                    logits = model(imgs)
                    loss = criterion(logits, labels)
            else:
                logits = model(imgs)
                loss = criterion(logits, labels)

            probs = torch.softmax(logits, dim=1)[:,1].detach().cpu().numpy()
            all_probs.append(probs)
            all_labels.append(labels.detach().cpu().numpy())

            if train:
                if scaler:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()
                else:
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

        losses.append(loss.item())
        loop.set_postfix(loss = np.mean(
            
        ))
    all_probs = np.concatenate(all_probs)
    all_labels = np.concatenate(all_labels)

    try:
        auc = roc_auc_score(all_labels, all_probs)
    except ValueError:
        auc = float('nan')

    return np.mean(losses), auc


# TRAINING HELPER: runs stages and save best model by val AUC
best_model_wts = copy.deepcopy(model.state_dict())
best_val_auc = -1.0
history = {"train_loss":[], "train_auc":[], "val_loss":[], "val_auc":[]}

### STAGE - 1 HEAD TRAINING
for epoch in range(1,HEAD_EPOCHS+1):
    t0 = time.time()
    train_loss, train_auc = run_epoch(model, train_loader, optimizer=opt_head, train=True, use_amp=False)
    val_loss, val_auc = run_epoch(model, val_loader, train=False, use_amp=False)
    scheduler.step(val_loss)
    history["train_loss"].append(train_loss); history["train_auc"].append(train_auc)
    history["val_loss"].append(val_loss); history["val_auc"].append(val_auc)

    if not np.isnan(val_auc) and val_auc > best_val_auc:
        best_val_auc = val_auc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save({"model_state": best_model_wts, "val_auc": best_val_auc, "epoch": epoch},
                    os.path.join(OUT_DIR, f"best_head_epoch{epoch:.0f}_valauc{val_auc:.4f}.pth"))

    print(f"Epoch {epoch}/{HEAD_EPOCHS}  train_loss={train_loss:.4f} train_auc={train_auc:.4f}  val_loss={val_loss:.4f} val_auc={val_auc:.4f}  time={(time.time()-t0):.1f}s")


## UNFREEZE BACKBONE AND FINE TUNEEEEEE

for param in model.parameters():
    param.requires_grad = True

opt_ft = optim.Adam(model.parameters(), lr=LR_FT, weight_decay=WEIGHT_DECAY)
scheduler_ft = optim.lr_scheduler.ReduceLROnPlateau(
    opt_ft, mode='min', factor=0.5, patience=PATIENCE, min_lr=MIN_LR
)

for epoch in range(1,FT_EPOCHS+1):
    t0 = time.time()
    train_loss, train_auc = run_epoch(model, train_loader, optimizer=opt_ft, train=True, use_amp=False)
    val_loss, val_auc = run_epoch(model, val_loader, train=False, use_amp=False)
    scheduler_ft.step(val_loss)

    if not np.isnan(val_auc) and val_auc > best_val_auc:
        best_val_auc = val_auc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save({"model_state": best_model_wts, "val_auc": best_val_auc, "epoch": HEAD_EPOCHS + epoch},
                   os.path.join(OUT_DIR, f"best_ft_epoch{epoch:.0f}_valauc{val_auc:.4f}.pth"))

    print(f"FT Epoch {epoch}/{FT_EPOCHS}  train_loss={train_loss:.4f} train_auc={train_auc:.4f}  val_loss={val_loss:.4f} val_auc={val_auc:.4f}  time={(time.time()-t0):.1f}s")


# restore best weights and save final
model.load_state_dict(best_model_wts)
torch.save({"model_state": model.state_dict(), "val_auc": best_val_auc}, os.path.join(OUT_DIR, f"best_overall_valauc{best_val_auc:.4f}.pth"))
print("\nBEST VAL AUC:", best_val_auc)
print("Saved best model to", OUT_DIR)


Device: cuda
Torch: 2.8.0+cu129


                                                                    

Epoch 1/5  train_loss=0.3352 train_auc=0.9399  val_loss=0.3776 val_auc=0.9816  time=61.7s


                                                                    

Epoch 2/5  train_loss=0.2045 train_auc=0.9766  val_loss=0.1731 val_auc=0.9847  time=62.3s


                                                                    

Epoch 3/5  train_loss=0.1829 train_auc=0.9803  val_loss=0.1602 val_auc=0.9854  time=60.3s


                                                                    

Epoch 4/5  train_loss=0.1888 train_auc=0.9781  val_loss=0.1472 val_auc=0.9874  time=59.7s


                                                                    

Epoch 5/5  train_loss=0.1541 train_auc=0.9863  val_loss=0.2422 val_auc=0.9880  time=61.6s


                                                                     

FT Epoch 1/12  train_loss=0.0961 train_auc=0.9945  val_loss=0.0949 val_auc=0.9981  time=63.2s


                                                                     

FT Epoch 2/12  train_loss=0.0634 train_auc=0.9974  val_loss=0.0563 val_auc=0.9981  time=63.5s


                                                                     

FT Epoch 3/12  train_loss=0.0564 train_auc=0.9980  val_loss=0.0434 val_auc=0.9992  time=63.0s


                                                                     

FT Epoch 4/12  train_loss=0.0347 train_auc=0.9991  val_loss=0.0262 val_auc=0.9995  time=62.5s


                                                                     

FT Epoch 5/12  train_loss=0.0233 train_auc=0.9996  val_loss=0.0414 val_auc=0.9994  time=62.6s


                                                                     

FT Epoch 6/12  train_loss=0.0303 train_auc=0.9994  val_loss=0.0943 val_auc=0.9995  time=62.4s


                                                                     

FT Epoch 7/12  train_loss=0.0341 train_auc=0.9993  val_loss=0.0355 val_auc=0.9997  time=62.2s


                                                                     

FT Epoch 8/12  train_loss=0.0213 train_auc=0.9996  val_loss=0.0312 val_auc=0.9996  time=62.2s


                                                                      

FT Epoch 9/12  train_loss=0.0093 train_auc=1.0000  val_loss=0.0196 val_auc=0.9997  time=62.3s


                                                                      

FT Epoch 10/12  train_loss=0.0092 train_auc=0.9999  val_loss=0.0160 val_auc=0.9998  time=76.1s


                                                                      

FT Epoch 11/12  train_loss=0.0113 train_auc=0.9998  val_loss=0.0161 val_auc=0.9999  time=124.4s


                                                                      

FT Epoch 12/12  train_loss=0.0042 train_auc=1.0000  val_loss=0.0166 val_auc=0.9999  time=121.1s

BEST VAL AUC: 0.9998844487241214
Saved best model to checkpoints


In [14]:
# Make sure you have your criterion and DEVICE defined
ckpt = torch.load(
    r"E:\MEDICAL PROJECT\notebooks\checkpoints\best_overall_valauc0.9999.pth",
    weights_only=False
)

model.load_state_dict(ckpt["model_state"])  # pull the correct state dict


model.to(DEVICE)
model.eval()

test_loss, test_auc = run_epoch(model, test_loader, train=False, device=DEVICE, use_amp=False)  
print(f"TEST LOSS: {test_loss:.4f}  TEST AUC: {test_auc:.4f}")


                                                                 

TEST LOSS: 0.9909  TEST AUC: 0.9587




In [3]:
### ADDING GRAD CAM TO THIS FOR EXPLAINABILITY

import torch 
import torch.nn.functional as F
import cv2
import matplotlib.pyplot as plt

class GradCAM:
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer

        #to store activations and gradients
        self.activation = None
        self.gradients = None

        ##HOOK THE FORWARD AND BACKWARD PASSES
        target_layer.register_forward_hook(self.save_activation)
        target_layer.register_backward_hook(self.save_gradients)

    def save_activation(self, module, input, output):
        self.activations = output.detach()

    def save_gradients(slf, module, grad_input, grad_output):
        self.gradients = grad_output.detach()

    def generate(self, input_tensor, class_idx = None):
        """
        input_tensor: single image tensor [1, C, H, W]
        class_idx: class index for which Grad-CAM is computed (default = predicted class)
        """

        #forward pass
        output = self.model(input_tensor)

        if class_idx is None:
            class_idx = output.argmax(dim=1).items()

        ##backward pass: get gradients wrt chosen class
        self.model.zero_grad()
        output[:class_idx].backward()

        ### Global average pool gradients -> weights
        weights = self.gradients.mean(dim=(2, 3), keepdim=True)

        ###weighted sum of activation
        cam = (weights * self.activations).sum(dim=1, keepdim=True)

        # # Apply ReLU
        cam = F.relu(cam)

        #Normalize
        cam = cam.squeeze().cpu().numpy()
        cam = (cam - cam.min())/ (cam.max() - cam.min())

        return cam

In [4]:
def show_cam_on_image(img, cam, alpha=0.5):
    """
    img: numpy image (H, W, 3), values in [0, 1]
    cam: heatmap (H, W), values in [0, 1]
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    overlay = np.float32(heatmap) / 255
    result = overlay * alpha + img * (1 - alpha)
    result = np.clip(result, 0, 1)
    plt.imshow(result)
    plt.axis("off")
    plt.show()


In [5]:
import torch
import torch.nn as nn
from torchvision import models

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- 1. Rebuild the model architecture ---
def build_model(num_classes=2):
    model = models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_classes)
    return model

model = build_model(num_classes=2).to(DEVICE)

# --- 2. Load checkpoint ---
ckpt_path = r"E:\MEDICAL PROJECT\models\best_overall_valauc0.9999.pth"  # update if needed
ckpt = torch.load(ckpt_path, map_location=DEVICE, weights_only=False)

# your weights are stored under the "model_state" key
model.load_state_dict(ckpt["model_state"])
print("✅ Model loaded! Best Val AUC =", ckpt.get("val_auc", "unknown"))

model.load_state_dict(ckpt["model_state"])
model.eval()

print("✅ Model loaded! Best Val AUC =", ckpt.get("val_auc", "unknown"))




✅ Model loaded! Best Val AUC = 0.9998844487241214
✅ Model loaded! Best Val AUC = 0.9998844487241214


In [10]:
# Make sure model is loaded & in eval mode
model.eval()

# Init Grad-CAM with last conv block of ResNet18
gradcam = GradCAM(model, model.layer4[-1])

# ---- Pick one batch from validation/test ----
images, labels = next(iter(val_loader))   # or test_loader
img_tensor = images[0].unsqueeze(0).to(DEVICE)  # single image
label = labels[0].item()

# Save original image (for overlay)
# Undo normalization for visualization
mean = np.array([0.485, 0.456, 0.406])
std  = np.array([0.229, 0.224, 0.225])
img_np = images[0].permute(1, 2, 0).numpy() * std + mean
img_np = np.clip(img_np, 0, 1)

# ---- Run Grad-CAM ----
cam = gradcam.generate(img_tensor)

# ---- Show result ----
plt.figure(figsize=(10,4))

plt.subplot(1,2,1)
plt.imshow(img_np)
plt.title(f"Original (Label={label})")
plt.axis("off")

plt.subplot(1,2,2)
show_cam_on_image(img_np, cam)
plt.title("Grad-CAM Heatmap")
plt.axis("off")

plt.show()


  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


AttributeError: 'Tensor' object has no attribute 'items'