this is the first stage, we train 20 first epock

In [6]:

# ========================== SETUP ==========================
# !pip install -q pandas torch torchvision scikit-learn tqdm

import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ========================== 1) PATHS ==========================
CSV_PATH = Path("./chest-xray/Data_Entry_2017.csv")    # <- change if needed
BASE     = Path("./chest-xray")                        # contains images_001, images_002, ...
assert CSV_PATH.exists(), f"CSV not found: {CSV_PATH}"
assert BASE.exists(), f"Base images folder not found: {BASE}"

# ========================== 2) LOAD CSV ==========================
df = pd.read_csv(CSV_PATH)
print("df shape:", df.shape)

# ========================== 3) LABELS + MULTI-HOT ==========================
LABELS = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule',
    'Pneumonia','Pneumothorax','Consolidation','Edema','Emphysema',
    'Fibrosis','Pleural_Thickening','Hernia'
]

def to_multi_hot(lbl_str: str):
    y = np.zeros(len(LABELS), dtype=np.float32)
    if isinstance(lbl_str, str) and lbl_str != "No Finding":
        for t in lbl_str.split("|"):
            if t in LABELS:
                y[LABELS.index(t)] = 1.0
    return y

Y = np.stack([to_multi_hot(s) for s in df["Finding Labels"].astype(str)], axis=0)
print("Y shape:", Y.shape)
print("Positives per class:", dict(zip(LABELS, Y.sum(axis=0).astype(int))))

# ========================== 4) PATIENT-LEVEL SPLIT (80/10/10) ==========================
df["Patient ID"] = df["Patient ID"].astype(str)
bucket = df["Patient ID"].apply(lambda x: hash(x) % 10)  # 0..9
train_df = df[bucket < 8].reset_index(drop=True)
val_df   = df[bucket == 8].reset_index(drop=True)
test_df  = df[bucket == 9].reset_index(drop=True)
print("Split sizes -> Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# ========================== 5) INDEX FILES ACROSS SHARDS ==========================
# Your layout: BASE / images_XXX / images / *.png
name_to_path = {}
for p in BASE.glob("images_*/images/*.png"):
    name_to_path[p.name] = str(p)

print("Indexed files:", len(name_to_path))
first20 = df["Image Index"].head(20).tolist()
missing20 = [n for n in first20 if n not in name_to_path]
print("Missing among first 20:", len(missing20))
if missing20:
    print("Example missing:", missing20[:5])

# ========================== 6) DATASET / DATALOADERS ==========================
IMG_SIZE = 384
train_tfms = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

def row_to_multi_hot_tensor(row):
    return torch.tensor(to_multi_hot(row["Finding Labels"]), dtype=torch.float32)

class ChestXray(Dataset):
    def __init__(self, df, index_map, tfm):
        self.df = df.reset_index(drop=True)
        self.idx = index_map
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fname = r["Image Index"]
        img_path = self.idx.get(fname)
        if img_path is None:
            raise FileNotFoundError(f"Image not indexed: {fname}")
        img = Image.open(img_path).convert("RGB")
        x = self.tfm(img)
        y = row_to_multi_hot_tensor(r)
        return x, y

train_ds = ChestXray(train_df, name_to_path, train_tfms)
val_ds   = ChestXray(val_df,   name_to_path, val_tfms)
test_ds  = ChestXray(test_df,  name_to_path, val_tfms)

BATCH_SIZE  = 32
NUM_WORKERS = 2  # set 0 if multiprocessing issues

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_dl  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print("DL sizes ->", len(train_ds), len(val_ds), len(test_ds))

# ========================== 7) MODEL ==========================
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, len(LABELS))
model = model.to(DEVICE)

# ========================== 8) LOSS (pos_weight from TRAIN) ==========================
train_multi = np.vstack(train_df["Finding Labels"].astype(str).map(
    lambda s: np.array(to_multi_hot(s), dtype=np.float32)
).values)
pos = train_multi.sum(axis=0)    # per-class positives in TRAIN
N = len(train_df)
pos = np.clip(pos, 1.0, None)    # avoid div-by-zero
pos_weight = torch.tensor((N - pos) / pos, dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

# ========================== 9) EVALUATION ==========================
def evaluate(model, loader, threshold=0.5):
    model.eval()
    ys, ps = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            p = torch.sigmoid(model(xb))
            ys.append(yb.cpu()); ps.append(p.cpu())
    ys = torch.cat(ys, 0).numpy()
    ps = torch.cat(ps, 0).numpy()

    # AUROC per class
    aurocs = []
    for c in range(len(LABELS)):
        y_c, p_c = ys[:, c], ps[:, c]
        try:
            aurocs.append(roc_auc_score(y_c, p_c))
        except ValueError:
            aurocs.append(np.nan)
    mean_auc = float(np.nanmean(aurocs))

    # F1 at fixed threshold (reference only)
    preds = (ps >= threshold).astype("int32")
    micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)
    return mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1

# ========================== 10) TRAIN with CHECKPOINTS ==========================
best_path = "densenet121_best.pt"
last_path = "densenet121_last.pt"
RESUME    = True
EPOCHS    = 20

def make_ckpt(epoch, best_auc):
    return {
        "epoch": epoch,
        "best_auc": best_auc,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "scaler_state": scaler.state_dict(),
    }

def save_last(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), last_path)
    print(f" Saved last: {last_path} (epoch={epoch})")

def save_best(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), best_path)
    print(f" Saved BEST: {best_path} (epoch={epoch}, best_auc={best_auc:.4f})")

# Resume
start_epoch = 1
best_auc = -1.0
if RESUME and os.path.exists(last_path):
    ckpt = torch.load(last_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    scheduler.load_state_dict(ckpt["scheduler_state"])
    scaler.load_state_dict(ckpt["scaler_state"])
    start_epoch = ckpt["epoch"] + 1
    best_auc    = ckpt.get("best_auc", best_auc)
    print(f"   Resuming from epoch {start_epoch} (best_auc={best_auc:.4f})")
else:
    print(" Starting fresh training")

# Loop
for ep in range(start_epoch, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    for xb, yb in tqdm(train_dl, desc=f"Epoch {ep}/{EPOCHS}"):
        xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        scaler.scale(loss).backward()
        scaler.step(optimizer); scaler.update()
        running_loss += loss.item() * xb.size(0)

    scheduler.step()
    train_loss = running_loss / len(train_ds)

    val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate(model, val_dl)
    print(f"[Val] mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | train_loss={train_loss:.4f}")

    # Always save "last"
    save_last(ep, best_auc)

    # Save "best" if improved
    if val_mean_auc > best_auc:
        best_auc = val_mean_auc
        save_best(ep, best_auc)

# ========================== 11) TEST ==========================
if Path(best_path).exists():
    ckpt = torch.load(best_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    print(f" Loaded BEST from epoch {ckpt['epoch']} (best_auc={ckpt['best_auc']:.4f})")
else:
    print("⚠ BEST checkpoint not found, using last model weights in memory.")

test_mean_auc, test_per_cls, test_micro_f1, test_macro_f1 = evaluate(model, test_dl)
print(f"[TEST] mean AUROC={test_mean_auc:.4f} | microF1={test_micro_f1:.4f} | macroF1={test_macro_f1:.4f}")
print("Per-class AUROC:", {k: (None if np.isnan(v) else float(v)) for k, v in test_per_cls.items()})

# ========================== 12) (Optional) SINGLE-IMAGE PREDICT ==========================
@torch.no_grad()
def predict_image(img_path, threshold=0.5, top_k=5):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    x = val_tfms(img).unsqueeze(0).to(DEVICE)
    probs = torch.sigmoid(model(x)).squeeze(0).cpu().numpy()
    pred_labels = [LABELS[i] for i, p in enumerate(probs) if p >= threshold]
    top_idx = np.argsort(-probs)[:top_k]
    top = [(LABELS[i], float(probs[i])) for i in top_idx]
    return dict(zip(LABELS, map(float, probs))), pred_labels, top

# Example:
# img_example = "/chest-xray/images_001/images/00001335_006.png"
# probs, preds, top5 = predict_image(img_example, threshold=0.5, top_k=5)
# print("Predicted (>=0.5):", preds)
# print("Top-5:", top5)

Device: cuda
df shape: (112120, 12)
Y shape: (112120, 14)
Positives per class: {'Atelectasis': 11559, 'Cardiomegaly': 2776, 'Effusion': 13317, 'Infiltration': 19894, 'Mass': 5782, 'Nodule': 6331, 'Pneumonia': 1431, 'Pneumothorax': 5302, 'Consolidation': 4667, 'Edema': 2303, 'Emphysema': 2516, 'Fibrosis': 1686, 'Pleural_Thickening': 3385, 'Hernia': 227}
Split sizes -> Train: 89618 Val: 11254 Test: 11248
Indexed files: 112120
Missing among first 20: 0
DL sizes -> 89618 11254 11248
 Starting fresh training


Epoch 1/20: 100%|██████████| 2801/2801 [18:29<00:00,  2.52it/s]


[Val] mean AUROC=0.7545 | microF1=0.1608 | macroF1=0.1584 | train_loss=1.1880
 Saved last: densenet121_last.pt (epoch=1)
 Saved BEST: densenet121_best.pt (epoch=1, best_auc=0.7545)


Epoch 2/20: 100%|██████████| 2801/2801 [18:25<00:00,  2.53it/s]


[Val] mean AUROC=0.7721 | microF1=0.2022 | macroF1=0.1865 | train_loss=1.1045
 Saved last: densenet121_last.pt (epoch=2)
 Saved BEST: densenet121_best.pt (epoch=2, best_auc=0.7721)


Epoch 3/20: 100%|██████████| 2801/2801 [18:23<00:00,  2.54it/s]


[Val] mean AUROC=0.7952 | microF1=0.1951 | macroF1=0.1844 | train_loss=1.0546
 Saved last: densenet121_last.pt (epoch=3)
 Saved BEST: densenet121_best.pt (epoch=3, best_auc=0.7952)


Epoch 4/20: 100%|██████████| 2801/2801 [18:22<00:00,  2.54it/s]


[Val] mean AUROC=0.8222 | microF1=0.2222 | macroF1=0.2090 | train_loss=1.0119
 Saved last: densenet121_last.pt (epoch=4)
 Saved BEST: densenet121_best.pt (epoch=4, best_auc=0.8222)


Epoch 5/20: 100%|██████████| 2801/2801 [18:18<00:00,  2.55it/s]


[Val] mean AUROC=0.8234 | microF1=0.2365 | macroF1=0.2163 | train_loss=0.9701
 Saved last: densenet121_last.pt (epoch=5)
 Saved BEST: densenet121_best.pt (epoch=5, best_auc=0.8234)


Epoch 6/20: 100%|██████████| 2801/2801 [19:28<00:00,  2.40it/s]


[Val] mean AUROC=0.8335 | microF1=0.2389 | macroF1=0.2078 | train_loss=0.9354
 Saved last: densenet121_last.pt (epoch=6)
 Saved BEST: densenet121_best.pt (epoch=6, best_auc=0.8335)


Epoch 7/20: 100%|██████████| 2801/2801 [20:29<00:00,  2.28it/s]


[Val] mean AUROC=0.8391 | microF1=0.2633 | macroF1=0.2360 | train_loss=0.8987
 Saved last: densenet121_last.pt (epoch=7)
 Saved BEST: densenet121_best.pt (epoch=7, best_auc=0.8391)


Epoch 8/20: 100%|██████████| 2801/2801 [20:26<00:00,  2.28it/s]


[Val] mean AUROC=0.8439 | microF1=0.2698 | macroF1=0.2383 | train_loss=0.8618
 Saved last: densenet121_last.pt (epoch=8)
 Saved BEST: densenet121_best.pt (epoch=8, best_auc=0.8439)


Epoch 9/20: 100%|██████████| 2801/2801 [19:47<00:00,  2.36it/s]


[Val] mean AUROC=0.8462 | microF1=0.2718 | macroF1=0.2405 | train_loss=0.8357
 Saved last: densenet121_last.pt (epoch=9)
 Saved BEST: densenet121_best.pt (epoch=9, best_auc=0.8462)


Epoch 10/20: 100%|██████████| 2801/2801 [18:29<00:00,  2.52it/s]


[Val] mean AUROC=0.8466 | microF1=0.2741 | macroF1=0.2429 | train_loss=0.8218
 Saved last: densenet121_last.pt (epoch=10)
 Saved BEST: densenet121_best.pt (epoch=10, best_auc=0.8466)


Epoch 11/20: 100%|██████████| 2801/2801 [18:38<00:00,  2.50it/s]


[Val] mean AUROC=0.8465 | microF1=0.2738 | macroF1=0.2417 | train_loss=0.8131
 Saved last: densenet121_last.pt (epoch=11)


Epoch 12/20: 100%|██████████| 2801/2801 [18:34<00:00,  2.51it/s]


[Val] mean AUROC=0.8479 | microF1=0.2688 | macroF1=0.2396 | train_loss=0.8137
 Saved last: densenet121_last.pt (epoch=12)
 Saved BEST: densenet121_best.pt (epoch=12, best_auc=0.8479)


Epoch 13/20: 100%|██████████| 2801/2801 [18:31<00:00,  2.52it/s]


[Val] mean AUROC=0.8468 | microF1=0.2701 | macroF1=0.2417 | train_loss=0.8159
 Saved last: densenet121_last.pt (epoch=13)


Epoch 14/20:  88%|████████▊ | 2464/2801 [16:18<02:13,  2.52it/s]


KeyboardInterrupt: 

the model  start, lear well, but after some epock the larning whill more stabil, the reasen should be that the model is overfitting and not learn the data, 
to find out the reasn that the  model is not learning we did stop the model, we want to continue the with the model to incre the accurant.  some of the acurrancy is stable it can be noise data, or some lables that re in the traing sett that we need to fix.

Second training

In [None]:

# ========================== SETUP ==========================
# !pip install -q pandas torch torchvision scikit-learn tqdm

import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ========================== 1) PATHS ==========================
CSV_PATH = Path("./chest-xray/Data_Entry_2017.csv")    # <- change if needed
BASE     = Path("./chest-xray")                        # contains images_001, images_002, ...
assert CSV_PATH.exists(), f"CSV not found: {CSV_PATH}"
assert BASE.exists(), f"Base images folder not found: {BASE}"

# ========================== 2) LOAD CSV ==========================
df = pd.read_csv(CSV_PATH)
print("df shape:", df.shape)

# ========================== 3) LABELS + MULTI-HOT ==========================
LABELS = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule',
    'Pneumonia','Pneumothorax','Consolidation','Edema','Emphysema',
    'Fibrosis','Pleural_Thickening','Hernia'
]

def to_multi_hot(lbl_str: str):
    y = np.zeros(len(LABELS), dtype=np.float32)
    if isinstance(lbl_str, str) and lbl_str != "No Finding":
        for t in lbl_str.split("|"):
            if t in LABELS:
                y[LABELS.index(t)] = 1.0
    return y

Y = np.stack([to_multi_hot(s) for s in df["Finding Labels"].astype(str)], axis=0)
print("Y shape:", Y.shape)
print("Positives per class:", dict(zip(LABELS, Y.sum(axis=0).astype(int))))

# ========================== 4) PATIENT-LEVEL SPLIT (80/10/10) ==========================
df["Patient ID"] = df["Patient ID"].astype(str)
bucket = df["Patient ID"].apply(lambda x: hash(x) % 10)  # 0..9
train_df = df[bucket < 8].reset_index(drop=True)
val_df   = df[bucket == 8].reset_index(drop=True)
test_df  = df[bucket == 9].reset_index(drop=True)
print("Split sizes -> Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# ========================== 5) INDEX FILES ACROSS SHARDS ==========================
# Your layout: BASE / images_XXX / images / *.png
name_to_path = {}
for p in BASE.glob("images_*/images/*.png"):
    name_to_path[p.name] = str(p)

print("Indexed files:", len(name_to_path))
first20 = df["Image Index"].head(20).tolist()
missing20 = [n for n in first20 if n not in name_to_path]
print("Missing among first 20:", len(missing20))
if missing20:
    print("Example missing:", missing20[:5])

# ========================== 6) DATASET / DATALOADERS ==========================
IMG_SIZE = 384
train_tfms = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

def row_to_multi_hot_tensor(row):
    return torch.tensor(to_multi_hot(row["Finding Labels"]), dtype=torch.float32)

class ChestXray(Dataset):
    def __init__(self, df, index_map, tfm):
        self.df = df.reset_index(drop=True)
        self.idx = index_map
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fname = r["Image Index"]
        img_path = self.idx.get(fname)
        if img_path is None:
            raise FileNotFoundError(f"Image not indexed: {fname}")
        img = Image.open(img_path).convert("RGB")
        x = self.tfm(img)
        y = row_to_multi_hot_tensor(r)
        return x, y

train_ds = ChestXray(train_df, name_to_path, train_tfms)
val_ds   = ChestXray(val_df,   name_to_path, val_tfms)
test_ds  = ChestXray(test_df,  name_to_path, val_tfms)

BATCH_SIZE  = 64
NUM_WORKERS = 2  # set 0 if multiprocessing issues

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_dl  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print("DL sizes ->", len(train_ds), len(val_ds), len(test_ds))

# ========================== 7) MODEL ==========================
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, len(LABELS))
model = model.to(DEVICE)

# ========================== 8) LOSS (pos_weight from TRAIN) ==========================
train_multi = np.vstack(train_df["Finding Labels"].astype(str).map(
    lambda s: np.array(to_multi_hot(s), dtype=np.float32)
).values)
pos = train_multi.sum(axis=0)    # per-class positives in TRAIN
N = len(train_df)
pos = np.clip(pos, 1.0, None)    # avoid div-by-zero
pos_weight = torch.tensor((N - pos) / pos, dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

# ========================== 9) EVALUATION ==========================
def evaluate(model, loader, threshold=0.5):
    model.eval()
    ys, ps = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            p = torch.sigmoid(model(xb))
            ys.append(yb.cpu()); ps.append(p.cpu())
    ys = torch.cat(ys, 0).numpy()
    ps = torch.cat(ps, 0).numpy()

    # AUROC per class
    aurocs = []
    for c in range(len(LABELS)):
        y_c, p_c = ys[:, c], ps[:, c]
        try:
            aurocs.append(roc_auc_score(y_c, p_c))
        except ValueError:
            aurocs.append(np.nan)
    mean_auc = float(np.nanmean(aurocs))

    # F1 at fixed threshold (reference only)
    preds = (ps >= threshold).astype("int32")
    micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)
    return mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1

# ========================== 10) TRAIN with CHECKPOINTS ==========================
best_path = "densenet121_best.pt"
last_path = "densenet121_last.pt"
RESUME    = False
EPOCHS    = 20

def make_ckpt(epoch, best_auc):
    return {
        "epoch": epoch,
        "best_auc": best_auc,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "scaler_state": scaler.state_dict(),
    }

def save_last(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), last_path)
    print(f" Saved last: {last_path} (epoch={epoch})")

def save_best(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), best_path)
    print(f" Saved BEST: {best_path} (epoch={epoch}, best_auc={best_auc:.4f})")

# Resume
start_epoch = 1
best_auc = -1.0
if RESUME and os.path.exists(last_path):
    ckpt = torch.load(last_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    scheduler.load_state_dict(ckpt["scheduler_state"])
    scaler.load_state_dict(ckpt["scaler_state"])
    start_epoch = ckpt["epoch"] + 1
    best_auc    = ckpt.get("best_auc", best_auc)
    print(f"   Resuming from epoch {start_epoch} (best_auc={best_auc:.4f})")
else:
    print(" Starting fresh training")

# Loop
for ep in range(start_epoch, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    for xb, yb in tqdm(train_dl, desc=f"Epoch {ep}/{EPOCHS}"):
        xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        scaler.scale(loss).backward()
        scaler.step(optimizer); scaler.update()
        running_loss += loss.item() * xb.size(0)

    scheduler.step()
    train_loss = running_loss / len(train_ds)

    val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate(model, val_dl)
    print(f"[Val] mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | train_loss={train_loss:.4f}")

    # # Always save "last"
    # save_last(ep, best_auc)

    # # Save "best" if improved
    # if val_mean_auc > best_auc:
    #     best_auc = val_mean_auc
    #     save_best(ep, best_auc)

# ========================== 11) TEST ==========================
if Path(best_path).exists():
    ckpt = torch.load(best_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    print(f" Loaded BEST from epoch {ckpt['epoch']} (best_auc={ckpt['best_auc']:.4f})")
else:
    print("⚠ BEST checkpoint not found, using last model weights in memory.")

test_mean_auc, test_per_cls, test_micro_f1, test_macro_f1 = evaluate(model, test_dl)
print(f"[TEST] mean AUROC={test_mean_auc:.4f} | microF1={test_micro_f1:.4f} | macroF1={test_macro_f1:.4f}")
print("Per-class AUROC:", {k: (None if np.isnan(v) else float(v)) for k, v in test_per_cls.items()})

# ========================== 12) (Optional) SINGLE-IMAGE PREDICT ==========================
@torch.no_grad()
def predict_image(img_path, threshold=0.5, top_k=5):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    x = val_tfms(img).unsqueeze(0).to(DEVICE)
    probs = torch.sigmoid(model(x)).squeeze(0).cpu().numpy()
    pred_labels = [LABELS[i] for i, p in enumerate(probs) if p >= threshold]
    top_idx = np.argsort(-probs)[:top_k]
    top = [(LABELS[i], float(probs[i])) for i in top_idx]
    return dict(zip(LABELS, map(float, probs))), pred_labels, top

# Example:
# img_example = "/chest-xray/images_001/images/00001335_006.png"
# probs, preds, top5 = predict_image(img_example, threshold=0.5, top_k=5)
# print("Predicted (>=0.5):", preds)
# print("Top-5:", top5)

Device: cuda
df shape: (112120, 12)
Y shape: (112120, 14)
Positives per class: {'Atelectasis': 11559, 'Cardiomegaly': 2776, 'Effusion': 13317, 'Infiltration': 19894, 'Mass': 5782, 'Nodule': 6331, 'Pneumonia': 1431, 'Pneumothorax': 5302, 'Consolidation': 4667, 'Edema': 2303, 'Emphysema': 2516, 'Fibrosis': 1686, 'Pleural_Thickening': 3385, 'Hernia': 227}
Split sizes -> Train: 89425 Val: 11395 Test: 11300
Indexed files: 112120
Missing among first 20: 0
DL sizes -> 89425 11395 11300
 Starting fresh training


Epoch 1/20: 100%|██████████| 1398/1398 [20:19<00:00,  1.15it/s]


[Val] mean AUROC=0.7883 | microF1=0.2181 | macroF1=0.1903 | train_loss=1.0652


Epoch 2/20: 100%|██████████| 1398/1398 [20:02<00:00,  1.16it/s]


[Val] mean AUROC=0.8035 | microF1=0.2343 | macroF1=0.2136 | train_loss=0.9594


Epoch 3/20: 100%|██████████| 1398/1398 [20:05<00:00,  1.16it/s]


[Val] mean AUROC=0.8098 | microF1=0.2514 | macroF1=0.2212 | train_loss=0.9028


Epoch 4/20: 100%|██████████| 1398/1398 [20:26<00:00,  1.14it/s]


[Val] mean AUROC=0.8279 | microF1=0.2603 | macroF1=0.2262 | train_loss=0.8703


Epoch 5/20: 100%|██████████| 1398/1398 [20:06<00:00,  1.16it/s]


[Val] mean AUROC=0.8292 | microF1=0.2761 | macroF1=0.2422 | train_loss=0.8280


Epoch 6/20: 100%|██████████| 1398/1398 [19:24<00:00,  1.20it/s]


[Val] mean AUROC=0.8282 | microF1=0.2858 | macroF1=0.2425 | train_loss=0.7821


Epoch 7/20: 100%|██████████| 1398/1398 [18:19<00:00,  1.27it/s]


[Val] mean AUROC=0.8353 | microF1=0.2790 | macroF1=0.2469 | train_loss=0.7376


Epoch 8/20: 100%|██████████| 1398/1398 [18:23<00:00,  1.27it/s]


[Val] mean AUROC=0.8355 | microF1=0.2835 | macroF1=0.2500 | train_loss=0.6955


Epoch 9/20: 100%|██████████| 1398/1398 [18:18<00:00,  1.27it/s]


[Val] mean AUROC=0.8357 | microF1=0.2870 | macroF1=0.2543 | train_loss=0.6629


Epoch 10/20: 100%|██████████| 1398/1398 [18:17<00:00,  1.27it/s]


[Val] mean AUROC=0.8351 | microF1=0.3004 | macroF1=0.2622 | train_loss=0.6445


Epoch 11/20: 100%|██████████| 1398/1398 [18:18<00:00,  1.27it/s]


[Val] mean AUROC=0.8351 | microF1=0.3004 | macroF1=0.2653 | train_loss=0.6392


Epoch 12/20: 100%|██████████| 1398/1398 [18:17<00:00,  1.27it/s]


[Val] mean AUROC=0.8347 | microF1=0.3006 | macroF1=0.2615 | train_loss=0.6386


Epoch 13/20:  33%|███▎      | 462/1398 [06:04<10:51,  1.44it/s]

even that i incereas the modell to the 100 epch with the batch isze of 64, the result didnt get beder then 84, which we reach the same result with the previpus one. the under code we change the leraning rate fraom the 1 to 5, with a bache size of 64 

In [1]:

# ========================== SETUP ==========================
# !pip install -q pandas torch torchvision scikit-learn tqdm

import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ========================== 1) PATHS ==========================
CSV_PATH = Path("./chest-xray/Data_Entry_2017.csv")    # <- change if needed
BASE     = Path("./chest-xray")                        # contains images_001, images_002, ...
assert CSV_PATH.exists(), f"CSV not found: {CSV_PATH}"
assert BASE.exists(), f"Base images folder not found: {BASE}"

# ========================== 2) LOAD CSV ==========================
df = pd.read_csv(CSV_PATH)
print("df shape:", df.shape)

# ========================== 3) LABELS + MULTI-HOT ==========================
LABELS = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule',
    'Pneumonia','Pneumothorax','Consolidation','Edema','Emphysema',
    'Fibrosis','Pleural_Thickening','Hernia'
]

def to_multi_hot(lbl_str: str):
    y = np.zeros(len(LABELS), dtype=np.float32)
    if isinstance(lbl_str, str) and lbl_str != "No Finding":
        for t in lbl_str.split("|"):
            if t in LABELS:
                y[LABELS.index(t)] = 1.0
    return y

Y = np.stack([to_multi_hot(s) for s in df["Finding Labels"].astype(str)], axis=0)
print("Y shape:", Y.shape)
print("Positives per class:", dict(zip(LABELS, Y.sum(axis=0).astype(int))))

# ========================== 4) PATIENT-LEVEL SPLIT (80/10/10) ==========================
df["Patient ID"] = df["Patient ID"].astype(str)
bucket = df["Patient ID"].apply(lambda x: hash(x) % 10)  # 0..9
train_df = df[bucket < 8].reset_index(drop=True)
val_df   = df[bucket == 8].reset_index(drop=True)
test_df  = df[bucket == 9].reset_index(drop=True)
print("Split sizes -> Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# ========================== 5) INDEX FILES ACROSS SHARDS ==========================
# Your layout: BASE / images_XXX / images / *.png
name_to_path = {}
for p in BASE.glob("images_*/images/*.png"):
    name_to_path[p.name] = str(p)

print("Indexed files:", len(name_to_path))
first20 = df["Image Index"].head(20).tolist()
missing20 = [n for n in first20 if n not in name_to_path]
print("Missing among first 20:", len(missing20))
if missing20:
    print("Example missing:", missing20[:5])

# ========================== 6) DATASET / DATALOADERS ==========================
IMG_SIZE = 384
train_tfms = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

def row_to_multi_hot_tensor(row):
    return torch.tensor(to_multi_hot(row["Finding Labels"]), dtype=torch.float32)

class ChestXray(Dataset):
    def __init__(self, df, index_map, tfm):
        self.df = df.reset_index(drop=True)
        self.idx = index_map
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fname = r["Image Index"]
        img_path = self.idx.get(fname)
        if img_path is None:
            raise FileNotFoundError(f"Image not indexed: {fname}")
        img = Image.open(img_path).convert("RGB")
        x = self.tfm(img)
        y = row_to_multi_hot_tensor(r)
        return x, y

train_ds = ChestXray(train_df, name_to_path, train_tfms)
val_ds   = ChestXray(val_df,   name_to_path, val_tfms)
test_ds  = ChestXray(test_df,  name_to_path, val_tfms)

BATCH_SIZE  = 64
NUM_WORKERS = 2  # set 0 if multiprocessing issues

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_dl  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print("DL sizes ->", len(train_ds), len(val_ds), len(test_ds))

# ========================== 7) MODEL ==========================
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, len(LABELS))
model = model.to(DEVICE)

# ========================== 8) LOSS (pos_weight from TRAIN) ==========================
train_multi = np.vstack(train_df["Finding Labels"].astype(str).map(
    lambda s: np.array(to_multi_hot(s), dtype=np.float32)
).values)
pos = train_multi.sum(axis=0)    # per-class positives in TRAIN
N = len(train_df)
pos = np.clip(pos, 1.0, None)    # avoid div-by-zero
pos_weight = torch.tensor((N - pos) / pos, dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

# ========================== 9) EVALUATION ==========================
# def evaluate(model, loader, threshold=0.5):
#     model.eval()
#     ys, ps = [], []
#     with torch.no_grad():
#         for xb, yb in loader:
#             xb, yb = xb.to(DEVICE), yb.to(DEVICE)
#             p = torch.sigmoid(model(xb))
#             ys.append(yb.cpu()); ps.append(p.cpu())
#     ys = torch.cat(ys, 0).numpy()
#     ps = torch.cat(ps, 0).numpy()

#     # AUROC per class
#     aurocs = []
#     for c in range(len(LABELS)):
#         y_c, p_c = ys[:, c], ps[:, c]
#         try:
#             aurocs.append(roc_auc_score(y_c, p_c))
#         except ValueError:
#             aurocs.append(np.nan)
#     mean_auc = float(np.nanmean(aurocs))

#     # F1 at fixed threshold (reference only)
#     preds = (ps >= threshold).astype("int32")
#     micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
#     macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)
#     return mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1





# ========================== 9b) EVALUATION WITH LOSS ==========================
# def evaluate_with_loss(model, loader, criterion, threshold=0.5):
#     model.eval()
#     ys, ps = [], []
#     val_loss = 0.0
#     n = 0
#     with torch.no_grad():
#         for xb, yb in loader:
#             xb, yb = xb.to(DEVICE), yb.to(DEVICE)
#             logits = model(xb)
#             loss = criterion(logits, yb)
#             val_loss += loss.item() * xb.size(0)
#             n += xb.size(0)
#             p = torch.sigmoid(logits)
#             ys.append(yb.cpu()); ps.append(p.cpu())

#     val_loss /= max(n, 1)

#     ys = torch.cat(ys, 0).numpy()
#     ps = torch.cat(ps, 0).numpy()

#     # AUROC per class
#     aurocs = []
#     for c in range(len(LABELS)):
#         y_c, p_c = ys[:, c], ps[:, c]
#         try:
#             aurocs.append(roc_auc_score(y_c, p_c))
#         except ValueError:
#             aurocs.append(np.nan)
#     mean_auc = float(np.nanmean(aurocs))

#     # F1 at fixed threshold (reference only)
#     preds = (ps >= threshold).astype("int32")
#     micro_f1 = f1_score(ys, preds, average="micro",  zero_division=0)
#     macro_f1 = f1_score(ys, preds, average="macro",  zero_division=0)

#     return val_loss, mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1



# ========================== 9b) EVALUATION WITH LOSS ==========================
def evaluate_with_loss(model, loader, criterion, threshold=0.5):
    model.eval()
    total_loss, total_n = 0.0, 0
    ys, ps = [], []

    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            # Viktig: loss på LOGITS (ikke sigmoid), samme criterion som trening
            loss = criterion(logits, yb)
            total_loss += loss.item() * xb.size(0)
            total_n    += xb.size(0)

            probs = torch.sigmoid(logits)
            ys.append(yb.cpu()); ps.append(probs.cpu())

    val_loss = total_loss / max(total_n, 1)

    ys = torch.cat(ys, 0).numpy()
    ps = torch.cat(ps, 0).numpy()

    # AUROC per klasse
    aurocs = []
    for c in range(len(LABELS)):
        y_c, p_c = ys[:, c], ps[:, c]
        try:
            aurocs.append(roc_auc_score(y_c, p_c))
        except ValueError:
            aurocs.append(np.nan)
    mean_auc = float(np.nanmean(aurocs))

    # F1 ved fast terskel (referanse)
    from sklearn.metrics import f1_score
    preds = (ps >= threshold).astype("int32")
    micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)

    return val_loss, mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1





# ========================== 10) TRAIN with CHECKPOINTS ==========================
best_path = "densenet121_best.pt"
last_path = "densenet121_last.pt"
RESUME    = False
EPOCHS    = 100

def make_ckpt(epoch, best_auc):
    return {
        "epoch": epoch,
        "best_auc": best_auc,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "scaler_state": scaler.state_dict(),
    }

def save_last(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), last_path)
    print(f" Saved last: {last_path} (epoch={epoch})")

def save_best(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), best_path)
    print(f" Saved BEST: {best_path} (epoch={epoch}, best_auc={best_auc:.4f})")

# Resume
start_epoch = 1
best_auc = -1.0
if RESUME and os.path.exists(last_path):
    ckpt = torch.load(last_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    scheduler.load_state_dict(ckpt["scheduler_state"])
    scaler.load_state_dict(ckpt["scaler_state"])
    start_epoch = ckpt["epoch"] + 1
    best_auc    = ckpt.get("best_auc", best_auc)
    print(f"   Resuming from epoch {start_epoch} (best_auc={best_auc:.4f})")
else:
    print(" Starting fresh training")

# Loop
for ep in range(start_epoch, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    for xb, yb in tqdm(train_dl, desc=f"Epoch {ep}/{EPOCHS}"):
        xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        scaler.scale(loss).backward()
        scaler.step(optimizer); scaler.update()
        running_loss += loss.item() * xb.size(0)

    # scheduler.step()



    # Etter scheduler.step():
    curr_lr = optimizer.param_groups[0]["lr"]
    print(f"LR now: {curr_lr:.6f}")

    train_loss = running_loss / len(train_ds)

    # val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate(model, val_dl)
    # print(f"[Val] mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | train_loss={train_loss:.4f}")

    
    val_loss, val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate_with_loss(model, val_dl, criterion)
    print(f" mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | train_loss={train_loss:.4f} | Val_loss={val_loss:.4f}")


    # # Always save "last"
    # save_last(ep, best_auc)

    # # Save "best" if improved
    # if val_mean_auc > best_auc:
    #     best_auc = val_mean_auc
    #     save_best(ep, best_auc)

# ========================== 11) TEST ==========================
if Path(best_path).exists():
    ckpt = torch.load(best_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    print(f" Loaded BEST from epoch {ckpt['epoch']} (best_auc={ckpt['best_auc']:.4f})")
else:
    print("⚠ BEST checkpoint not found, using last model weights in memory.")

test_mean_auc, test_per_cls, test_micro_f1, test_macro_f1 = evaluate(model, test_dl)
print(f"[TEST] mean AUROC={test_mean_auc:.4f} | microF1={test_micro_f1:.4f} | macroF1={test_macro_f1:.4f}")
print("Per-class AUROC:", {k: (None if np.isnan(v) else float(v)) for k, v in test_per_cls.items()})

# ========================== 12) (Optional) SINGLE-IMAGE PREDICT ==========================
@torch.no_grad()
def predict_image(img_path, threshold=0.5, top_k=5):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    x = val_tfms(img).unsqueeze(0).to(DEVICE)
    probs = torch.sigmoid(model(x)).squeeze(0).cpu().numpy()
    pred_labels = [LABELS[i] for i, p in enumerate(probs) if p >= threshold]
    top_idx = np.argsort(-probs)[:top_k]
    top = [(LABELS[i], float(probs[i])) for i in top_idx]
    return dict(zip(LABELS, map(float, probs))), pred_labels, top

# Example:
# img_example = "/chest-xray/images_001/images/00001335_006.png"
# probs, preds, top5 = predict_image(img_example, threshold=0.5, top_k=5)
# print("Predicted (>=0.5):", preds)
# print("Top-5:", top5)

Device: cuda
df shape: (112120, 12)
Y shape: (112120, 14)
Positives per class: {'Atelectasis': 11559, 'Cardiomegaly': 2776, 'Effusion': 13317, 'Infiltration': 19894, 'Mass': 5782, 'Nodule': 6331, 'Pneumonia': 1431, 'Pneumothorax': 5302, 'Consolidation': 4667, 'Edema': 2303, 'Emphysema': 2516, 'Fibrosis': 1686, 'Pleural_Thickening': 3385, 'Hernia': 227}
Split sizes -> Train: 90000 Val: 10437 Test: 11683
Indexed files: 112120
Missing among first 20: 0
DL sizes -> 90000 10437 11683
 Starting fresh training


Epoch 1/100: 100%|██████████| 1407/1407 [18:46<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.7708 | microF1=0.1898 | macroF1=0.1770 | train_loss=1.1624 | Val_loss=1.1219


Epoch 2/100: 100%|██████████| 1407/1407 [18:52<00:00,  1.24it/s]

LR now: 0.000500





 mean AUROC=0.7855 | microF1=0.1836 | macroF1=0.1792 | train_loss=1.0800 | Val_loss=1.0818


Epoch 3/100: 100%|██████████| 1407/1407 [18:46<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.7948 | microF1=0.2276 | macroF1=0.2015 | train_loss=1.0422 | Val_loss=1.0636


Epoch 4/100: 100%|██████████| 1407/1407 [18:50<00:00,  1.24it/s]

LR now: 0.000500





 mean AUROC=0.8105 | microF1=0.2391 | macroF1=0.2179 | train_loss=1.0062 | Val_loss=1.0365


Epoch 5/100: 100%|██████████| 1407/1407 [18:48<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.8117 | microF1=0.2382 | macroF1=0.2091 | train_loss=0.9862 | Val_loss=1.0300


Epoch 6/100: 100%|██████████| 1407/1407 [18:37<00:00,  1.26it/s]

LR now: 0.000500





 mean AUROC=0.8099 | microF1=0.2144 | macroF1=0.2057 | train_loss=0.9679 | Val_loss=1.0424


Epoch 7/100: 100%|██████████| 1407/1407 [18:46<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.8199 | microF1=0.2285 | macroF1=0.2077 | train_loss=0.9497 | Val_loss=0.9816


Epoch 8/100: 100%|██████████| 1407/1407 [18:44<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.8226 | microF1=0.2422 | macroF1=0.2179 | train_loss=0.9276 | Val_loss=0.9955


Epoch 9/100: 100%|██████████| 1407/1407 [18:40<00:00,  1.26it/s]

LR now: 0.000500





 mean AUROC=0.8273 | microF1=0.2342 | macroF1=0.2116 | train_loss=0.9181 | Val_loss=0.9579


Epoch 10/100: 100%|██████████| 1407/1407 [18:44<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.8292 | microF1=0.2622 | macroF1=0.2335 | train_loss=0.9055 | Val_loss=0.9853


Epoch 11/100: 100%|██████████| 1407/1407 [18:44<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.8103 | microF1=0.2008 | macroF1=0.1957 | train_loss=0.8920 | Val_loss=1.0196


Epoch 12/100: 100%|██████████| 1407/1407 [18:43<00:00,  1.25it/s]

LR now: 0.000500





 mean AUROC=0.8301 | microF1=0.2433 | macroF1=0.2231 | train_loss=0.8833 | Val_loss=0.9483


Epoch 13/100: 100%|██████████| 1407/1407 [18:47<00:00,  1.25it/s]

LR now: 0.000500





KeyboardInterrupt: 

in the above we try did try to print the loss val for bedre training analytic and deactive the stackduler

In [None]:

class EarlyStopping:
    """
    Early stop når val_metric ikke forbedres innen 'patience' epoker.
    mode='max' brukes for metrikker som AUROC/F1 (vi vil opp),
    mode='min' for loss (vi vil ned).
    """
    def __init__(self, patience=8, min_delta=0.0, mode='max', restore_best=True):
        assert mode in ('max', 'min')
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.restore_best = restore_best

        self.best = None
        self.num_bad = 0
        self.should_stop = False
        self.best_state = None

    def _is_better(self, current, best):
        if self.mode == 'max':
            return current > best + self.min_delta
        else:
            return current < best - self.min_delta

    def step(self, current, model=None):
        if self.best is None:
            self.best = current
            if model is not None:
                self.best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            return False  # ikke stoppe første gang

        if self._is_better(current, self.best):
            self.best = current
            self.num_bad = 0
            if model is not None:
                self.best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            return False
        else:
            self.num_bad += 1
            if self.num_bad >= self.patience:
                self.should_stop = True
                return True
            return False



# ========================== SETUP ==========================
# !pip install -q pandas torch torchvision scikit-learn tqdm

import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ========================== 1) PATHS ==========================
CSV_PATH = Path("./chest-xray/Data_Entry_2017.csv")    # <- change if needed
BASE     = Path("./chest-xray")                        # contains images_001, images_002, ...
assert CSV_PATH.exists(), f"CSV not found: {CSV_PATH}"
assert BASE.exists(), f"Base images folder not found: {BASE}"

# ========================== 2) LOAD CSV ==========================
df = pd.read_csv(CSV_PATH)
print("df shape:", df.shape)

# ========================== 3) LABELS + MULTI-HOT ==========================
LABELS = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule',
    'Pneumonia','Pneumothorax','Consolidation','Edema','Emphysema',
    'Fibrosis','Pleural_Thickening','Hernia'
]

def to_multi_hot(lbl_str: str):
    y = np.zeros(len(LABELS), dtype=np.float32)
    if isinstance(lbl_str, str) and lbl_str != "No Finding":
        for t in lbl_str.split("|"):
            if t in LABELS:
                y[LABELS.index(t)] = 1.0
    return y

Y = np.stack([to_multi_hot(s) for s in df["Finding Labels"].astype(str)], axis=0)
print("Y shape:", Y.shape)
print("Positives per class:", dict(zip(LABELS, Y.sum(axis=0).astype(int))))

# ========================== 4) PATIENT-LEVEL SPLIT (80/10/10) ==========================
df["Patient ID"] = df["Patient ID"].astype(str)
bucket = df["Patient ID"].apply(lambda x: hash(x) % 10)  # 0..9
train_df = df[bucket < 8].reset_index(drop=True)
val_df   = df[bucket == 8].reset_index(drop=True)
test_df  = df[bucket == 9].reset_index(drop=True)
print("Split sizes -> Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# ========================== 5) INDEX FILES ACROSS SHARDS ==========================
# Your layout: BASE / images_XXX / images / *.png
name_to_path = {}
for p in BASE.glob("images_*/images/*.png"):
    name_to_path[p.name] = str(p)

print("Indexed files:", len(name_to_path))
first20 = df["Image Index"].head(20).tolist()
missing20 = [n for n in first20 if n not in name_to_path]
print("Missing among first 20:", len(missing20))
if missing20:
    print("Example missing:", missing20[:5])

# ========================== 6) DATASET / DATALOADERS ==========================
IMG_SIZE = 384
train_tfms = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

def row_to_multi_hot_tensor(row):
    return torch.tensor(to_multi_hot(row["Finding Labels"]), dtype=torch.float32)

class ChestXray(Dataset):
    def __init__(self, df, index_map, tfm):
        self.df = df.reset_index(drop=True)
        self.idx = index_map
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fname = r["Image Index"]
        img_path = self.idx.get(fname)
        if img_path is None:
            raise FileNotFoundError(f"Image not indexed: {fname}")
        img = Image.open(img_path).convert("RGB")
        x = self.tfm(img)
        y = row_to_multi_hot_tensor(r)
        return x, y

train_ds = ChestXray(train_df, name_to_path, train_tfms)
val_ds   = ChestXray(val_df,   name_to_path, val_tfms)
test_ds  = ChestXray(test_df,  name_to_path, val_tfms)

BATCH_SIZE  = 64
NUM_WORKERS = 2  # set 0 if multiprocessing issues

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_dl  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print("DL sizes ->", len(train_ds), len(val_ds), len(test_ds))

# ========================== 7) MODEL ==========================
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, len(LABELS))
model = model.to(DEVICE)

# ========================== 8) LOSS (pos_weight from TRAIN) ==========================
train_multi = np.vstack(train_df["Finding Labels"].astype(str).map(
    lambda s: np.array(to_multi_hot(s), dtype=np.float32)
).values)
pos = train_multi.sum(axis=0)    # per-class positives in TRAIN
N = len(train_df)
pos = np.clip(pos, 1.0, None)    # avoid div-by-zero
pos_weight = torch.tensor((N - pos) / pos, dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)



#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

# ========================== 9) EVALUATION ==========================
# def evaluate(model, loader, threshold=0.5):
#     model.eval()
#     ys, ps = [], []
#     with torch.no_grad():
#         for xb, yb in loader:
#             xb, yb = xb.to(DEVICE), yb.to(DEVICE)
#             p = torch.sigmoid(model(xb))
#             ys.append(yb.cpu()); ps.append(p.cpu())
#     ys = torch.cat(ys, 0).numpy()
#     ps = torch.cat(ps, 0).numpy()

#     # AUROC per class
#     aurocs = []
#     for c in range(len(LABELS)):
#         y_c, p_c = ys[:, c], ps[:, c]
#         try:
#             aurocs.append(roc_auc_score(y_c, p_c))
#         except ValueError:
#             aurocs.append(np.nan)
#     mean_auc = float(np.nanmean(aurocs))

#     # F1 at fixed threshold (reference only)
#     preds = (ps >= threshold).astype("int32")
#     micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
#     macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)
#     return mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1





# ========================== 9b) EVALUATION WITH LOSS ==========================
# def evaluate_with_loss(model, loader, criterion, threshold=0.5):
#     model.eval()
#     ys, ps = [], []
#     val_loss = 0.0
#     n = 0
#     with torch.no_grad():
#         for xb, yb in loader:
#             xb, yb = xb.to(DEVICE), yb.to(DEVICE)
#             logits = model(xb)
#             loss = criterion(logits, yb)
#             val_loss += loss.item() * xb.size(0)
#             n += xb.size(0)
#             p = torch.sigmoid(logits)
#             ys.append(yb.cpu()); ps.append(p.cpu())

#     val_loss /= max(n, 1)

#     ys = torch.cat(ys, 0).numpy()
#     ps = torch.cat(ps, 0).numpy()

#     # AUROC per class
#     aurocs = []
#     for c in range(len(LABELS)):
#         y_c, p_c = ys[:, c], ps[:, c]
#         try:
#             aurocs.append(roc_auc_score(y_c, p_c))
#         except ValueError:
#             aurocs.append(np.nan)
#     mean_auc = float(np.nanmean(aurocs))

#     # F1 at fixed threshold (reference only)
#     preds = (ps >= threshold).astype("int32")
#     micro_f1 = f1_score(ys, preds, average="micro",  zero_division=0)
#     macro_f1 = f1_score(ys, preds, average="macro",  zero_division=0)

#     return val_loss, mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1



# ========================== 9b) EVALUATION WITH LOSS ==========================
def evaluate_with_loss(model, loader, criterion, threshold=0.5):
    model.eval()
    total_loss, total_n = 0.0, 0
    ys, ps = [], []

    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            # Viktig: loss på LOGITS (ikke sigmoid), samme criterion som trening
            loss = criterion(logits, yb)
            total_loss += loss.item() * xb.size(0)
            total_n    += xb.size(0)

            probs = torch.sigmoid(logits)
            ys.append(yb.cpu()); ps.append(probs.cpu())

    val_loss = total_loss / max(total_n, 1)

    ys = torch.cat(ys, 0).numpy()
    ps = torch.cat(ps, 0).numpy()

    # AUROC per klasse
    aurocs = []
    for c in range(len(LABELS)):
        y_c, p_c = ys[:, c], ps[:, c]
        try:
            aurocs.append(roc_auc_score(y_c, p_c))
        except ValueError:
            aurocs.append(np.nan)
    mean_auc = float(np.nanmean(aurocs))

    # F1 ved fast terskel (referanse)
    from sklearn.metrics import f1_score
    preds = (ps >= threshold).astype("int32")
    micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)

    return val_loss, mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1





# ========================== 10) TRAIN with CHECKPOINTS ==========================
best_path = "densenet121_best.pt"
last_path = "densenet121_last.pt"
RESUME    = False
EPOCHS    = 100

def make_ckpt(epoch, best_auc):
    return {
        "epoch": epoch,
        "best_auc": best_auc,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "scaler_state": scaler.state_dict(),
    }

def save_last(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), last_path)
    print(f" Saved last: {last_path} (epoch={epoch})")

def save_best(epoch, best_auc):
    torch.save(make_ckpt(epoch, best_auc), best_path)
    print(f" Saved BEST: {best_path} (epoch={epoch}, best_auc={best_auc:.4f})")

# Resume
start_epoch = 1
best_auc = -1.0
if RESUME and os.path.exists(last_path):
    ckpt = torch.load(last_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    scheduler.load_state_dict(ckpt["scheduler_state"])
    scaler.load_state_dict(ckpt["scaler_state"])
    start_epoch = ckpt["epoch"] + 1
    best_auc    = ckpt.get("best_auc", best_auc)
    print(f"   Resuming from epoch {start_epoch} (best_auc={best_auc:.4f})")
else:
    print(" Starting fresh training")

# Loop
# for ep in range(start_epoch, EPOCHS + 1):
#     model.train()
#     running_loss = 0.0
#     for xb, yb in tqdm(train_dl, desc=f"Epoch {ep}/{EPOCHS}"):
#         xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
#         optimizer.zero_grad(set_to_none=True)
#         with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
#             logits = model(xb)
#             loss = criterion(logits, yb)
#         scaler.scale(loss).backward()
#         scaler.step(optimizer); scaler.update()
#         running_loss += loss.item() * xb.size(0)

#     scheduler.step()



# Stopper når val_mean_auc ikke forbedres i 'patience' epoker
early_stopper = EarlyStopping(patience=8, min_delta=1e-3, mode='max', restore_best=True)
best_auc = -1.0


# oppsett
    steps_per_epoch = len(train_dl)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=5e-4, steps_per_epoch=steps_per_epoch, epochs=EPOCHS
    )
    
    for ep in range(start_epoch, EPOCHS + 1):
        model.train()
        running_loss = 0.0
        for xb, yb in train_dl:
            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
                logits = model(xb.to(DEVICE)); loss = criterion(logits, yb.to(DEVICE))
            scaler.scale(loss).backward()
            scaler.step(optimizer); scaler.update()
    
            # <- VIKTIG: OneCycleLR steppes per batch
            scheduler.step()
    
            running_loss += loss.item() * xb.size(0)
        
            train_loss = running_loss / len(train_ds)
            # print gjeldende LR (fra første param group)
            print(f"LR now: {optimizer.param_groups[0]['lr']:.6f}")
        
    



    # val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate(model, val_dl)
    # print(f"[Val] mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | train_loss={train_loss:.4f}")

    
    val_loss, val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate_with_loss(model, val_dl, criterion)
    print(f" mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | train_loss={train_loss:.4f} | Val_loss={val_loss:.4f}")


    # # Always save "last"
    # save_last(ep, best_auc)

    # # Save "best" if improved
    # if val_mean_auc > best_auc:
    #     best_auc = val_mean_auc
    #     save_best(ep, best_auc)

# ========================== 11) TEST ==========================
if Path(best_path).exists():
    ckpt = torch.load(best_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    print(f" Loaded BEST from epoch {ckpt['epoch']} (best_auc={ckpt['best_auc']:.4f})")
else:
    print("⚠ BEST checkpoint not found, using last model weights in memory.")

test_mean_auc, test_per_cls, test_micro_f1, test_macro_f1 = evaluate(model, test_dl)
print(f"[TEST] mean AUROC={test_mean_auc:.4f} | microF1={test_micro_f1:.4f} | macroF1={test_macro_f1:.4f}")
print("Per-class AUROC:", {k: (None if np.isnan(v) else float(v)) for k, v in test_per_cls.items()})

# ========================== 12) (Optional) SINGLE-IMAGE PREDICT ==========================
@torch.no_grad()
def predict_image(img_path, threshold=0.5, top_k=5):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    x = val_tfms(img).unsqueeze(0).to(DEVICE)
    probs = torch.sigmoid(model(x)).squeeze(0).cpu().numpy()
    pred_labels = [LABELS[i] for i, p in enumerate(probs) if p >= threshold]
    top_idx = np.argsort(-probs)[:top_k]
    top = [(LABELS[i], float(probs[i])) for i in top_idx]
    return dict(zip(LABELS, map(float, probs))), pred_labels, top

# Example:
# img_example = "/chest-xray/images_001/images/00001335_006.png"
# probs, preds, top5 = predict_image(img_example, threshold=0.5, top_k=5)
# print("Predicted (>=0.5):", preds)
# print("Top-5:", top5)

i did comment out the stack holder and did try to solve it without it, in this stage i implenet the early stoping for the traning and the did som change in the stach, see the above cod.
the under code is an implemenation of the early stop 

In [6]:
# ========================== SETUP ==========================
# !pip install -q pandas torch torchvision scikit-learn tqdm

import os
from pathlib import Path
import hashlib
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ========================== EARLY STOPPING ==========================
class EarlyStopping:
    """
    Early stop når val_metric ikke forbedres innen 'patience' epoker.
    mode='max' brukes for metrikker som AUROC/F1 (vi vil opp),
    mode='min' for loss (vi vil ned).
    """
    def __init__(self, patience=8, min_delta=0.0, mode='max', restore_best=True):
        assert mode in ('max', 'min')
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.restore_best = restore_best

        self.best = None
        self.num_bad = 0
        self.should_stop = False
        self.best_state = None

    def _is_better(self, current, best):
        if self.mode == 'max':
            return current > best + self.min_delta
        else:
            return current < best - self.min_delta

    def step(self, current, model=None):
        if self.best is None:
            self.best = current
            if model is not None:
                self.best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            return False  # ikke stoppe første gang

        if self._is_better(current, self.best):
            self.best = current
            self.num_bad = 0
            if model is not None:
                self.best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            return False
        else:
            self.num_bad += 1
            if self.num_bad >= self.patience:
                self.should_stop = True
                return True
            return False

# ========================== 1) PATHS ==========================
CSV_PATH = Path("./chest-xray/Data_Entry_2017.csv")    # <- change if needed
BASE     = Path("./chest-xray")                        # contains images_001, images_002, ...
assert CSV_PATH.exists(), f"CSV not found: {CSV_PATH}"
assert BASE.exists(), f"Base images folder not found: {BASE}"

# ========================== 2) LOAD CSV ==========================
df = pd.read_csv(CSV_PATH)
print("df shape:", df.shape)

# ========================== 3) LABELS + MULTI-HOT ==========================
LABELS = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule',
    'Pneumonia','Pneumothorax','Consolidation','Edema','Emphysema',
    'Fibrosis','Pleural_Thickening','Hernia'
]

def to_multi_hot(lbl_str: str):
    y = np.zeros(len(LABELS), dtype=np.float32)
    if isinstance(lbl_str, str) and lbl_str != "No Finding":
        for t in lbl_str.split("|"):
            if t in LABELS:
                y[LABELS.index(t)] = 1.0
    return y

Y = np.stack([to_multi_hot(s) for s in df["Finding Labels"].astype(str)], axis=0)
print("Y shape:", Y.shape)
print("Positives per class:", dict(zip(LABELS, Y.sum(axis=0).astype(int))))

# ========================== 4) PATIENT-LEVEL SPLIT (80/10/10) ==========================
def stable_bucket(pid: str) -> int:
    return int(hashlib.md5(pid.encode()).hexdigest(), 16) % 10

df["Patient ID"] = df["Patient ID"].astype(str)
bucket = df["Patient ID"].map(stable_bucket)
train_df = df[bucket < 8].reset_index(drop=True)
val_df   = df[bucket == 8].reset_index(drop=True)
test_df  = df[bucket == 9].reset_index(drop=True)
print("Split sizes -> Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# ========================== 5) INDEX FILES ACROSS SHARDS ==========================
# Layout forventet: BASE / images_XXX / images / *.png
name_to_path = {}
for p in BASE.glob("images_*/images/*.png"):
    name_to_path[p.name] = str(p)

print("Indexed files:", len(name_to_path))
first20 = df["Image Index"].head(20).tolist()
missing20 = [n for n in first20 if n not in name_to_path]
print("Missing among first 20:", len(missing20))
if missing20:
    print("Example missing:", missing20[:5])

# ========================== 6) DATASET / DATALOADERS ==========================
IMG_SIZE = 384
train_tfms = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

def row_to_multi_hot_tensor(row):
    return torch.tensor(to_multi_hot(row["Finding Labels"]), dtype=torch.float32)

class ChestXray(Dataset):
    def __init__(self, df, index_map, tfm):
        self.df = df.reset_index(drop=True)
        self.idx = index_map
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fname = r["Image Index"]
        img_path = self.idx.get(fname)
        if img_path is None:
            raise FileNotFoundError(f"Image not indexed: {fname}")
        img = Image.open(img_path).convert("RGB")
        x = self.tfm(img)
        y = row_to_multi_hot_tensor(r)
        return x, y

train_ds = ChestXray(train_df, name_to_path, train_tfms)
val_ds   = ChestXray(val_df,   name_to_path, val_tfms)
test_ds  = ChestXray(test_df,  name_to_path, val_tfms)

BATCH_SIZE  = 64
NUM_WORKERS = 2
PIN_MEMORY  = (DEVICE == "cuda")

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                      num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                      num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
test_dl  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False,
                      num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

print("DL sizes ->", len(train_ds), len(val_ds), len(test_ds))

# ========================== 7) MODEL ==========================
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, len(LABELS))
model = model.to(DEVICE)

# ========================== 8) LOSS (pos_weight fra TRAIN) ==========================
train_multi = np.vstack(train_df["Finding Labels"].astype(str).map(
    lambda s: np.array(to_multi_hot(s), dtype=np.float32)
).values)
pos = train_multi.sum(axis=0)    # per-klasse positives i TRAIN
N = len(train_df)
pos = np.clip(pos, 1.0, None)    # unngå div-by-zero
pos_weight = torch.tensor((N - pos) / pos, dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

# ========================== 9) EVALUATION HELPERS ==========================
def evaluate_with_loss(model, loader, criterion, threshold=0.5):
    model.eval()
    total_loss, total_n = 0.0, 0
    ys, ps = [], []

    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            loss = criterion(logits, yb)
            total_loss += loss.item() * xb.size(0)
            total_n    += xb.size(0)

            probs = torch.sigmoid(logits)
            ys.append(yb.cpu()); ps.append(probs.cpu())

    val_loss = total_loss / max(total_n, 1)

    ys = torch.cat(ys, 0).numpy()
    ps = torch.cat(ps, 0).numpy()

    # AUROC per klasse
    aurocs = []
    for c in range(len(LABELS)):
        y_c, p_c = ys[:, c], ps[:, c]
        try:
            aurocs.append(roc_auc_score(y_c, p_c))
        except ValueError:
            aurocs.append(np.nan)
    mean_auc = float(np.nanmean(aurocs))

    preds = (ps >= threshold).astype("int32")
    micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)

    return val_loss, mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1

# Wrapper så TEST-delen funker
def evaluate(model, loader, threshold=0.5):
    _, mean_auc, per_cls, micro_f1, macro_f1 = evaluate_with_loss(model, loader, criterion, threshold)
    return mean_auc, per_cls, micro_f1, macro_f1

# ========================== 10) TRAIN with CHECKPOINTS ==========================
best_path = "densenet121_best.pt"
last_path = "densenet121_last.pt"
RESUME    = False
EPOCHS    = 100

def make_ckpt(epoch, best_auc, scheduler=None):
    return {
        "epoch": epoch,
        "best_auc": best_auc,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": (scheduler.state_dict() if scheduler is not None else None),
        "scaler_state": scaler.state_dict(),
    }

def save_last(epoch, best_auc, scheduler=None):
    torch.save(make_ckpt(epoch, best_auc, scheduler), last_path)
    print(f" Saved last: {last_path} (epoch={epoch})")

def save_best(epoch, best_auc, scheduler=None):
    torch.save(make_ckpt(epoch, best_auc, scheduler), best_path)
    print(f" Saved BEST: {best_path} (epoch={epoch}, best_auc={best_auc:.4f})")

# Opprett scheduler på forhånd
steps_per_epoch = len(train_dl)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=5e-4, steps_per_epoch=steps_per_epoch, epochs=EPOCHS
)

# Resume (valgfritt)
start_epoch = 1
best_auc = -1.0
if RESUME and os.path.exists(last_path):
    ckpt = torch.load(last_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    if ckpt.get("scheduler_state") is not None:
        scheduler.load_state_dict(ckpt["scheduler_state"])
    scaler.load_state_dict(ckpt["scaler_state"])
    start_epoch = ckpt["epoch"] + 1
    best_auc    = ckpt.get("best_auc", best_auc)
    print(f"   Resuming from epoch {start_epoch} (best_auc={best_auc:.4f})")
else:
    print(" Starting fresh training")

# Early stopping
early_stopper = EarlyStopping(patience=8, min_delta=1e-3, mode='max', restore_best=True)

for ep in range(start_epoch, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    n_seen = 0

    for i, (xb, yb) in enumerate(tqdm(train_dl, desc=f"Epoch {ep}/{EPOCHS}", leave=False)):
        xb = xb.to(DEVICE, non_blocking=True)
        yb = yb.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)

        scaler.scale(loss).backward()
        # (valgfritt) gradient clipping:
        # scaler.unscale_(optimizer)
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        scaler.step(optimizer)
        scaler.update()

        scheduler.step()  # OneCycleLR per batch

        running_loss += loss.item() * xb.size(0)
        n_seen += xb.size(0)

    train_loss = running_loss / max(n_seen, 1)

    # --- VAL ---
    val_loss, val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate_with_loss(model, val_dl, criterion)
    print(f"Epoch {ep:03d}/{EPOCHS} | "
          f" mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | "
          f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f}")

    # Early stop på AUROC
    if early_stopper.step(val_mean_auc, model=model):
        print(f"Early stopping at epoch {ep} (best={early_stopper.best:.4f})")
        if early_stopper.restore_best and early_stopper.best_state is not None:
            model.load_state_dict(early_stopper.best_state)
            model.to(DEVICE)
        # valgfritt: lagre beste etter restore
        save_best(ep, early_stopper.best, scheduler)
        break

    # Lagre "last" hver epoch
    #save_last(ep, best_auc, scheduler)

    # # Lagre "best" når forbedret
    # if val_mean_auc > best_auc:
    #     best_auc = val_mean_auc
    #     save_best(ep, best_auc, scheduler)

# ========================== 11) TEST ==========================
if Path(best_path).exists():
    ckpt = torch.load(best_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    model.to(DEVICE)
    print(f" Loaded BEST from epoch {ckpt['epoch']} (best_auc={ckpt['best_auc']:.4f})")
else:
    print("⚠ BEST checkpoint not found, using last model weights in memory.")

test_mean_auc, test_per_cls, test_micro_f1, test_macro_f1 = evaluate(model, test_dl)
print(f"[TEST] mean AUROC={test_mean_auc:.4f} | microF1={test_micro_f1:.4f} | macroF1={test_macro_f1:.4f}")
print("Per-class AUROC:", {k: (None if np.isnan(v) else float(v)) for k, v in test_per_cls.items()})

# ========================== 12) (Optional) SINGLE-IMAGE PREDICT ==========================
@torch.no_grad()
def predict_image(img_path, threshold=0.5, top_k=5):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    x = val_tfms(img).unsqueeze(0).to(DEVICE)
    probs = torch.sigmoid(model(x)).squeeze(0).cpu().numpy()
    pred_labels = [LABELS[i] for i, p in enumerate(probs) if p >= threshold]
    top_idx = np.argsort(-probs)[:top_k]
    top = [(LABELS[i], float(probs[i])) for i in top_idx]
    return dict(zip(LABELS, map(float, probs))), pred_labels, top

# Example:
# img_example = "/chest-xray/images_001/images/00001335_006.png"
# probs, preds, top5 = predict_image(img_example, threshold=0.5, top_k=5)
# print("Predicted (>=0.5):", preds)
# print("Top-5:", top5)


Device: cuda
df shape: (112120, 12)
Y shape: (112120, 14)
Positives per class: {'Atelectasis': 11559, 'Cardiomegaly': 2776, 'Effusion': 13317, 'Infiltration': 19894, 'Mass': 5782, 'Nodule': 6331, 'Pneumonia': 1431, 'Pneumothorax': 5302, 'Consolidation': 4667, 'Edema': 2303, 'Emphysema': 2516, 'Fibrosis': 1686, 'Pleural_Thickening': 3385, 'Hernia': 227}
Split sizes -> Train: 89644 Val: 11321 Test: 11155
Indexed files: 112120
Missing among first 20: 0
DL sizes -> 89644 11321 11155
 Starting fresh training


                                                                

KeyboardInterrupt: 

In [1]:
# ========================== SETUP ==========================
# !pip install -q pandas torch torchvision scikit-learn tqdm

import os
from pathlib import Path
import hashlib
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

# ========================== EARLY STOPPING ==========================
class EarlyStopping:
    """
    Early stop når val_metric ikke forbedres innen 'patience' epoker.
    mode='max' brukes for metrikker som AUROC/F1 (vi vil opp),
    mode='min' for loss (vi vil ned).
    """
    def __init__(self, patience=8, min_delta=0.0, mode='max', restore_best=True):
        assert mode in ('max', 'min')
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.restore_best = restore_best

        self.best = None
        self.num_bad = 0
        self.should_stop = False
        self.best_state = None

    def _is_better(self, current, best):
        if self.mode == 'max':
            return current > best + self.min_delta
        else:
            return current < best - self.min_delta

    def step(self, current, model=None):
        if self.best is None:
            self.best = current
            if model is not None:
                self.best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            return False  # ikke stoppe første gang

        if self._is_better(current, self.best):
            self.best = current
            self.num_bad = 0
            if model is not None:
                self.best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            return False
        else:
            self.num_bad += 1
            if self.num_bad >= self.patience:
                self.should_stop = True
                return True
            return False

# ========================== 1) PATHS ==========================
CSV_PATH = Path("./chest-xray/Data_Entry_2017.csv")    # <- change if needed
BASE     = Path("./chest-xray")                        # contains images_001, images_002, ...
assert CSV_PATH.exists(), f"CSV not found: {CSV_PATH}"
assert BASE.exists(), f"Base images folder not found: {BASE}"

# ========================== 2) LOAD CSV ==========================
df = pd.read_csv(CSV_PATH)
print("df shape:", df.shape)

# ========================== 3) LABELS + MULTI-HOT ==========================
LABELS = [
    'Atelectasis','Cardiomegaly','Effusion','Infiltration','Mass','Nodule',
    'Pneumonia','Pneumothorax','Consolidation','Edema','Emphysema',
    'Fibrosis','Pleural_Thickening','Hernia'
]

def to_multi_hot(lbl_str: str):
    y = np.zeros(len(LABELS), dtype=np.float32)
    if isinstance(lbl_str, str) and lbl_str != "No Finding":
        for t in lbl_str.split("|"):
            if t in LABELS:
                y[LABELS.index(t)] = 1.0
    return y

Y = np.stack([to_multi_hot(s) for s in df["Finding Labels"].astype(str)], axis=0)
print("Y shape:", Y.shape)
print("Positives per class:", dict(zip(LABELS, Y.sum(axis=0).astype(int))))

# ========================== 4) PATIENT-LEVEL SPLIT (80/10/10) ==========================
def stable_bucket(pid: str) -> int:
    return int(hashlib.md5(pid.encode()).hexdigest(), 16) % 10

df["Patient ID"] = df["Patient ID"].astype(str)
bucket = df["Patient ID"].map(stable_bucket)
train_df = df[bucket < 8].reset_index(drop=True)
val_df   = df[bucket == 8].reset_index(drop=True)
test_df  = df[bucket == 9].reset_index(drop=True)
print("Split sizes -> Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# ========================== 5) INDEX FILES ACROSS SHARDS ==========================
# Layout forventet: BASE / images_XXX / images / *.png
name_to_path = {}
for p in BASE.glob("images_*/images/*.png"):
    name_to_path[p.name] = str(p)

print("Indexed files:", len(name_to_path))
first20 = df["Image Index"].head(20).tolist()
missing20 = [n for n in first20 if n not in name_to_path]
print("Missing among first 20:", len(missing20))
if missing20:
    print("Example missing:", missing20[:5])

# ========================== 6) DATASET / DATALOADERS ==========================
IMG_SIZE = 384
train_tfms = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

def row_to_multi_hot_tensor(row):
    return torch.tensor(to_multi_hot(row["Finding Labels"]), dtype=torch.float32)

class ChestXray(Dataset):
    def __init__(self, df, index_map, tfm):
        self.df = df.reset_index(drop=True)
        self.idx = index_map
        self.tfm = tfm
    def __len__(self):
        return len(self.df)
    def __getitem__(self, i):
        r = self.df.iloc[i]
        fname = r["Image Index"]
        img_path = self.idx.get(fname)
        if img_path is None:
            raise FileNotFoundError(f"Image not indexed: {fname}")
        img = Image.open(img_path).convert("RGB")
        x = self.tfm(img)
        y = row_to_multi_hot_tensor(r)
        return x, y

train_ds = ChestXray(train_df, name_to_path, train_tfms)
val_ds   = ChestXray(val_df,   name_to_path, val_tfms)
test_ds  = ChestXray(test_df,  name_to_path, val_tfms)

BATCH_SIZE  = 64
NUM_WORKERS = 2
PIN_MEMORY  = (DEVICE == "cuda")

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                      num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
val_dl   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                      num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
test_dl  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False,
                      num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

print("DL sizes ->", len(train_ds), len(val_ds), len(test_ds))

# ========================== 7) MODEL ==========================
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, len(LABELS))
model = model.to(DEVICE)

# ========================== 8) LOSS (pos_weight fra TRAIN) ==========================
train_multi = np.vstack(train_df["Finding Labels"].astype(str).map(
    lambda s: np.array(to_multi_hot(s), dtype=np.float32)
).values)
pos = train_multi.sum(axis=0)    # per-klasse positives i TRAIN
N = len(train_df)
pos = np.clip(pos, 1.0, None)    # unngå div-by-zero
pos_weight = torch.tensor((N - pos) / pos, dtype=torch.float32, device=DEVICE)

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE == "cuda"))

# ========================== 9) EVALUATION HELPERS ==========================
def evaluate_with_loss(model, loader, criterion, threshold=0.5):
    model.eval()
    total_loss, total_n = 0.0, 0
    ys, ps = [], []

    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            loss = criterion(logits, yb)
            total_loss += loss.item() * xb.size(0)
            total_n    += xb.size(0)

            probs = torch.sigmoid(logits)
            ys.append(yb.cpu()); ps.append(probs.cpu())

    val_loss = total_loss / max(total_n, 1)

    ys = torch.cat(ys, 0).numpy()
    ps = torch.cat(ps, 0).numpy()

    # AUROC per klasse
    aurocs = []
    for c in range(len(LABELS)):
        y_c, p_c = ys[:, c], ps[:, c]
        try:
            aurocs.append(roc_auc_score(y_c, p_c))
        except ValueError:
            aurocs.append(np.nan)
    mean_auc = float(np.nanmean(aurocs))

    preds = (ps >= threshold).astype("int32")
    micro_f1 = f1_score(ys, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(ys, preds, average="macro", zero_division=0)

    return val_loss, mean_auc, dict(zip(LABELS, aurocs)), micro_f1, macro_f1

# Wrapper så TEST-delen funker
def evaluate(model, loader, threshold=0.5):
    _, mean_auc, per_cls, micro_f1, macro_f1 = evaluate_with_loss(model, loader, criterion, threshold)
    return mean_auc, per_cls, micro_f1, macro_f1

# ========================== 10) TRAIN with CHECKPOINTS ==========================
best_path = "densenet121_best.pt"
last_path = "densenet121_last.pt"
RESUME    = False
EPOCHS    = 100

def make_ckpt(epoch, best_auc, scheduler=None):
    return {
        "epoch": epoch,
        "best_auc": best_auc,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": (scheduler.state_dict() if scheduler is not None else None),
        "scaler_state": scaler.state_dict(),
    }

def save_last(epoch, best_auc, scheduler=None):
    torch.save(make_ckpt(epoch, best_auc, scheduler), last_path)
    print(f" Saved last: {last_path} (epoch={epoch})")

def save_best(epoch, best_auc, scheduler=None):
    torch.save(make_ckpt(epoch, best_auc, scheduler), best_path)
    print(f" Saved BEST: {best_path} (epoch={epoch}, best_auc={best_auc:.4f})")

# Opprett scheduler på forhånd
steps_per_epoch = len(train_dl)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=5e-4, steps_per_epoch=steps_per_epoch, epochs=EPOCHS
)

# Resume (valgfritt)
start_epoch = 1
best_auc = -1.0
if RESUME and os.path.exists(last_path):
    ckpt = torch.load(last_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    if ckpt.get("scheduler_state") is not None:
        scheduler.load_state_dict(ckpt["scheduler_state"])
    scaler.load_state_dict(ckpt["scaler_state"])
    start_epoch = ckpt["epoch"] + 1
    best_auc    = ckpt.get("best_auc", best_auc)
    print(f"   Resuming from epoch {start_epoch} (best_auc={best_auc:.4f})")
else:
    print(" Starting fresh training")

# Early stopping
early_stopper = EarlyStopping(patience=8, min_delta=1e-3, mode='max', restore_best=True)

for ep in range(start_epoch, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    n_seen = 0

    for i, (xb, yb) in enumerate(tqdm(train_dl, desc=f"Epoch {ep}/{EPOCHS}", leave=False)):
        xb = xb.to(DEVICE, non_blocking=True)
        yb = yb.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)

        scaler.scale(loss).backward()
        # (valgfritt) gradient clipping:
        # scaler.unscale_(optimizer)
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        scaler.step(optimizer)
        scaler.update()

        scheduler.step()  # OneCycleLR per batch

        running_loss += loss.item() * xb.size(0)
        n_seen += xb.size(0)

    train_loss = running_loss / max(n_seen, 1)

    # --- VAL ---
    val_loss, val_mean_auc, _, val_micro_f1, val_macro_f1 = evaluate_with_loss(model, val_dl, criterion)
    print(f"Epoch {ep:03d}/{EPOCHS} | "
          f" mean AUROC={val_mean_auc:.4f} | microF1={val_micro_f1:.4f} | macroF1={val_macro_f1:.4f} | "
          f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f}")

    # Early stop på AUROC
    if early_stopper.step(val_mean_auc, model=model):
        print(f"Early stopping at epoch {ep} (best={early_stopper.best:.4f})")
        if early_stopper.restore_best and early_stopper.best_state is not None:
            model.load_state_dict(early_stopper.best_state)
            model.to(DEVICE)
        # valgfritt: lagre beste etter restore
        save_best(ep, early_stopper.best, scheduler)
        break

    # Lagre "last" hver epoch
    #save_last(ep, best_auc, scheduler)

    # # Lagre "best" når forbedret
    # if val_mean_auc > best_auc:
    #     best_auc = val_mean_auc
    #     save_best(ep, best_auc, scheduler)

# ========================== 11) TEST ==========================
if Path(best_path).exists():
    ckpt = torch.load(best_path, map_location=DEVICE)
    model.load_state_dict(ckpt["model_state"])
    model.to(DEVICE)
    print(f" Loaded BEST from epoch {ckpt['epoch']} (best_auc={ckpt['best_auc']:.4f})")
else:
    print("⚠ BEST checkpoint not found, using last model weights in memory.")

test_mean_auc, test_per_cls, test_micro_f1, test_macro_f1 = evaluate(model, test_dl)
print(f"[TEST] mean AUROC={test_mean_auc:.4f} | microF1={test_micro_f1:.4f} | macroF1={test_macro_f1:.4f}")
print("Per-class AUROC:", {k: (None if np.isnan(v) else float(v)) for k, v in test_per_cls.items()})

# ========================== 12) (Optional) SINGLE-IMAGE PREDICT ==========================
@torch.no_grad()
def predict_image(img_path, threshold=0.5, top_k=5):
    model.eval()
    img = Image.open(img_path).convert("RGB")
    x = val_tfms(img).unsqueeze(0).to(DEVICE)
    probs = torch.sigmoid(model(x)).squeeze(0).cpu().numpy()
    pred_labels = [LABELS[i] for i, p in enumerate(probs) if p >= threshold]
    top_idx = np.argsort(-probs)[:top_k]
    top = [(LABELS[i], float(probs[i])) for i in top_idx]
    return dict(zip(LABELS, map(float, probs))), pred_labels, top

# Example:
# img_example = "/chest-xray/images_001/images/00001335_006.png"
# probs, preds, top5 = predict_image(img_example, threshold=0.5, top_k=5)
# print("Predicted (>=0.5):", preds)
# print("Top-5:", top5)


Device: cuda
df shape: (112120, 12)
Y shape: (112120, 14)
Positives per class: {'Atelectasis': 11559, 'Cardiomegaly': 2776, 'Effusion': 13317, 'Infiltration': 19894, 'Mass': 5782, 'Nodule': 6331, 'Pneumonia': 1431, 'Pneumothorax': 5302, 'Consolidation': 4667, 'Edema': 2303, 'Emphysema': 2516, 'Fibrosis': 1686, 'Pleural_Thickening': 3385, 'Hernia': 227}
Split sizes -> Train: 89644 Val: 11321 Test: 11155
Indexed files: 112120
Missing among first 20: 0
DL sizes -> 89644 11321 11155
 Starting fresh training


                                                                

Epoch 001/100 |  mean AUROC=0.7822 | microF1=0.2021 | macroF1=0.1766 | train_loss=1.1280 | val_loss=1.0732


                                                                

Epoch 002/100 |  mean AUROC=0.8059 | microF1=0.2291 | macroF1=0.2007 | train_loss=0.9923 | val_loss=1.0252


                                                                

Epoch 003/100 |  mean AUROC=0.8198 | microF1=0.2399 | macroF1=0.2122 | train_loss=0.9347 | val_loss=0.9865


                                                                

Epoch 004/100 |  mean AUROC=0.8248 | microF1=0.2612 | macroF1=0.2259 | train_loss=0.8961 | val_loss=0.9957


                                                                

Epoch 005/100 |  mean AUROC=0.8250 | microF1=0.2350 | macroF1=0.2136 | train_loss=0.8649 | val_loss=0.9901


                                                                

Epoch 006/100 |  mean AUROC=0.8291 | microF1=0.2473 | macroF1=0.2178 | train_loss=0.8595 | val_loss=0.9713


                                                                

Epoch 007/100 |  mean AUROC=0.8136 | microF1=0.2347 | macroF1=0.2272 | train_loss=0.8544 | val_loss=1.0440


                                                                

Epoch 008/100 |  mean AUROC=0.8335 | microF1=0.2740 | macroF1=0.2496 | train_loss=0.8508 | val_loss=1.0056


                                                                

Epoch 009/100 |  mean AUROC=0.8251 | microF1=0.2457 | macroF1=0.2223 | train_loss=0.8520 | val_loss=1.0081


                                                                 

Epoch 010/100 |  mean AUROC=0.8063 | microF1=0.2472 | macroF1=0.2213 | train_loss=0.8310 | val_loss=1.0795


                                                                 

Epoch 011/100 |  mean AUROC=0.8296 | microF1=0.2713 | macroF1=0.2428 | train_loss=0.8529 | val_loss=1.0437


                                                                 

Epoch 012/100 |  mean AUROC=0.8170 | microF1=0.2195 | macroF1=0.2014 | train_loss=0.8580 | val_loss=1.0308


                                                                 

Epoch 013/100 |  mean AUROC=0.8213 | microF1=0.2405 | macroF1=0.2238 | train_loss=0.8623 | val_loss=1.0297


                                                                 

Epoch 014/100 |  mean AUROC=0.8354 | microF1=0.2619 | macroF1=0.2424 | train_loss=0.8748 | val_loss=0.9833


                                                                 

Epoch 015/100 |  mean AUROC=0.8295 | microF1=0.2707 | macroF1=0.2383 | train_loss=0.8636 | val_loss=0.9984


                                                                 

Epoch 016/100 |  mean AUROC=0.8188 | microF1=0.2414 | macroF1=0.2205 | train_loss=0.8755 | val_loss=1.0491


                                                                 

Epoch 017/100 |  mean AUROC=0.8277 | microF1=0.2495 | macroF1=0.2388 | train_loss=0.8642 | val_loss=0.9976


                                                                 

Epoch 018/100 |  mean AUROC=0.8265 | microF1=0.2424 | macroF1=0.2312 | train_loss=0.8699 | val_loss=1.0046


                                                                 

Epoch 019/100 |  mean AUROC=0.8296 | microF1=0.2498 | macroF1=0.2189 | train_loss=0.8768 | val_loss=1.0581


                                                                 

Epoch 020/100 |  mean AUROC=0.8349 | microF1=0.2769 | macroF1=0.2510 | train_loss=0.8577 | val_loss=1.0828


                                                                 

Epoch 021/100 |  mean AUROC=0.8131 | microF1=0.2395 | macroF1=0.2205 | train_loss=0.8752 | val_loss=1.0627


                                                                 

Epoch 022/100 |  mean AUROC=0.8309 | microF1=0.2446 | macroF1=0.2325 | train_loss=0.8640 | val_loss=1.0302
Early stopping at epoch 22 (best=0.8354)
 Saved BEST: densenet121_best.pt (epoch=22, best_auc=0.8354)
 Loaded BEST from epoch 22 (best_auc=0.8354)
[TEST] mean AUROC=0.8367 | microF1=0.2654 | macroF1=0.2401
Per-class AUROC: {'Atelectasis': 0.8198790424048755, 'Cardiomegaly': 0.8958375221038196, 'Effusion': 0.8881537971582047, 'Infiltration': 0.7060026450977478, 'Mass': 0.8461093517698364, 'Nodule': 0.7895330249974442, 'Pneumonia': 0.761585859477212, 'Pneumothorax': 0.8913858200105019, 'Consolidation': 0.8085117157629805, 'Edema': 0.9063445855614974, 'Emphysema': 0.9049632597962061, 'Fibrosis': 0.8102373210725509, 'Pleural_Thickening': 0.8110970539528977, 'Hernia': 0.8741126422382295}
