### GPU Sanity Check

In [1]:
# Safer allocator settings (help fragmentation on Windows/WDDM)
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
    "backend:cudaMallocAsync,"
    "expandable_segments:True,"
    "max_split_size_mb:64,"
    "garbage_collection_threshold:0.8"
)

import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


CUDA available: True
GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [3]:
# === Recreate classification datasets + loaders (ONLY your Parquet files) ===
from pathlib import Path
from io import BytesIO
from typing import Sequence, Optional, Callable, Union
import numpy as np
import pyarrow.parquet as pq
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset, Dataset as HFDataset
import torch.nn.functional as F

# ---- exact Parquet masks you provided ----
TRAIN_PARQUETS = [
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part001.parquet",
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part002.parquet",
]
VAL_PARQUETS = [
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\validation\validation_part001.parquet",
]
MASK_COLUMN = "label_health_rgb_png"

# ---- mask indexer: dataset index -> PNG ----
class ParquetMasksByIndex:
    def __init__(self, parquet_paths: Sequence[Union[str, Path]], column_png: str = MASK_COLUMN):
        self._tables = [pq.read_table(Path(p)) for p in parquet_paths]
        for t in self._tables:
            if "index" not in t.column_names or column_png not in t.column_names:
                raise ValueError(f"Parquet must have 'index' and '{column_png}'. Got: {t.column_names}")
        self._col = column_png
        self._map = {}
        for tid, t in enumerate(self._tables):
            for rid, ds_idx in enumerate(t["index"].to_pylist()):
                self._map[int(ds_idx)] = (tid, rid)
        print(f"[masks] mapped {len(self._map)} indices from {len(self._tables)} file(s)")

    def get_mask_pil(self, ds_index: int) -> Image.Image:
        tid, rid = self._map[ds_index]
        cell = self._tables[tid][self._col][rid].as_py()
        if isinstance(cell, memoryview): cell = cell.tobytes()
        elif isinstance(cell, bytearray): cell = bytes(cell)
        return Image.open(BytesIO(cell)).convert("RGB")

# ---- HF images: EPFL-ECEO/coralscapes (train/validation) ----
hf_all = load_dataset("EPFL-ECEO/coralscapes")
hf_train: HFDataset = hf_all["train"]
hf_val:   HFDataset = hf_all["validation"]

# ---- PIL -> tensor (copy() avoids "non-writable" warning) ----
def pil_to_tensor_rgb(img: Image.Image) -> torch.Tensor:
    arr = np.asarray(img.convert("RGB"), dtype=np.uint8).copy()
    return torch.from_numpy(arr).permute(2, 0, 1).float() / 255.0

# ---- bind images (HF) + masks (your Parquets only), keep only covered indices ----
class CoralScapesImagesMasks(Dataset):
    def __init__(self, img_ds: HFDataset, masks: ParquetMasksByIndex,
                 img_transform: Optional[Callable] = None,
                 mask_transform: Optional[Callable] = None):
        self.img_ds = img_ds
        self.masks = masks
        self.img_tf = img_transform
        self.mask_tf = mask_transform
        n = len(self.img_ds)
        self.indices = [i for i in range(n) if i in masks._map]
        print(f"[dataset] kept {len(self.indices)}/{n} indices (mask-covered).")

    def __len__(self): return len(self.indices)

    def __getitem__(self, j: int):
        idx = self.indices[j]
        rec = self.img_ds[idx]
        img = rec["image"].convert("RGB")
        mask = self.masks.get_mask_pil(idx)
        if self.img_tf is not None:  img  = self.img_tf(img)
        if self.mask_tf is not None: mask = self.mask_tf(mask)
        return img, mask

masks_train = ParquetMasksByIndex(TRAIN_PARQUETS, MASK_COLUMN)
masks_val   = ParquetMasksByIndex(VAL_PARQUETS,   MASK_COLUMN)
cs_train = CoralScapesImagesMasks(hf_train, masks_train, pil_to_tensor_rgb, pil_to_tensor_rgb)
cs_val   = CoralScapesImagesMasks(hf_val,   masks_val,   pil_to_tensor_rgb, pil_to_tensor_rgb)

# ---- classification wrapper → (image_128x128, label) ----
class MaskToBinaryLabel128(Dataset):
    def __init__(self, base_ds: Dataset, size=128):
        self.base = base_ds
        self.size = size
    def __len__(self): return len(self.base)
    def __getitem__(self, idx):
        img, mask = self.base[idx]  # tensors (3,H,W)
        if img.shape[-2:] != (self.size, self.size):
            img  = F.interpolate(img.unsqueeze(0),  size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
            mask = F.interpolate(mask.unsqueeze(0), size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
        red  = mask[0].sum().item()
        blue = mask[2].sum().item()
        label = 1 if blue > red else 0  # bleached if blue energy > red
        return img, torch.tensor(label, dtype=torch.long)

train_cls = MaskToBinaryLabel128(cs_train, size=128)
val_cls   = MaskToBinaryLabel128(cs_val,   size=128)

print(f"[ready] train_cls len={len(train_cls)} | val_cls len={len(val_cls)}")

# ---- loaders (GPU-friendly: pin_memory). If a global `model` exists, run a 1-batch smoke test.
BATCH = 32  # if OOM: 16 → 8
train_loader = DataLoader(train_cls, batch_size=BATCH, shuffle=True,  num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_cls,   batch_size=BATCH, shuffle=False, num_workers=0, pin_memory=True)
print(f"[loaders] train batches ≈ {len(train_loader)} | val batches ≈ {len(val_loader)}")

if 'model' in globals():
    from torch.cuda.amp import autocast, GradScaler
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = torch.nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    scaler = GradScaler(enabled=(DEVICE.type=="cuda"))
    xb, yb = next(iter(train_loader))
    xb = xb.to(DEVICE, non_blocking=True); yb = yb.to(DEVICE, non_blocking=True)
    with autocast(enabled=(DEVICE.type=="cuda")):
        logits = model(xb); loss = criterion(logits, yb)
    scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
    print("[smoke] 1 batch OK on", DEVICE, "| loss:", float(loss))
else:
    print("[note] Loaders ready. Now run your model cell and training loop.")


[masks] mapped 1517 indices from 2 file(s)
[masks] mapped 166 indices from 1 file(s)
[dataset] kept 1517/1517 indices (mask-covered).
[dataset] kept 166/166 indices (mask-covered).
[ready] train_cls len=1517 | val_cls len=166
[loaders] train batches ≈ 48 | val batches ≈ 6
[note] Loaders ready. Now run your model cell and training loop.


In [4]:
import torch
from torch import nn

# Device
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE, "|", torch.cuda.get_device_name(0) if DEVICE.type=="cuda" else "")
torch.backends.cudnn.benchmark = True  # speed boost

# Keras-like CNN (same conv stack you showed; GAP replaces giant Flatten)
class KerasLikeCNN_GAP(nn.Module):
    def __init__(self, p_drop=0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=0),  # 128 -> 126
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),                 # 126 -> 63
            nn.Conv2d(32, 64, 3, padding=0), # 63 -> 61
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, 1), nn.ReLU(inplace=True),  # Dense(32) over maps
            nn.Conv2d(32, 64, 1), nn.ReLU(inplace=True),  # Dense(64) over maps
            nn.Conv2d(64, 128, 3, padding=0), nn.ReLU(inplace=True),  # 61 -> 59
            nn.Dropout(p=p_drop),
            nn.AdaptiveAvgPool2d(1),         # replaces huge Flatten(59*59*128)
        )
        self.head = nn.Linear(128, 2)        # binary logits

    def forward(self, x):
        x = self.features(x).flatten(1)      # (B,128,1,1) -> (B,128)
        return self.head(x)

model = KerasLikeCNN_GAP(p_drop=0.5).to(DEVICE)
print("Model on:", next(model.parameters()).device)

# Optim, loss, AMP scaler
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler(enabled=(DEVICE.type=="cuda"))

# (optional) quick smoke test on one batch
xb, yb = next(iter(train_loader))
xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
with autocast(enabled=(DEVICE.type=="cuda")):
    logits = model(xb); loss = criterion(logits, yb)
scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
print("Smoke batch OK | loss:", float(loss))


Using device: cuda | NVIDIA GeForce RTX 3050 Ti Laptop GPU
Model on: cuda:0


  scaler = GradScaler(enabled=(DEVICE.type=="cuda"))
  return data.pin_memory(device)
  with autocast(enabled=(DEVICE.type=="cuda")):


Smoke batch OK | loss: 0.65289306640625


In [5]:
import numpy as np
from collections import Counter

# 1) Build index lists the same way your datasets filtered them
train_indices = [i for i in range(len(hf_train)) if i in masks_train._map]
val_indices   = [i for i in range(len(hf_val))   if i in masks_val._map]

def label_from_mask_bytes(mask_pil) -> int:
    # unhealthy if blue channel energy > red channel energy
    arr = np.asarray(mask_pil.convert("RGB"), dtype=np.uint8)
    return 1 if int(arr[...,2].sum()) > int(arr[...,0].sum()) else 0

# 2) Precompute labels directly from original mask PNGs (no interpolation)
y_train = []
for idx in train_indices:
    y_train.append(label_from_mask_bytes(masks_train.get_mask_pil(idx)))
y_val = []
for idx in val_indices:
    y_val.append(label_from_mask_bytes(masks_val.get_mask_pil(idx)))

cnt_tr = Counter(y_train); cnt_va = Counter(y_val)
print("train label counts:", cnt_tr, "| val label counts:", cnt_va)


train label counts: Counter({0: 1470, 1: 47}) | val label counts: Counter({0: 153, 1: 13})


In [9]:
# 1) get training label counts (works with Images128WithLabels or any (x,y) dataset)
def get_label_counts(ds):
    if hasattr(ds, "y"):  # our Images128WithLabels exposes .y
        y = ds.y.cpu().tolist()
    else:
        y = [int(ds[i][1]) for i in range(len(ds))]
    n0 = sum(1 for t in y if t == 0); n1 = len(y) - n0
    return n0, n1

n0, n1 = get_label_counts(train_cls)
print(f"[labels] train counts → healthy(0)={n0} | unhealthy(1)={n1}")

# 2) Class-Balanced weights (effective number of samples, Cui et al.)
def class_balanced_weights(n_per_class, beta=0.99):
    w = []
    for n in n_per_class:
        n = max(1, n)
        w_c = (1 - beta) / (1 - beta**n)
        w.append(w_c)
    # normalize so mean weight ≈ 1 (helps keep loss scale stable)
    m = sum(w)/len(w)
    return torch.tensor([wc/m for wc in w], dtype=torch.float32, device=DEVICE)

cb_alpha = class_balanced_weights([n0, n1], beta=0.99)
print("[CB weights] alpha:", cb_alpha.tolist())

# 3) Focal Cross-Entropy (multiclass) with class-balanced alpha
def focal_ce_loss(logits, target, alpha=None, gamma=1.0):
    # CE per-sample
    ce = F.cross_entropy(logits, target, reduction="none")
    # p_t = prob of the true class
    pt = F.softmax(logits, dim=1).gather(1, target.view(-1,1)).squeeze(1).clamp_(1e-6, 1-1e-6)
    loss = ce * ((1 - pt) ** gamma)
    if alpha is not None:
        loss = loss * alpha[target]
    return loss.mean()

# 5) optimizer + AMP scaler (keep your existing model / GAP head)
criterion = lambda logits, y: focal_ce_loss(logits, y, alpha=cb_alpha, gamma=1.0)
opt = torch.optim.Adam(model.parameters(), lr=5e-4)   # slightly lower LR for stability
scaler = GradScaler(enabled=(DEVICE.type == "cuda"))
torch.backends.cudnn.benchmark = True

# 6) train with early stopping by val loss
class EarlyStopper:
    def __init__(self, patience=6, min_delta=1e-3):
        self.patience = patience; self.min_delta = min_delta
        self.best = float("inf"); self.count = 0
    def step(self, v):
        if self.best - v > self.min_delta:
            self.best = v; self.count = 0; return False
        self.count += 1; return self.count >= self.patience

def run_epoch(loader, train=True):
    model.train() if train else model.eval()
    loss_sum = correct = n = 0
    for xb, yb in loader:
        xb = xb.to(DEVICE, non_blocking=True); yb = yb.to(DEVICE, non_blocking=True)
        if train: opt.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(train), autocast("cuda", enabled=(DEVICE.type=="cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        if train:
            scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
        loss_sum += loss.item() * xb.size(0)
        correct  += (logits.argmax(1) == yb).sum().item()
        n += xb.size(0)
    return loss_sum/max(1,n), correct/max(1,n)

EPOCHS = 2
early = EarlyStopper(patience=6, min_delta=1e-3)
best_state = None

for ep in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss, tr_acc = run_epoch(train_loader, True)
    va_loss, va_acc = run_epoch(val_loader,   False)
    print(f"Epoch {ep:02d}/{EPOCHS} - loss:{tr_loss:.4f} - acc:{tr_acc:.4f} - val_loss:{va_loss:.4f} - val_acc:{va_acc:.4f} - {time.time()-t0:.1f}s")
    if best_state is None or va_loss < (early.best - 1e-3):
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    if early.step(va_loss):
        print(f"Epoch {ep}: early stopping (best val_loss={early.best:.4f})")
        break

if best_state: 
    model.load_state_dict(best_state)
    print("Loaded best weights.")


[labels] train counts → healthy(0)=1471 | unhealthy(1)=46
[CB weights] alpha: [0.5403340458869934, 1.4596658945083618]


  scaler = GradScaler(enabled=(DEVICE.type == "cuda"))


NameError: name 'time' is not defined

In [None]:
import torch

@torch.no_grad()
def collect_val_probs_and_labels(model, loader):
    model.eval()
    probs = []; labels = []
    for xb, yb in loader:
        xb = xb.to(DEVICE); yb = yb.to(DEVICE)
        with autocast("cuda", enabled=(DEVICE.type=="cuda")):
            logits = model(xb)
            p = F.softmax(logits, dim=1)[:, 1]  # P(class=1 = unhealthy)
        probs.append(p.cpu()); labels.append(yb.cpu())
    return torch.cat(probs), torch.cat(labels)

def metrics_from_preds(y_true, y_pred):
    cm = torch.zeros(2,2, dtype=torch.int64)
    for t,p in zip(y_true, y_pred): cm[t,p]+=1
    def prf(c):
        tp=cm[c,c].item(); fp=cm[:,c].sum().item()-tp; fn=cm[c,:].sum().item()-tp
        prec = tp / max(1, tp+fp); rec = tp / max(1, tp+fn)
        f1 = 0.0 if prec+rec==0 else 2*prec*rec/(prec+rec)
        return prec, rec, f1
    p0,r0,f0 = prf(0); p1,r1,f1 = prf(1)
    macro_f1 = 0.5*(f0+f1)
    acc = (y_true == y_pred).float().mean().item()
    return {"acc":acc, "macro_f1":macro_f1, "cm":cm, "per":{"healthy":(p0,r0,f0),"unhealthy":(p1,r1,f1)}}

# 1) collect probabilities for class=1 on the val set
probs, y_true = collect_val_probs_and_labels(model, val_loader)

# 2) sweep thresholds and pick the one that maximizes macro-F1 (balanced performance)
best = {"t":0.5, "macro_f1":-1}
for t in torch.linspace(0.1, 0.9, steps=17):  # 0.1 → 0.9 step 0.05
    y_pred = (probs >= t).long()
    m = metrics_from_preds(y_true, y_pred)
    if m["macro_f1"] > best["macro_f1"]:
        best = {"t": float(t), "macro_f1": m["macro_f1"], "metrics": m}

m = best["metrics"]; cm = m["cm"].numpy()
(p0,r0,f0) = m["per"]["healthy"]; (p1,r1,f1) = m["per"]["unhealthy"]
print(f"\nBest threshold t={best['t']:.2f} (by macro-F1={best['macro_f1']:.3f})")
print(f"Overall: acc={m['acc']:.3f} | macro-F1={m['macro_f1']:.3f}")
print("Confusion Matrix (rows=true, cols=pred):")
#print(f"            pred: healthy   pred: unhealthy")
#print(f"true healthy     {cm[0,0]:>6}          {cm[0,1]:>6}")
#print(f"true unhealthy   {cm[1,0]:>6}          {cm[1,1]:>6}")
print(f"healthy   | precision={p0:.3f} | recall={r0:.3f} | f1={f0:.3f}")
print(f"unhealthy | precision={p1:.3f} | recall={r1:.3f} | f1={f1:.3f}")
