### GPU Sanity Check

In [1]:
# Safer allocator settings (help fragmentation on Windows/WDDM)
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
    "backend:cudaMallocAsync,"
    "expandable_segments:True,"
    "max_split_size_mb:64,"
    "garbage_collection_threshold:0.8"
)

import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


CUDA available: True
GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU


### CNN Model Training

In [2]:
# === Recreate classification datasets + loaders (ONLY your Parquet files) ===
from pathlib import Path
from io import BytesIO
from typing import Sequence, Optional, Callable, Union
import numpy as np
import pyarrow.parquet as pq
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset, Dataset as HFDataset
import torch.nn.functional as F

# ---- exact Parquet masks you provided ----
TRAIN_PARQUETS = [
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part001.parquet",
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part002.parquet",
]
VAL_PARQUETS = [
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\validation\validation_part001.parquet",
]
MASK_COLUMN = "label_health_rgb_png"

# ---- mask indexer: dataset index -> PNG ----
class ParquetMasksByIndex:
    def __init__(self, parquet_paths: Sequence[Union[str, Path]], column_png: str = MASK_COLUMN):
        self._tables = [pq.read_table(Path(p)) for p in parquet_paths]
        for t in self._tables:
            if "index" not in t.column_names or column_png not in t.column_names:
                raise ValueError(f"Parquet must have 'index' and '{column_png}'. Got: {t.column_names}")
        self._col = column_png
        self._map = {}
        for tid, t in enumerate(self._tables):
            for rid, ds_idx in enumerate(t["index"].to_pylist()):
                self._map[int(ds_idx)] = (tid, rid)
        print(f"[masks] mapped {len(self._map)} indices from {len(self._tables)} file(s)")

    def get_mask_pil(self, ds_index: int) -> Image.Image:
        tid, rid = self._map[ds_index]
        cell = self._tables[tid][self._col][rid].as_py()
        if isinstance(cell, memoryview): cell = cell.tobytes()
        elif isinstance(cell, bytearray): cell = bytes(cell)
        return Image.open(BytesIO(cell)).convert("RGB")

# ---- HF images: EPFL-ECEO/coralscapes (train/validation) ----
hf_all = load_dataset("EPFL-ECEO/coralscapes")
hf_train: HFDataset = hf_all["train"]
hf_val:   HFDataset = hf_all["validation"]

# ---- PIL -> tensor (copy() avoids "non-writable" warning) ----
def pil_to_tensor_rgb(img: Image.Image) -> torch.Tensor:
    arr = np.asarray(img.convert("RGB"), dtype=np.uint8).copy()
    return torch.from_numpy(arr).permute(2, 0, 1).float() / 255.0

# ---- bind images (HF) + masks (your Parquets only), keep only covered indices ----
class CoralScapesImagesMasks(Dataset):
    def __init__(self, img_ds: HFDataset, masks: ParquetMasksByIndex,
                 img_transform: Optional[Callable] = None,
                 mask_transform: Optional[Callable] = None):
        self.img_ds = img_ds
        self.masks = masks
        self.img_tf = img_transform
        self.mask_tf = mask_transform
        n = len(self.img_ds)
        self.indices = [i for i in range(n) if i in masks._map]
        print(f"[dataset] kept {len(self.indices)}/{n} indices (mask-covered).")

    def __len__(self): return len(self.indices)

    def __getitem__(self, j: int):
        idx = self.indices[j]
        rec = self.img_ds[idx]
        img = rec["image"].convert("RGB")
        mask = self.masks.get_mask_pil(idx)
        if self.img_tf is not None:  img  = self.img_tf(img)
        if self.mask_tf is not None: mask = self.mask_tf(mask)
        return img, mask

masks_train = ParquetMasksByIndex(TRAIN_PARQUETS, MASK_COLUMN)
masks_val   = ParquetMasksByIndex(VAL_PARQUETS,   MASK_COLUMN)
cs_train = CoralScapesImagesMasks(hf_train, masks_train, pil_to_tensor_rgb, pil_to_tensor_rgb)
cs_val   = CoralScapesImagesMasks(hf_val,   masks_val,   pil_to_tensor_rgb, pil_to_tensor_rgb)

# ---- classification wrapper → (image_128x128, label) ----
class MaskToBinaryLabel128(Dataset):
    def __init__(self, base_ds: Dataset, size=128):
        self.base = base_ds
        self.size = size
    def __len__(self): return len(self.base)
    def __getitem__(self, idx):
        img, mask = self.base[idx]  # tensors (3,H,W)
        if img.shape[-2:] != (self.size, self.size):
            img  = F.interpolate(img.unsqueeze(0),  size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
            mask = F.interpolate(mask.unsqueeze(0), size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
        red  = mask[0].sum().item()
        blue = mask[2].sum().item()
        label = 1 if blue > red else 0  # bleached if blue energy > red
        return img, torch.tensor(label, dtype=torch.long)

train_cls = MaskToBinaryLabel128(cs_train, size=128)
val_cls   = MaskToBinaryLabel128(cs_val,   size=128)

print(f"[ready] train_cls len={len(train_cls)} | val_cls len={len(val_cls)}")

# ---- loaders (GPU-friendly: pin_memory). If a global `model` exists, run a 1-batch smoke test.
BATCH = 32  # if OOM: 16 → 8
train_loader = DataLoader(train_cls, batch_size=BATCH, shuffle=True,  num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_cls,   batch_size=BATCH, shuffle=False, num_workers=0, pin_memory=True)
print(f"[loaders] train batches ≈ {len(train_loader)} | val batches ≈ {len(val_loader)}")

if 'model' in globals():
    from torch.cuda.amp import autocast, GradScaler
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = torch.nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    scaler = GradScaler(enabled=(DEVICE.type=="cuda"))
    xb, yb = next(iter(train_loader))
    xb = xb.to(DEVICE, non_blocking=True); yb = yb.to(DEVICE, non_blocking=True)
    with autocast(enabled=(DEVICE.type=="cuda")):
        logits = model(xb); loss = criterion(logits, yb)
    scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
    print("[smoke] 1 batch OK on", DEVICE, "| loss:", float(loss))
else:
    print("[note] Loaders ready. Now run your model cell and training loop.")


[masks] mapped 1517 indices from 2 file(s)
[masks] mapped 166 indices from 1 file(s)
[dataset] kept 1517/1517 indices (mask-covered).
[dataset] kept 166/166 indices (mask-covered).
[ready] train_cls len=1517 | val_cls len=166
[loaders] train batches ≈ 48 | val batches ≈ 6
[note] Loaders ready. Now run your model cell and training loop.


In [3]:
import torch
from torch import nn

# Device
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE, "|", torch.cuda.get_device_name(0) if DEVICE.type=="cuda" else "")
torch.backends.cudnn.benchmark = True  # speed boost

# Keras-like CNN (same conv stack you showed; GAP replaces giant Flatten)
class KerasLikeCNN_GAP(nn.Module):
    def __init__(self, p_drop=0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=0),  # 128 -> 126
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),                 # 126 -> 63
            nn.Conv2d(32, 64, 3, padding=0), # 63 -> 61
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, 1), nn.ReLU(inplace=True),  # Dense(32) over maps
            nn.Conv2d(32, 64, 1), nn.ReLU(inplace=True),  # Dense(64) over maps
            nn.Conv2d(64, 128, 3, padding=0), nn.ReLU(inplace=True),  # 61 -> 59
            nn.Dropout(p=p_drop),
            nn.AdaptiveAvgPool2d(1),         # replaces huge Flatten(59*59*128)
        )
        self.head = nn.Linear(128, 2)        # binary logits

    def forward(self, x):
        x = self.features(x).flatten(1)      # (B,128,1,1) -> (B,128)
        return self.head(x)

model = KerasLikeCNN_GAP(p_drop=0.5).to(DEVICE)
print("Model on:", next(model.parameters()).device)

# Optim, loss, AMP scaler
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler(enabled=(DEVICE.type=="cuda"))

# (optional) quick smoke test on one batch
xb, yb = next(iter(train_loader))
xb, yb = xb.to(DEVICE, non_blocking=True), yb.to(DEVICE, non_blocking=True)
with autocast(enabled=(DEVICE.type=="cuda")):
    logits = model(xb); loss = criterion(logits, yb)
scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
print("Smoke batch OK | loss:", float(loss))


Using device: cuda | NVIDIA GeForce RTX 3050 Ti Laptop GPU
Model on: cuda:0


  scaler = GradScaler(enabled=(DEVICE.type=="cuda"))
  return data.pin_memory(device)
  with autocast(enabled=(DEVICE.type=="cuda")):


Smoke batch OK | loss: 0.72027587890625


In [7]:
import time
from torch.cuda.amp import autocast

class EarlyStopper:
    def __init__(self, patience=5, min_delta=1e-3):
        self.patience = patience; self.min_delta = min_delta
        self.best = float("inf"); self.count = 0
    def step(self, val_loss):
        if self.best - val_loss > self.min_delta:
            self.best = val_loss; self.count = 0; return False
        self.count += 1; return self.count >= self.patience

def run_epoch(loader, train=True):
    model.train() if train else model.eval()
    loss_sum = correct = n = 0
    for xb, yb in loader:
        xb = xb.to(DEVICE, non_blocking=True)
        yb = yb.to(DEVICE, non_blocking=True)
        if train: opt.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(train), autocast(enabled=(DEVICE.type=="cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        if train:
            scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum += loss.item() * xb.size(0)
        correct  += (logits.argmax(1) == yb).sum().item()
        n += xb.size(0)
    return loss_sum/max(1,n), correct/max(1,n)

EPOCHS = 5
early = EarlyStopper(patience=5, min_delta=1e-3)
best_state = None

for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)
    dt = time.time() - t0
    print(f"Epoch {epoch:02d}/{EPOCHS} - loss: {tr_loss:.4f} - acc: {tr_acc:.4f} "
          f"- val_loss: {va_loss:.4f} - val_acc: {va_acc:.4f} - {dt:.1f}s")
    if best_state is None or va_loss < (early.best - 1e-3):
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    if early.step(va_loss):
        print(f"Epoch {epoch}: early stopping (best val_loss={early.best:.4f})")
        break

if best_state:
    model.load_state_dict(best_state)
    print("Loaded best weights.")

# Final validation (like model.evaluate)
model.eval()
val_loss = val_acc = n = 0
with torch.no_grad(), autocast(enabled=(DEVICE.type=="cuda")):
    for xb, yb in val_loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        logits = model(xb)
        loss = criterion(logits, yb)
        val_loss += loss.item() * xb.size(0)
        val_acc  += (logits.argmax(1) == yb).sum().item()
        n += xb.size(0)
val_loss /= max(1, n)
val_acc  /= max(1, n)
print("Validation Loss:", round(val_loss, 4))
print("Validation Accuracy:", round(val_acc, 4))


  with torch.set_grad_enabled(train), autocast(enabled=(DEVICE.type=="cuda")):


KeyboardInterrupt: 

### with class

In [6]:
# === Per-class setup (0=healthy, 1=unhealthy/bleached) ===
import numpy as np
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from PIL import Image
from collections import Counter

CLASS_NAMES = {0: "healthy", 1: "unhealthy"}  # 1=bleached/unhealthy

def label_from_mask_fast(mask_pil: Image.Image) -> int:
    arr = np.asarray(mask_pil.convert("RGB"), dtype=np.uint8)
    red  = int(arr[..., 0].sum())
    blue = int(arr[..., 2].sum())
    return 1 if blue > red else 0  # unhealthy if blue dominates

# Build labels aligned with train_cls order (no need to re-open images)
train_labels = []
for j in range(len(train_cls)):
    idx = cs_train.indices[j]
    m = masks_train.get_mask_pil(idx)
    train_labels.append(label_from_mask_fast(m))

cnt = Counter(train_labels)
n0, n1 = cnt.get(0, 0), cnt.get(1, 0)
print(f"class counts -> healthy(0): {n0} | unhealthy(1): {n1}")

# Class weights inversely proportional to frequency
total = len(train_labels)
w0 = total / (2.0 * max(1, n0))
w1 = total / (2.0 * max(1, n1))
class_weights = torch.tensor([w0, w1], dtype=torch.float32, device=DEVICE)
print("class weights:", [round(float(w0), 4), round(float(w1), 4)])

# Weighted sampler for balanced minibatches
sample_weights = [w0 if y == 0 else w1 for y in train_labels]
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# Rebuild loaders (pin_memory for GPU IO). Keep val shuffled=False
BATCH = 32  # drop to 16/8 if VRAM is tight
train_loader = DataLoader(train_cls, batch_size=BATCH, sampler=sampler, num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_cls,   batch_size=BATCH, shuffle=False,  num_workers=0, pin_memory=True)
print(f"[loaders] train batches ≈ {len(train_loader)} | val batches ≈ {len(val_loader)}")

# Re-define criterion with class weights (affects loss per class)
import torch.nn as nn
criterion = nn.CrossEntropyLoss(weight=class_weights)

# (Optional) reset optimizer to start fresh with the new criterion
opt = torch.optim.Adam(model.parameters(), lr=1e-3)


class counts -> healthy(0): 1339 | unhealthy(1): 178
class weights: [0.5665, 4.2612]
[loaders] train batches ≈ 48 | val batches ≈ 6


In [9]:
import torch
import time
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler

# If you didn't define a scaler yet:
try:
    scaler
except NameError:
    scaler = GradScaler(enabled=(DEVICE.type == "cuda"))

class EarlyStopper:
    def __init__(self, patience=5, min_delta=1e-3):
        self.patience = patience; self.min_delta = min_delta
        self.best = float("inf"); self.count = 0
    def step(self, val_loss):
        if self.best - val_loss > self.min_delta:
            self.best = val_loss; self.count = 0; return False
        self.count += 1; return self.count >= self.patience

def run_epoch(loader, train=True):
    model.train() if train else model.eval()
    loss_sum = correct = n = 0
    for xb, yb in loader:
        xb = xb.to(DEVICE, non_blocking=True)
        yb = yb.to(DEVICE, non_blocking=True)
        if train: opt.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(train), autocast(enabled=(DEVICE.type=="cuda")):
            logits = model(xb)
            loss = criterion(logits, yb)
        if train:
            scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum += loss.item() * xb.size(0)
        correct  += (logits.argmax(1) == yb).sum().item()
        n += xb.size(0)
    return loss_sum/max(1,n), correct/max(1,n)

@torch.no_grad()
def evaluate_with_metrics(model, loader):
    model.eval()
    n = 0; loss_sum = 0
    all_true = []; all_pred = []
    for xb, yb in loader:
        xb = xb.to(DEVICE); yb = yb.to(DEVICE)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss_sum += loss.item() * xb.size(0); n += xb.size(0)
        all_true.append(yb.cpu()); all_pred.append(logits.argmax(1).cpu())
    y_true = torch.cat(all_true)
    y_pred = torch.cat(all_pred)
    # Confusion matrix (2x2): rows=true, cols=pred
    cm = torch.zeros(2, 2, dtype=torch.int64)
    for t, p in zip(y_true, y_pred):
        cm[t, p] += 1
    # Per-class precision/recall/F1
    per_class = {}
    for c in [0, 1]:
        tp = cm[c, c].item()
        fp = cm[:, c].sum().item() - tp
        fn = cm[c, :].sum().item() - tp
        precision = tp / max(1, tp + fp)
        recall    = tp / max(1, tp + fn)
        f1        = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
        per_class[c] = {
            "name": CLASS_NAMES[c],
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "support": cm[c, :].sum().item(),
        }
    acc = (y_true == y_pred).float().mean().item()
    return {"loss": loss_sum/max(1,n), "acc": acc, "cm": cm, "per_class": per_class}

# ---- Train with early stopping, save best by val loss ----
EPOCHS = 3
early = EarlyStopper(patience=5, min_delta=1e-3)
best_state = None

for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)
    print(f"Epoch {epoch:02d}/{EPOCHS} - loss: {tr_loss:.4f} - acc: {tr_acc:.4f} "
          f"- val_loss: {va_loss:.4f} - val_acc: {va_acc:.4f} - {time.time()-t0:.1f}s")
    if best_state is None or va_loss < (early.best - 1e-3):
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    if early.step(va_loss):
        print(f"Epoch {epoch}: early stopping (best val_loss={early.best:.4f})")
        break

if best_state:
    model.load_state_dict(best_state)
    print("Loaded best weights.")

# ---- Final evaluation with PER-CLASS metrics ----
res = evaluate_with_metrics(model, val_loader)
print("\n=== Validation (per class) ===")
print(f"Overall: loss={res['loss']:.4f} | acc={res['acc']:.4f}")
cm = res["cm"].numpy()
print("\nConfusion Matrix (rows=true, cols=pred):")
print(f"            pred: healthy   pred: unhealthy")
print(f"true healthy     {cm[0,0]:>6}          {cm[0,1]:>6}")
print(f"true unhealthy   {cm[1,0]:>6}          {cm[1,1]:>6}")
print("\nPer-class metrics:")
for c in [0, 1]:
    m = res["per_class"][c]
    print(f"  {m['name']:<10} | precision={m['precision']:.3f} | recall={m['recall']:.3f} | f1={m['f1']:.3f} | support={m['support']}")


  with torch.set_grad_enabled(train), autocast(enabled=(DEVICE.type=="cuda")):


Epoch 01/3 - loss: 0.3694 - acc: 0.4984 - val_loss: 0.9613 - val_acc: 0.2711 - 354.3s
Epoch 02/3 - loss: 0.3717 - acc: 0.4937 - val_loss: 0.8740 - val_acc: 0.2711 - 316.0s
Epoch 03/3 - loss: 0.3557 - acc: 0.5208 - val_loss: 1.0742 - val_acc: 0.2711 - 315.5s
Loaded best weights.

=== Validation (per class) ===
Overall: loss=0.8739 | acc=0.2711

Confusion Matrix (rows=true, cols=pred):
            pred: healthy   pred: unhealthy
true healthy          0             121
true unhealthy        0              45

Per-class metrics:
  healthy    | precision=0.000 | recall=0.000 | f1=0.000 | support=121
  unhealthy  | precision=0.271 | recall=1.000 | f1=0.427 | support=45
