### Sanity Check

In [2]:
# Safer allocator settings (help fragmentation on Windows/WDDM)
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
    "backend:cudaMallocAsync,"
    "expandable_segments:True,"
    "max_split_size_mb:64,"
    "garbage_collection_threshold:0.8"
)

import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


CUDA available: True
GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU


## Setting Baseline Model

### Step 0

In [3]:
from pathlib import Path
from typing import Sequence, Optional, Callable, List, Union
from io import BytesIO

# EXACT Parquet files you provided (masks only)
TRAIN_PARQUETS = [
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part001.parquet",
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part002.parquet",
]
VAL_PARQUETS = [
    r"C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\validation\validation_part001.parquet",
]

MASK_COLUMN = "label_health_rgb_png"  # mask PNG bytes column


### Step 1

In [4]:
import pyarrow.parquet as pq
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset, Dataset as HFDataset


### Step 2

In [5]:
def check_files_exist(paths: Sequence[str], tag: str):
    print(f"\n[check] {tag} Parquet files:")
    ok = True
    for p in paths:
        pe = Path(p)
        print(f"  - {pe}  -->  {'OK' if pe.exists() else 'MISSING'}")
        ok &= pe.exists()
    if not ok:
        raise FileNotFoundError(f"Some {tag} Parquet paths are missing.")

check_files_exist(TRAIN_PARQUETS, "TRAIN")
check_files_exist(VAL_PARQUETS, "VAL")



[check] TRAIN Parquet files:
  - C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part001.parquet  -->  OK
  - C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part002.parquet  -->  OK

[check] VAL Parquet files:
  - C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\validation\validation_part001.parquet  -->  OK


### Step 3

In [6]:
def inspect_parquet(paths: Sequence[str], tag: str, mask_col: str = MASK_COLUMN):
    print(f"\n[inspect] {tag}")
    total_rows = 0
    for p in paths:
        tbl = pq.read_table(p)
        n = tbl.num_rows
        cols = tbl.column_names
        total_rows += n
        print(f"  * {p}")
        print(f"    - rows: {n}")
        print(f"    - columns: {cols}")
        print(f"    - has 'index': {'index' in cols}, has '{mask_col}': {mask_col in cols}")
        if "index" not in cols or mask_col not in cols:
            raise ValueError(f"{p} must contain 'index' and '{mask_col}'.")
        # peek first non-null mask value
        col = tbl[mask_col]
        sample = None
        for i in range(n):
            v = col[i].as_py()
            if v is not None:
                sample = v
                break
        print(f"    - sample mask type: {type(sample).__name__ if sample is not None else 'None'}")
    print(f"[inspect] {tag} total rows across files: {total_rows}")

inspect_parquet(TRAIN_PARQUETS, "TRAIN")
inspect_parquet(VAL_PARQUETS, "VAL")



[inspect] TRAIN
  * C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part001.parquet
    - rows: 1121
    - columns: ['split', 'index', 'label_health_rgb_png']
    - has 'index': True, has 'label_health_rgb_png': True
    - sample mask type: bytes
  * C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\train\train_part002.parquet
    - rows: 396
    - columns: ['split', 'index', 'label_health_rgb_png']
    - has 'index': True, has 'label_health_rgb_png': True
    - sample mask type: bytes
[inspect] TRAIN total rows across files: 1517

[inspect] VAL
  * C:\Users\sedya\VScodeProjects\Coral-reefs-DBL4\data_preprocessing\coralscapes_export\parquet\validation\validation_part001.parquet
    - rows: 166
    - columns: ['split', 'index', 'label_health_rgb_png']
    - has 'index': True, has 'label_health_rgb_png': True
    - sample mask type: bytes
[inspect] VAL total rows across files: 166


### Step 4

In [7]:
class ParquetMasksByIndex:
    def __init__(self, parquet_paths: Sequence[Union[str, Path]], column_png: str = MASK_COLUMN):
        self._tables = []
        for p in parquet_paths:
            p = Path(p)
            if not p.exists():
                raise FileNotFoundError(f"Parquet file not found: {p}")
            self._tables.append(pq.read_table(p))
        for t in self._tables:
            if "index" not in t.column_names or column_png not in t.column_names:
                raise ValueError(f"Parquet must have 'index' and '{column_png}'. Got: {t.column_names}")
        self._colname = column_png
        # index → (table_id,row_id)
        self._map = {}
        for tid, t in enumerate(self._tables):
            for rid, ds_idx in enumerate(t["index"].to_pylist()):
                self._map[int(ds_idx)] = (tid, rid)
        print(f"[masks] index map size: {len(self._map)} from {len(self._tables)} file(s)")

    def get_mask_pil(self, ds_index: int) -> Image.Image:
        if ds_index not in self._map:
            raise KeyError(f"Index {ds_index} not found in masks.")
        tid, rid = self._map[ds_index]
        cell = self._tables[tid][self._colname][rid].as_py()
        if isinstance(cell, memoryview): cell = cell.tobytes()
        elif isinstance(cell, bytearray): cell = bytes(cell)
        return Image.open(BytesIO(cell)).convert("RGB")

masks_train = ParquetMasksByIndex(TRAIN_PARQUETS)
masks_val   = ParquetMasksByIndex(VAL_PARQUETS)


[masks] index map size: 1517 from 2 file(s)
[masks] index map size: 166 from 1 file(s)


### Step 5

In [8]:
print("\n[images] loading EPFL-ECEO/coralscapes: 'train' + 'validation' splits...")
hf_all = load_dataset("EPFL-ECEO/coralscapes")
hf_train: HFDataset = hf_all["train"]
hf_val:   HFDataset = hf_all["validation"]
print(f"[images] sizes → train={len(hf_train)}, val={len(hf_val)}")



[images] loading EPFL-ECEO/coralscapes: 'train' + 'validation' splits...
[images] sizes → train=1517, val=166


### Step 6

In [9]:
def pil_to_tensor_rgb(img: Image.Image) -> torch.Tensor:
    arr = np.asarray(img.convert("RGB"), dtype=np.uint8).copy()  # <- .copy() avoids the warning
    return torch.from_numpy(arr).permute(2, 0, 1).float() / 255.0


### Step 7

In [10]:
class CoralScapesImagesMasks(Dataset):
    def __init__(self, img_ds: HFDataset, masks: ParquetMasksByIndex,
                 img_transform: Optional[Callable] = None,
                 mask_transform: Optional[Callable] = None,
                 only_indices_with_masks: bool = True):
        self.img_ds = img_ds
        self.masks = masks
        self.img_tf = img_transform
        self.mask_tf = mask_transform
        n = len(self.img_ds)
        if only_indices_with_masks:
            self.indices = [i for i in range(n) if i in self.masks._map]
        else:
            self.indices = list(range(n))
        print(f"[dataset] keeping {len(self.indices)}/{n} indices (mask-covered).")

    def __len__(self): return len(self.indices)

    def __getitem__(self, j: int):
        idx = self.indices[j]
        rec = self.img_ds[idx]
        img: Image.Image = rec["image"].convert("RGB")
        mask: Image.Image = self.masks.get_mask_pil(idx)
        if self.img_tf is not None:  img = self.img_tf(img)
        if self.mask_tf is not None: mask = self.mask_tf(mask)
        return img, mask

cs_train = CoralScapesImagesMasks(hf_train, masks_train, pil_to_tensor_rgb, pil_to_tensor_rgb)
cs_val   = CoralScapesImagesMasks(hf_val,   masks_val,   pil_to_tensor_rgb, pil_to_tensor_rgb)


[dataset] keeping 1517/1517 indices (mask-covered).
[dataset] keeping 166/166 indices (mask-covered).


### Step 8

In [11]:
def probe_one(ds: Dataset, name: str):
    if len(ds) == 0:
        print(f"[probe] {name}: length=0 (no overlapping indices)."); return
    x, y = ds[0]
    print(f"[probe] {name}: sample 0 → image {tuple(x.shape)} | mask {tuple(y.shape)}")

probe_one(cs_train, "TRAIN")
probe_one(cs_val,   "VAL")


[probe] TRAIN: sample 0 → image (3, 1024, 2048) | mask (3, 1024, 2048)
[probe] VAL: sample 0 → image (3, 1024, 2048) | mask (3, 1024, 2048)


### Step 9

In [12]:
def pad_collate(batch):
    imgs, masks = zip(*batch)
    C = imgs[0].shape[0]
    H = max(t.shape[1] for t in imgs); W = max(t.shape[2] for t in imgs)
    xb = torch.zeros(len(imgs), C, H, W, dtype=imgs[0].dtype)
    yb = torch.zeros(len(masks), C, H, W, dtype=masks[0].dtype)
    for i, (x, y) in enumerate(zip(imgs, masks)):
        h, w = x.shape[1], x.shape[2]
        xb[i, :, :h, :w] = x
        yb[i, :, :h, :w] = y
    return xb, yb

train_loader = DataLoader(cs_train, batch_size=2, shuffle=True,  num_workers=0, collate_fn=pad_collate)
val_loader   = DataLoader(cs_val,   batch_size=2, shuffle=False, num_workers=0, collate_fn=pad_collate)

xb, yb = next(iter(train_loader))
print(f"[batch] TRAIN xb {tuple(xb.shape)} | yb {tuple(yb.shape)}")
xb2, yb2 = next(iter(val_loader))
print(f"[batch] VAL   xb {tuple(xb2.shape)} | yb {tuple(yb2.shape)}")


[batch] TRAIN xb (2, 3, 1024, 2048) | yb (2, 3, 1024, 2048)
[batch] VAL   xb (2, 3, 1024, 2048) | yb (2, 3, 1024, 2048)


### GPU Check

In [13]:
import torch, torchvision, torchaudio, sys
print("torch:", torch.__version__, "| CUDA:", torch.version.cuda, "| is_available:", torch.cuda.is_available())
print("torch file:", torch.__file__)
print("torchvision:", torchvision.__version__)
print("torchaudio:", torchaudio.__version__)
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


torch: 2.5.1+cu121 | CUDA: 12.1 | is_available: True
torch file: c:\Users\sedya\anaconda2024\envs\modern_ts_2E\Lib\site-packages\torch\__init__.py
torchvision: 0.20.1+cu121
torchaudio: 2.5.1+cu121
GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [15]:
import os, gc, torch

# Optional: allocator hints help fragmentation on Windows/WDDM
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64,garbage_collection_threshold:0.8"

# Kill any previous big objects that might still hold GPU memory
for name in list(globals().keys()):
    if name in ("model","optimizer","opt","scaler","train_loader","val_loader","cs_train","cs_val"):
        try: del globals()[name]
        except: pass

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    free, total = torch.cuda.mem_get_info()
    print(f"CUDA mem free: {free/1024**2:.0f} MiB / {total/1024**2:.0f} MiB")
else:
    print("CUDA not available")


CUDA mem free: 3278 MiB / 4096 MiB


## Training Model

In [17]:
import torch
from torch import nn

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE, "|", torch.cuda.get_device_name(0) if DEVICE.type=="cuda" else "")

class KerasLikeCNN_GAP(nn.Module):
    def __init__(self, p_drop=0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=0),  # 128 -> 126
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),                 # 126 -> 63
            nn.Conv2d(32, 64, 3, padding=0), # 63 -> 61
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, 1), nn.ReLU(inplace=True),  # Dense(32) over maps
            nn.Conv2d(32, 64, 1), nn.ReLU(inplace=True),  # Dense(64) over maps
            nn.Conv2d(64, 128, 3, padding=0), nn.ReLU(inplace=True),  # 61 -> 59
            nn.Dropout(p=p_drop),
            nn.AdaptiveAvgPool2d(1),         # replaces giant Flatten
        )
        self.head = nn.Linear(128, 2)        # logits

    def forward(self, x):
        x = self.features(x).flatten(1)
        return self.head(x)

torch.backends.cudnn.benchmark = True  # let cuDNN pick fast kernels
model = KerasLikeCNN_GAP(p_drop=0.5).to(DEVICE)
print("Model on:", next(model.parameters()).device)


Using device: cuda | NVIDIA GeForce RTX 3050 Ti Laptop GPU
Model on: cuda:0


In [18]:
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
import torch.nn.functional as F

BATCH = 32  # if OOM, try 16 → 8
train_loader = DataLoader(train_cls, batch_size=BATCH, shuffle=True,  num_workers=0, pin_memory=True)
val_loader   = DataLoader(val_cls,   batch_size=BATCH, shuffle=False, num_workers=0, pin_memory=True)

criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
scaler = GradScaler(enabled=(DEVICE.type=="cuda"))

# one mini-batch to confirm GPU path works
xb, yb = next(iter(train_loader))
xb = xb.to(DEVICE, non_blocking=True)
yb = yb.to(DEVICE, non_blocking=True)
with autocast(enabled=(DEVICE.type=="cuda")):
    logits = model(xb)
    loss = criterion(logits, yb)
scaler.scale(loss).backward(); scaler.step(opt); scaler.update()
print("1 batch OK on", DEVICE, "| loss:", float(loss))


NameError: name 'train_cls' is not defined

### THESE CODES DONT GOT THAT GPU POWAAAHH

In [16]:
# === Keras-like CNN training on CPU — no `.to()` anywhere ===
import time
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# --- (A) Build loaders if missing (wrap cs_train/cs_val to 128x128 + binary labels) ---
class MaskToBinaryLabel128(Dataset):
    """(image, mask) -> (image_128x128, label) with label=1 if blue>red, else 0."""
    def __init__(self, base_ds, size=128):
        self.base = base_ds
        self.size = size
    def __len__(self): return len(self.base)
    def __getitem__(self, idx):
        img, mask = self.base[idx]   # tensors in [0,1], shape (3,H,W)
        if img.shape[-2:] != (self.size, self.size):
            img  = F.interpolate(img.unsqueeze(0),  size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
            mask = F.interpolate(mask.unsqueeze(0), size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
        red  = mask[0].sum().item()
        blue = mask[2].sum().item()
        label = 1 if blue > red else 0
        return img, torch.tensor(label, dtype=torch.long)

def _maybe_build_loaders(batch_size=32, num_workers=0):
    if 'train_loader' in globals() and 'val_loader' in globals():
        return train_loader, val_loader
    if 'cs_train' not in globals() or 'cs_val' not in globals():
        raise RuntimeError("Expected cs_train/cs_val or train_loader/val_loader to exist.")
    train_cls = MaskToBinaryLabel128(cs_train, size=128)
    val_cls   = MaskToBinaryLabel128(cs_val,   size=128)
    tl = DataLoader(train_cls, batch_size=batch_size, shuffle=True,  num_workers=num_workers)
    vl = DataLoader(val_cls,   batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return tl, vl

train_loader, val_loader = _maybe_build_loaders(batch_size=32, num_workers=0)

# --- (B) Keras-like CNN (mirrors your TF architecture) ---
class KerasLikeCNN(nn.Module):
    """
    128x128x3 -> Conv(32,3x3,valid) -> ReLU -> MaxPool(2)
               -> Conv(64,3x3,valid) -> ReLU
               -> 1x1 Conv(32) -> ReLU  (Dense on feature maps)
               -> 1x1 Conv(64) -> ReLU  (Dense on feature maps)
               -> Conv(128,3x3,valid) -> ReLU -> Dropout
               -> Flatten -> Linear(2)
    Flatten size for 128 input: 59*59*128 = 445,568 (matches your summary).
    """
    def __init__(self, p_drop=0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=0),   # 128 -> 126
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),                               # 126 -> 63
            nn.Conv2d(32, 64, kernel_size=3, padding=0),  # 63 -> 61
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, kernel_size=1, padding=0),  # "Dense(32)" on maps
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=1, padding=0),  # "Dense(64)" on maps
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, padding=0), # 61 -> 59
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_drop),
        )
        self.head = nn.Sequential(
            nn.Flatten(),                 # 128 x 59 x 59 -> 445,568
            nn.Linear(128*59*59, 2),      # logits for 2 classes
        )
    def forward(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

model = KerasLikeCNN(p_drop=0.5)

# --- (C) Training setup (no device moves) ---
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
EPOCHS = 45
PATIENCE = 5
MIN_DELTA = 1e-3

class EarlyStopper:
    def __init__(self, patience=5, min_delta=1e-3):
        self.patience = patience
        self.min_delta = min_delta
        self.best = float('inf')
        self.count = 0
    def step(self, val_loss):
        if self.best - val_loss > self.min_delta:
            self.best = val_loss
            self.count = 0
            return False
        self.count += 1
        return self.count >= self.patience

early = EarlyStopper(PATIENCE, MIN_DELTA)
best_state = None

# --- (D) Train / Validate loops (CPU only) ---
def run_epoch(loader, train=True):
    model.train() if train else model.eval()
    loss_sum, correct, n = 0.0, 0, 0
    for xb, yb in loader:
        if train:
            optimizer.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(train):
            logits = model(xb)                   # model & tensors on CPU
            loss = criterion(logits, yb)
            if train:
                loss.backward()
                optimizer.step()
        loss_sum += loss.item() * xb.size(0)
        correct += (logits.argmax(1) == yb).sum().item()
        n += xb.size(0)
    return loss_sum / max(1, n), correct / max(1, n)

for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)
    dt = time.time() - t0
    print(f"Epoch {epoch:02d}/{EPOCHS} - loss: {tr_loss:.4f} - acc: {tr_acc:.4f} "
          f"- val_loss: {va_loss:.4f} - val_acc: {va_acc:.4f} - {dt:.1f}s")
    if best_state is None or va_loss < (early.best - MIN_DELTA):
        best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
    if early.step(va_loss):
        print(f"Epoch {epoch}: early stopping (best val_loss={early.best:.4f})")
        break

if best_state is not None:
    model.load_state_dict(best_state)
    print("Loaded best weights (by val_loss).")

# --- (E) Final validation score (like model.evaluate) ---
model.eval()
val_loss, val_acc, n = 0.0, 0.0, 0
with torch.no_grad():
    for xb, yb in val_loader:
        logits = model(xb)
        loss = criterion(logits, yb)
        val_loss += loss.item() * xb.size(0)
        val_acc  += (logits.argmax(1) == yb).sum().item()
        n += xb.size(0)
val_loss /= max(1, n)
val_acc  /= max(1, n)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_acc)


RuntimeError: Expected cs_train/cs_val or train_loader/val_loader to exist.

### Step 1

In [None]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

class MaskToBinaryLabel128(Dataset):
    """
    Wraps a (image_tensor, mask_tensor) dataset and returns:
      image_128x128 (float tensor in [0,1], shape 3x128x128),
      label (long, 0=non-bleached, 1=bleached) via blue-vs-red energy.
    """
    def __init__(self, base_ds, size=128):
        self.base = base_ds
        self.size = size

    def __len__(self): 
        return len(self.base)

    def __getitem__(self, idx):
        img, mask = self.base[idx]            # tensors: (3,H,W) in [0,1]
        # resize to 128x128
        if img.shape[-2:] != (self.size, self.size):
            img  = F.interpolate(img.unsqueeze(0),  size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
            mask = F.interpolate(mask.unsqueeze(0), size=(self.size,self.size), mode="bilinear", align_corners=False).squeeze(0)
        # label: bleached if blue energy > red energy
        red  = mask[0].sum().item()
        blue = mask[2].sum().item()
        label = 1 if blue > red else 0
        return img, torch.tensor(label, dtype=torch.long)

# build classification datasets (uses ONLY indices your Parquet provided, via cs_train/cs_val)
train_cls = MaskToBinaryLabel128(cs_train, size=128)
val_cls   = MaskToBinaryLabel128(cs_val,   size=128)

# data loaders (no pad needed; fixed 128x128)
BATCH = 32    # 128×128 is light; feel free to increase if memory allows
NUM_WORKERS = 0
train_loader = DataLoader(train_cls, batch_size=BATCH, shuffle=True,  num_workers=NUM_WORKERS)
val_loader   = DataLoader(val_cls,   batch_size=BATCH, shuffle=False, num_workers=NUM_WORKERS)

print(f"train len: {len(train_cls)}  |  val len: {len(val_cls)}")


train len: 1517  |  val len: 166


### Step 2

In [None]:
import torch
from torch import nn

class KerasLikeCNN(nn.Module):
    """
    PyTorch mirror of your Keras Sequential:
      Conv2d(32,3x3,valid) -> MaxPool2d(2)
      Conv2d(64,3x3,valid)
      'Dense(32)' on feature maps -> 1x1 Conv(32)
      'Dense(64)' on feature maps -> 1x1 Conv(64)
      Conv2d(128,3x3,valid)
      Dropout
      Flatten
      Linear(2)
    For input 128x128, spatial dims evolve:
      128 -> 126 (conv) -> 63 (pool) -> 61 (conv) -> 61 (1x1) -> 61 (1x1) -> 59 (conv)
      flatten size = 59*59*128 = 445,568 (matches your summary)
    """
    def __init__(self, p_drop=0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=0),   # 128 -> 126
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),                               # 126 -> 63
            nn.Conv2d(32, 64, kernel_size=3, padding=0),  # 63 -> 61
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, kernel_size=1, padding=0),  # "Dense(32)" on maps
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=1, padding=0),  # "Dense(64)" on maps
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, padding=0), # 61 -> 59
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_drop),
        )
        self.head = nn.Sequential(
            nn.Flatten(),             # 128 x 59 x 59 -> 445,568
            nn.Linear(128*59*59, 2),  # logits for 2 classes
        )

    def forward(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

model = KerasLikeCNN(p_drop=0.5)
# sanity: param count close to your Keras summary
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable params: {n_params:,}")


Trainable params: 988,578


### Step 3

In [None]:
import math
import time

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EPOCHS = 45
LR = 1e-3
PATIENCE = 5        # EarlyStopping like your Keras callback
MIN_DELTA = 1e-3

model = model.to(DEVICE)
opt = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

class EarlyStopper:
    def __init__(self, patience=5, min_delta=1e-3):
        self.patience = patience
        self.min_delta = min_delta
        self.best = float("inf")
        self.count = 0
    def step(self, val_loss):
        if self.best - val_loss > self.min_delta:
            self.best = val_loss
            self.count = 0
            return False  # don't stop
        else:
            self.count += 1
            return self.count >= self.patience

early = EarlyStopper(PATIENCE, MIN_DELTA)

def run_epoch(loader, train=True):
    if train: 
        model.train()
    else:
        model.eval()
    total, correct, loss_sum, n = 0.0, 0, 0.0, 0
    for xb, yb in loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        if train:
            opt.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(train):
            logits = model(xb)
            loss = criterion(logits, yb)
            if train:
                loss.backward()
                opt.step()
        loss_sum += loss.item() * xb.size(0)
        pred = logits.argmax(1)
        correct += (pred == yb).sum().item()
        n += xb.size(0)
    return loss_sum / max(1, n), correct / max(1, n)

best_state = None
for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)
    dt = time.time() - t0
    print(f"Epoch {epoch:02d}/{EPOCHS} "
          f"- loss: {tr_loss:.4f} - acc: {tr_acc:.4f} "
          f"- val_loss: {va_loss:.4f} - val_acc: {va_acc:.4f} "
          f"- {dt:.1f}s")
    # Save best
    if va_loss < (early.best - MIN_DELTA):
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    # Early stopping
    if early.step(va_loss):
        print(f"Epoch {epoch}: early stopping (best val_loss={early.best:.4f})")
        break

if best_state is not None:
    model.load_state_dict(best_state)
    print("Loaded best weights (by val_loss).")


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### Step 4

In [None]:
# === Train ===
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = KerasLikeCNN(in_ch=3, num_classes=2).to(DEVICE)

opt = torch.optim.Adam(model.parameters(), lr=1e-3)
crit = nn.CrossEntropyLoss()

EPOCHS = 10
best_val = float("inf")
CKPT = "keras_like_bleaching_cls.pth"

for epoch in range(1, EPOCHS+1):
    # -- train --
    model.train()
    total, correct, train_loss = 0, 0, 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        opt.zero_grad()
        logits = model(xb)
        loss = crit(logits, yb)
        loss.backward()
        opt.step()
        train_loss += loss.item() * yb.size(0)
        pred = logits.argmax(1)
        correct += (pred == yb).sum().item()
        total += yb.size(0)
    train_loss /= total
    train_acc = correct / total

    # -- validate --
    model.eval()
    v_total, v_correct, val_loss = 0, 0, 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            loss = crit(logits, yb)
            val_loss += loss.item() * yb.size(0)
            pred = logits.argmax(1)
            v_correct += (pred == yb).sum().item()
            v_total += yb.size(0)
    val_loss /= v_total
    val_acc = v_correct / v_total

    print(f"epoch {epoch:02d}/{EPOCHS} | "
          f"train loss={train_loss:.4f} acc={train_acc:.3f} | "
          f"val loss={val_loss:.4f} acc={val_acc:.3f}")

    if val_loss < best_val:
        best_val = val_loss
        torch.save({"model": model.state_dict(),
                    "epoch": epoch,
                    "val_loss": val_loss,
                    "val_acc": val_acc}, CKPT)
        print(f"  ↳ saved best checkpoint to {CKPT}")


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
