# Task 4

## CELL 1 - Header / Config (Edit Page)

In [1]:
# =========================
# SimSiam (Task 4,5,6) - Config
# =========================

import os, random, json, time
import numpy as np
import torch

# ---------- Required user edits (change if needed) ----------
DATA_DIR = "/kaggle/input/betel-leaf/Betel Leaf Dataset A Primary Dataset From Field And Controlled Environment/Betel Leaf Dataset"

# Input resolution (default 224). Allowed: 224 (default) or 160 (optional) or any int >= 64.
RESOLUTION = 224  # change to 160 if want 160x160

# Pretraining epochs for SimSiam (ENFORCED minimum 100)
PRETRAIN_EPOCHS = 100

# Downstream training epochs (linear probe / fine-tune)
LINEAR_EPOCHS = 50
FINETUNE_EPOCHS = 50

# Other experiment settings
BATCH_SIZE = 64
BACKBONE = "resnet18"   # 'resnet18' or 'resnet50'
SEED = 42
NUM_WORKERS = 2         # DataLoader workers; increase if instance allows
OUT_DIR = "/kaggle/working/simsiam_task4"

# Safety / assertions
assert os.path.exists(DATA_DIR), f"DATA_DIR not found: {DATA_DIR}"
assert PRETRAIN_EPOCHS >= 100, "PRETRAIN_EPOCHS must be >= 100"
assert isinstance(RESOLUTION, int) and RESOLUTION >= 64, "RESOLUTION must be integer >=64"

# reproducibility
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available(): torch.cuda.manual_seed_all(SEED)

# create output dir
os.makedirs(OUT_DIR, exist_ok=True)

print("CONFIG")
print("DATA_DIR:", DATA_DIR)
print("RESOLUTION:", RESOLUTION)
print("PRETRAIN_EPOCHS:", PRETRAIN_EPOCHS)
print("BATCH_SIZE:", BATCH_SIZE)
print("BACKBONE:", BACKBONE)
print("OUT_DIR:", OUT_DIR)


CONFIG
DATA_DIR: /kaggle/input/betel-leaf/Betel Leaf Dataset A Primary Dataset From Field And Controlled Environment/Betel Leaf Dataset
RESOLUTION: 224
PRETRAIN_EPOCHS: 100
BATCH_SIZE: 64
BACKBONE: resnet18
OUT_DIR: /kaggle/working/simsiam_task4


## CELL 2 — Imports & Basic Utilities

In [4]:
# =========================
# Safe Imports & Utilities
# =========================

# Put environment variables first to reduce noisy native logs if TensorFlow
# or other C++ extensions are imported later in the same process.
import os
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")   # silence many TF logs if TF loaded later
os.environ.setdefault("XLA_FLAGS", "--xla_gpu_cuda_data_dir=/usr/local/cuda  --xla_force_host_platform_device_count=1")
# Note: XLA_FLAGS and its values may be environment / system dependent;
# setting TF_CPP_MIN_LOG_LEVEL is the most portable suppressor.

# core stdlib
import sys
import math
import shutil
from pathlib import Path
from glob import glob
from typing import Optional

# helpful progress / imaging
from tqdm import tqdm
from PIL import Image

# plotting (matplotlib first, then seaborn)
import matplotlib.pyplot as plt
try:
    import seaborn as sns
except Exception as e:
    sns = None
    print("Warning: seaborn import failed — continuing without it:", e)

# PyTorch (import only if available)
try:
    import torch
    from torch import nn, optim
    from torch.utils.data import DataLoader, Dataset
    import torchvision
    from torchvision import transforms, models
except Exception as e:
    torch = None
    nn = None
    optim = None
    DataLoader = None
    Dataset = None
    torchvision = None
    transforms = None
    models = None
    print("Warning: PyTorch imports failed or CUDA unavailable:", e)

# Scikit-learn (classifiers & metrics)
try:
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split
except Exception as e:
    print("Warning: scikit-learn import failed:", e)

# UMAP / t-SNE / silhouette
try:
    import umap
except Exception as e:
    umap = None
    print("Info: umap not available:", e)

try:
    from sklearn.manifold import TSNE
    from sklearn.metrics import silhouette_score
except Exception as e:
    print("Warning importing TSNE / silhouette_score:", e)

# Save utility
try:
    import joblib
    import pickle
except Exception as e:
    print("Warning: joblib/pickle import issue:", e)

# Utility to print versions and environment diagnostics
def env_diagnostics(show_packages: Optional[list] = None):
    """Print device + common package versions to help debug environment mismatches."""
    print("Python:", sys.version.splitlines()[0])
    # PyTorch & CUDA
    if torch is not None:
        try:
            print("PyTorch:", torch.__version__)
            print("CUDA available:", torch.cuda.is_available())
            if torch.cuda.is_available():
                print("CUDA device count:", torch.cuda.device_count())
                print("CUDA current device:", torch.cuda.current_device())
                print("CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
        except Exception as e:
            print("PyTorch diagnostic error:", e)
    else:
        print("PyTorch: not available")

    # seaborn / matplotlib
    try:
        import matplotlib
        print("matplotlib:", matplotlib.__version__)
    except Exception:
        print("matplotlib: not available")

    if sns is not None:
        try:
            print("seaborn:", sns.__version__)
        except Exception:
            pass

    # scikit-learn
    try:
        import sklearn
        print("scikit-learn:", sklearn.__version__)
    except Exception:
        print("scikit-learn: not available")

    # umap
    if umap is not None:
        try:
            print("umap-learn:", umap.__version__)
        except Exception:
            pass

    # If TensorFlow is installed (we don't import it by default), just print version info safely:
    try:
        import importlib
        if importlib.util.find_spec("tensorflow") is not None:
            import tensorflow as tf
            print("TensorFlow:", tf.__version__)
            # don't call anything that triggers plugin registration here
        else:
            print("TensorFlow: not installed (or not found in this env)")
    except Exception as e:
        # Avoid failing diagnostics if TF import triggers C++ plugin registrations
        print("TensorFlow import safe-check raised an exception (not fatal):", e)

# Device selection (PyTorch-first)
if torch is not None:
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
else:
    DEVICE = "cpu"
print("Device:", DEVICE)

# Example: run environment diagnostics immediately (optional)
env_diagnostics()


Device: cuda
Python: 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]
PyTorch: 2.6.0+cu124
CUDA available: True
CUDA device count: 2
CUDA current device: 0
CUDA device name: Tesla T4
matplotlib: 3.7.2
seaborn: 0.12.2
scikit-learn: 1.2.2
umap-learn: 0.5.9.post2
TensorFlow: 2.18.0


CELL 3 — Build file manifest (reads dataset structure & prints counts)

In [5]:
# ===== Final robust manifest builder (corrects Leaf / non-Leaf folders) =====

from pathlib import Path
import json, os
root = Path("/kaggle/input/betel-leaf/Betel Leaf Dataset A Primary Dataset From Field And Controlled Environment/Betel Leaf Dataset")
assert root.exists(), f"Dataset root missing: {root}"

expected_classes = ["Diseased", "Dried", "Healthy"]
sources = [p.name for p in root.iterdir() if p.is_dir()]
print("Detected top-level source folders:", sources)

filepaths = []
labels = []
found_map = {}

def normalize(name: str):
    """Utility to normalize folder names for matching."""
    return name.lower().replace(" ", "").replace("_", "").replace("-", "")

for src in sources:
    src_dir = root / src
    subdirs = [d.name for d in src_dir.iterdir() if d.is_dir()]
    print(f"\nSource '{src}' subfolders:", subdirs)

    for cls in expected_classes:
        cls_norm = normalize(cls)
        matched = None

        # Scan each subfolder to find those containing this class
        for s in subdirs:
            if cls_norm in normalize(s):  # <-- KEY FIX: match Diseased ↔ DiseasedLeaf
                matched = s
                break

        if matched is None:
            print(f"WARNING: class '{cls}' not found under '{src}'")
            continue

        found_map.setdefault(src, {})[cls] = matched
        cls_dir = src_dir / matched

        for p in cls_dir.glob("*"):
            if p.suffix.lower() in [".jpg", ".jpeg", ".png"]:
                filepaths.append(str(p))
                labels.append(expected_classes.index(cls))

print("\nTotal images found:", len(filepaths))
from collections import Counter
ctr = Counter([Path(p).parent.name for p in filepaths])
print("Per-subfolder counts:")
for k, v in ctr.items():
    print(f"  {k}: {v}")

manifest = {
    "classes": expected_classes,
    "sources_detected": sources,
    "found_map": found_map,
    "files": filepaths,
    "labels": labels
}

os.makedirs("/kaggle/working/simsiam_task4", exist_ok=True)
with open("/kaggle/working/simsiam_task4/manifest.json", "w") as f:
    json.dump(manifest, f)
print("\nManifest saved to /kaggle/working/simsiam_task4/manifest.json")

Detected top-level source folders: ['On Field', 'Controlled Environment']

Source 'On Field' subfolders: ['Diseased Leaf', 'Healthy Leaf', 'Dried Leaf']

Source 'Controlled Environment' subfolders: ['Diseased', 'Dried', 'Healthy']

Total images found: 1800
Per-subfolder counts:
  Diseased Leaf: 289
  Dried Leaf: 282
  Healthy Leaf: 336
  Diseased: 220
  Dried: 340
  Healthy: 333

Manifest saved to /kaggle/working/simsiam_task4/manifest.json


CELL 4 — Transforms (SimSiam two-view + eval transforms)

In [6]:
# =========================
# Transforms: SimSiam augmentation (two-view) and evaluation transforms
# - Uses global RESOLUTION variable set in cell 1
# =========================

# Augmentation recipe tailored for leaf images; includes safe cropping and color jitter.
# We keep it relatively strong but include an augmentation probe later in the notebook.
simsiam_transform = transforms.Compose([
    transforms.RandomResizedCrop(RESOLUTION, scale=(0.2, 1.0), ratio=(0.75, 1.33)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.02),
    transforms.RandomGrayscale(p=0.2),
    transforms.GaussianBlur(kernel_size=(3,3), sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

# Evaluation transforms (deterministic)
eval_transform = transforms.Compose([
    transforms.Resize(int(RESOLUTION * 1.1)),
    transforms.CenterCrop(RESOLUTION),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

# quick visual check function for augmentation probe (returns a list of PIL images)
def aug_probe_image(path, n=6):
    img = Image.open(path).convert("RGB")
    outs = []
    for _ in range(n):
        timg = simsiam_transform(img)
        # de-normalize for visualization
        t = timg.numpy().transpose(1,2,0)
        t = t * np.array([0.229,0.224,0.225]) + np.array([0.485,0.456,0.406])
        t = np.clip(t, 0, 1)
        outs.append((t*255).astype(np.uint8))
    return outs


CELL 5 — Dataset wrappers: TwoViewDataset + ManifestDataset

In [7]:
# =========================
# Dataset classes
# TwoViewDataset: returns two augmented views per image for SimSiam training
# ManifestDataset: deterministic dataset for evaluation/feature extraction
# =========================

class TwoViewDataset(Dataset):
    """Returns two different augmented views of the same image (for SimSiam)."""
    def __init__(self, paths, labels, transform):
        self.paths = paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        p = self.paths[idx]
        lbl = self.labels[idx]
        img = Image.open(p).convert("RGB")
        x1 = self.transform(img)
        x2 = self.transform(img)
        return x1, x2, lbl, p

class ManifestDataset(Dataset):
    """Deterministic dataset for feature extraction and downstream training."""
    def __init__(self, paths, labels, transform):
        self.paths = paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        p = self.paths[idx]
        lbl = self.labels[idx]
        img = Image.open(p).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, lbl, p


CELL 6 — Create train/val/test splits and DataLoaders (deterministic split saved)

In [8]:
# =========================
# Create fixed stratified train/val/test splits and DataLoaders
# Save split manifest for reproducibility
# =========================

paths = manifest["files"]
labels = manifest["labels"]
classes = manifest["classes"]

# First split: fixed test set 20%
train_paths, test_paths, train_labels, test_labels = train_test_split(
    paths, labels, test_size=0.20, stratify=labels, random_state=SEED)

# From train, carve out validation 10% of train
train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_paths, train_labels, test_size=0.10, stratify=train_labels, random_state=SEED)

print("Train:", len(train_paths), "Val:", len(val_paths), "Test:", len(test_paths))

split_manifest = {
    "classes": classes,
    "train": train_paths, "train_labels": train_labels,
    "val": val_paths, "val_labels": val_labels,
    "test": test_paths, "test_labels": test_labels
}
with open(os.path.join(OUT_DIR, "split_manifest.json"), "w") as f:
    json.dump(split_manifest, f)
print("Split manifest saved to", os.path.join(OUT_DIR, "split_manifest.json"))

# DataLoaders for pretraining (two-view)
train_dataset = TwoViewDataset(train_paths, train_labels, simsiam_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, drop_last=True)

# DataLoaders for evaluation (use ManifestDataset)
val_dataset = ManifestDataset(val_paths, val_labels, eval_transform)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_dataset = ManifestDataset(test_paths, test_labels, eval_transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)


Train: 1296 Val: 144 Test: 360
Split manifest saved to /kaggle/working/simsiam_task4/split_manifest.json


CELL 7 — SimSiam model definition (encoder, projector, predictor)

In [9]:
# =========================
# SimSiam model (backbone + projector + predictor)
# Implementation note: this is a compact, readable SimSiam for our project.
# =========================

class SimSiam(nn.Module):
    def __init__(self, backbone="resnet18", pretrained=False, proj_hidden=2048, pred_hidden=512, out_dim=512):
        super().__init__()
        # backbone
        if backbone == "resnet18":
            base = models.resnet18(pretrained=pretrained)
            feat_dim = 512
        elif backbone == "resnet50":
            base = models.resnet50(pretrained=pretrained)
            feat_dim = 2048
        else:
            raise ValueError("backbone must be resnet18 or resnet50")
        # remove fc
        modules = list(base.children())[:-1]  # remove avgpool? we keep avgpool and flatten path
        self.encoder = nn.Sequential(*modules)  # encoder returns [B, feat_dim, 1, 1]
        self.feat_dim = feat_dim

        # projector: 3-layer MLP
        self.projector = nn.Sequential(
            nn.Linear(feat_dim, proj_hidden),
            nn.BatchNorm1d(proj_hidden),
            nn.ReLU(inplace=True),
            nn.Linear(proj_hidden, proj_hidden),
            nn.BatchNorm1d(proj_hidden),
            nn.ReLU(inplace=True),
            nn.Linear(proj_hidden, out_dim)
        )

        # predictor: 2-layer MLP
        self.predictor = nn.Sequential(
            nn.Linear(out_dim, pred_hidden),
            nn.BatchNorm1d(pred_hidden),
            nn.ReLU(inplace=True),
            nn.Linear(pred_hidden, out_dim)
        )

    def forward_backbone(self, x):
        # encoder gives shape [B, feat_dim, 1, 1] normally; flatten to [B, feat_dim]
        h = self.encoder(x)
        h = h.view(h.size(0), -1)
        return h

    def forward(self, x1, x2):
        # returns predictor outputs and targets (projector outputs detached)
        h1 = self.forward_backbone(x1)
        h2 = self.forward_backbone(x2)
        z1 = self.projector(h1)
        z2 = self.projector(h2)
        p1 = self.predictor(z1)
        p2 = self.predictor(z2)
        # For loss: compare p1 with z2.detach() and p2 with z1.detach()
        return p1, p2, z1.detach(), z2.detach()


CELL 8 — Loss function (negative cosine similarity) & utilities

In [10]:
# =========================
# Loss: negative cosine similarity as used by SimSiam
# =========================

def negative_cosine_similarity(p, z):
    # p: [B, D] predictor outputs (not detached)
    # z: [B, D] target projector outputs (detached)
    p = nn.functional.normalize(p, dim=1)
    z = nn.functional.normalize(z, dim=1)
    return - (p * z).sum(dim=1).mean()

# checkpoint saving helper
def save_checkpoint(state, filename):
    torch.save(state, filename)
    print("Saved checkpoint:", filename)


CELL 9 — Pretraining loop (resumeable) — run this cell to pretrain SimSiam

In [10]:
# =========================
# Pretraining: SimSiam training loop with resumeable checkpointing
# Save encoder at the end as simsiam_encoder.pth
# =========================

# Hyperparams for pretraining loop (can edit if needed)
learning_rate = 0.03 * (BATCH_SIZE / 256)  # linear scaling rule; small batches => smaller lr
momentum = 0.9
weight_decay = 1e-4

# instantiate model, optimizer, scheduler
model = SimSiam(backbone=BACKBONE).to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=PRETRAIN_EPOCHS)

# checkpoint paths
latest_ckpt = os.path.join(OUT_DIR, "simsiam_latest.pth")
best_ckpt = os.path.join(OUT_DIR, "simsiam_best_linearprobe.pth")  # kept for future extension
encoder_outpath = os.path.join(OUT_DIR, "simsiam_encoder.pth")

# optionally resume
start_epoch = 0
if os.path.exists(latest_ckpt):
    ck = torch.load(latest_ckpt, map_location=DEVICE)
    model.load_state_dict(ck["model_state"])
    optimizer.load_state_dict(ck["optimizer_state"])
    scheduler.load_state_dict(ck["scheduler_state"])
    start_epoch = ck["epoch"] + 1
    print("Resumed from checkpoint. Starting at epoch", start_epoch)

# quick function to evaluate linear probe on val using frozen encoder (used as proxy occasionally)
def extract_features_from_encoder(encoder, paths_list, transform, batch_size=64):
    encoder.eval()
    ds = ManifestDataset(paths_list, [0]*len(paths_list), transform=transform)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS)
    feats = []
    with torch.no_grad():
        for imgs, _, _ in loader:
            imgs = imgs.to(DEVICE)
            h = encoder(imgs).view(imgs.size(0), -1).cpu().numpy()
            feats.append(h)
    feats = np.vstack(feats)
    return feats

# For occasional quick probing: a small linear-probe train on a subset (fast)
def quick_linear_probe(encoder, train_paths, train_labels, val_paths, val_labels, transform, max_samples=500):
    # build features (may subsample for speed)
    tpaths = train_paths[:max_samples]; tlabels = train_labels[:max_samples]
    train_feats = extract_features_from_encoder(encoder, tpaths, transform)
    val_feats = extract_features_from_encoder(encoder, val_paths, transform)
    clf = LogisticRegression(max_iter=1000)
    clf.fit(train_feats, tlabels)
    preds = clf.predict(val_feats)
    acc = accuracy_score(val_labels, preds)
    return acc

# Main training loop
print("Starting pretraining for", PRETRAIN_EPOCHS, "epochs (from epoch", start_epoch, ")")
for epoch in range(start_epoch, PRETRAIN_EPOCHS):
    model.train()
    epoch_losses = []
    loop = tqdm(train_loader, desc=f"Pretrain Epoch {epoch+1}/{PRETRAIN_EPOCHS}")
    for x1, x2, lbl, _ in loop:
        x1 = x1.to(DEVICE); x2 = x2.to(DEVICE)
        p1, p2, z1, z2 = model(x1, x2)
        loss = 0.5 * negative_cosine_similarity(p1, z2) + 0.5 * negative_cosine_similarity(p2, z1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_losses.append(loss.item())
        loop.set_postfix(loss=f"{np.mean(epoch_losses):.4f}")

    scheduler.step()
    avg_loss = float(np.mean(epoch_losses))
    print(f"Epoch {epoch+1} finished. Avg loss: {avg_loss:.4f}")

    # checkpoint: save latest (includes RNG/manifest information for reproducibility)
    ck = {
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        "avg_loss": avg_loss,
        "manifest": split_manifest
    }
    save_checkpoint(ck, latest_ckpt)

# After training save encoder state dict (only encoder weights; projector/predictor not needed downstream)
torch.save({"encoder_state_dict": model.encoder.state_dict(), "feat_dim": model.feat_dim},
           encoder_outpath)
print("Pretraining complete. Encoder saved to", encoder_outpath)




Starting pretraining for 100 epochs (from epoch 0 )


Pretrain Epoch 1/100: 100%|██████████| 20/20 [06:57<00:00, 20.89s/it, loss=-0.1287]


Epoch 1 finished. Avg loss: -0.1287
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 2/100: 100%|██████████| 20/20 [06:21<00:00, 19.07s/it, loss=-0.4303]


Epoch 2 finished. Avg loss: -0.4303
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 3/100: 100%|██████████| 20/20 [06:14<00:00, 18.73s/it, loss=-0.5787]


Epoch 3 finished. Avg loss: -0.5787
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 4/100: 100%|██████████| 20/20 [06:10<00:00, 18.51s/it, loss=-0.6860]


Epoch 4 finished. Avg loss: -0.6860
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 5/100: 100%|██████████| 20/20 [06:19<00:00, 18.96s/it, loss=-0.7681]


Epoch 5 finished. Avg loss: -0.7681
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 6/100: 100%|██████████| 20/20 [06:10<00:00, 18.54s/it, loss=-0.8111]


Epoch 6 finished. Avg loss: -0.8111
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 7/100: 100%|██████████| 20/20 [06:13<00:00, 18.67s/it, loss=-0.8327]


Epoch 7 finished. Avg loss: -0.8327
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 8/100: 100%|██████████| 20/20 [06:14<00:00, 18.72s/it, loss=-0.8515]


Epoch 8 finished. Avg loss: -0.8515
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 9/100: 100%|██████████| 20/20 [06:17<00:00, 18.88s/it, loss=-0.8444]


Epoch 9 finished. Avg loss: -0.8444
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 10/100: 100%|██████████| 20/20 [06:17<00:00, 18.89s/it, loss=-0.8696]


Epoch 10 finished. Avg loss: -0.8696
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 11/100: 100%|██████████| 20/20 [06:19<00:00, 18.98s/it, loss=-0.8581]


Epoch 11 finished. Avg loss: -0.8581
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 12/100: 100%|██████████| 20/20 [06:14<00:00, 18.72s/it, loss=-0.8753]


Epoch 12 finished. Avg loss: -0.8753
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 13/100: 100%|██████████| 20/20 [06:06<00:00, 18.35s/it, loss=-0.8759]


Epoch 13 finished. Avg loss: -0.8759
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 14/100: 100%|██████████| 20/20 [06:13<00:00, 18.68s/it, loss=-0.8736]


Epoch 14 finished. Avg loss: -0.8736
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 15/100: 100%|██████████| 20/20 [06:13<00:00, 18.65s/it, loss=-0.8942]


Epoch 15 finished. Avg loss: -0.8942
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 16/100: 100%|██████████| 20/20 [06:19<00:00, 18.98s/it, loss=-0.8858]


Epoch 16 finished. Avg loss: -0.8858
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 17/100: 100%|██████████| 20/20 [06:13<00:00, 18.67s/it, loss=-0.8934]


Epoch 17 finished. Avg loss: -0.8934
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 18/100: 100%|██████████| 20/20 [05:59<00:00, 18.00s/it, loss=-0.8937]


Epoch 18 finished. Avg loss: -0.8937
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 19/100: 100%|██████████| 20/20 [06:17<00:00, 18.90s/it, loss=-0.8942]


Epoch 19 finished. Avg loss: -0.8942
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 20/100: 100%|██████████| 20/20 [06:19<00:00, 18.97s/it, loss=-0.9003]


Epoch 20 finished. Avg loss: -0.9003
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 21/100: 100%|██████████| 20/20 [06:11<00:00, 18.59s/it, loss=-0.8986]


Epoch 21 finished. Avg loss: -0.8986
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 22/100: 100%|██████████| 20/20 [06:11<00:00, 18.58s/it, loss=-0.8984]


Epoch 22 finished. Avg loss: -0.8984
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 23/100: 100%|██████████| 20/20 [06:20<00:00, 19.04s/it, loss=-0.8972]


Epoch 23 finished. Avg loss: -0.8972
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 24/100: 100%|██████████| 20/20 [06:18<00:00, 18.91s/it, loss=-0.8948]


Epoch 24 finished. Avg loss: -0.8948
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 25/100: 100%|██████████| 20/20 [06:03<00:00, 18.16s/it, loss=-0.8922]


Epoch 25 finished. Avg loss: -0.8922
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 26/100: 100%|██████████| 20/20 [06:19<00:00, 18.97s/it, loss=-0.8974]


Epoch 26 finished. Avg loss: -0.8974
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 27/100: 100%|██████████| 20/20 [06:16<00:00, 18.80s/it, loss=-0.8999]


Epoch 27 finished. Avg loss: -0.8999
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 28/100: 100%|██████████| 20/20 [06:10<00:00, 18.54s/it, loss=-0.9013]


Epoch 28 finished. Avg loss: -0.9013
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 29/100: 100%|██████████| 20/20 [06:20<00:00, 19.02s/it, loss=-0.9023]


Epoch 29 finished. Avg loss: -0.9023
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 30/100: 100%|██████████| 20/20 [06:21<00:00, 19.08s/it, loss=-0.9070]


Epoch 30 finished. Avg loss: -0.9070
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 31/100: 100%|██████████| 20/20 [06:21<00:00, 19.06s/it, loss=-0.9009]


Epoch 31 finished. Avg loss: -0.9009
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 32/100: 100%|██████████| 20/20 [06:28<00:00, 19.40s/it, loss=-0.9014]


Epoch 32 finished. Avg loss: -0.9014
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 33/100: 100%|██████████| 20/20 [06:12<00:00, 18.64s/it, loss=-0.8989]


Epoch 33 finished. Avg loss: -0.8989
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 34/100: 100%|██████████| 20/20 [06:18<00:00, 18.91s/it, loss=-0.9083]


Epoch 34 finished. Avg loss: -0.9083
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 35/100: 100%|██████████| 20/20 [06:20<00:00, 19.03s/it, loss=-0.9056]


Epoch 35 finished. Avg loss: -0.9056
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 36/100: 100%|██████████| 20/20 [06:13<00:00, 18.67s/it, loss=-0.9021]


Epoch 36 finished. Avg loss: -0.9021
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 37/100: 100%|██████████| 20/20 [06:19<00:00, 18.96s/it, loss=-0.9027]


Epoch 37 finished. Avg loss: -0.9027
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 38/100: 100%|██████████| 20/20 [06:21<00:00, 19.06s/it, loss=-0.9024]


Epoch 38 finished. Avg loss: -0.9024
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 39/100: 100%|██████████| 20/20 [06:17<00:00, 18.87s/it, loss=-0.9041]


Epoch 39 finished. Avg loss: -0.9041
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 40/100: 100%|██████████| 20/20 [06:15<00:00, 18.77s/it, loss=-0.9015]


Epoch 40 finished. Avg loss: -0.9015
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 41/100: 100%|██████████| 20/20 [06:12<00:00, 18.61s/it, loss=-0.8954]


Epoch 41 finished. Avg loss: -0.8954
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 42/100: 100%|██████████| 20/20 [06:16<00:00, 18.84s/it, loss=-0.9000]


Epoch 42 finished. Avg loss: -0.9000
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 43/100: 100%|██████████| 20/20 [06:20<00:00, 19.05s/it, loss=-0.9058]


Epoch 43 finished. Avg loss: -0.9058
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 44/100: 100%|██████████| 20/20 [06:22<00:00, 19.13s/it, loss=-0.9043]


Epoch 44 finished. Avg loss: -0.9043
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 45/100: 100%|██████████| 20/20 [06:25<00:00, 19.29s/it, loss=-0.9010]


Epoch 45 finished. Avg loss: -0.9010
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 46/100: 100%|██████████| 20/20 [06:22<00:00, 19.13s/it, loss=-0.9076]


Epoch 46 finished. Avg loss: -0.9076
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 47/100: 100%|██████████| 20/20 [06:16<00:00, 18.82s/it, loss=-0.9098]


Epoch 47 finished. Avg loss: -0.9098
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 48/100: 100%|██████████| 20/20 [06:14<00:00, 18.73s/it, loss=-0.9045]


Epoch 48 finished. Avg loss: -0.9045
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 49/100: 100%|██████████| 20/20 [06:23<00:00, 19.17s/it, loss=-0.9105]


Epoch 49 finished. Avg loss: -0.9105
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 50/100: 100%|██████████| 20/20 [06:11<00:00, 18.60s/it, loss=-0.9072]


Epoch 50 finished. Avg loss: -0.9072
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 51/100: 100%|██████████| 20/20 [06:22<00:00, 19.13s/it, loss=-0.9112]


Epoch 51 finished. Avg loss: -0.9112
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 52/100: 100%|██████████| 20/20 [06:18<00:00, 18.93s/it, loss=-0.9114]


Epoch 52 finished. Avg loss: -0.9114
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 53/100: 100%|██████████| 20/20 [06:14<00:00, 18.74s/it, loss=-0.9047]


Epoch 53 finished. Avg loss: -0.9047
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 54/100: 100%|██████████| 20/20 [06:29<00:00, 19.47s/it, loss=-0.8987]


Epoch 54 finished. Avg loss: -0.8987
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 55/100: 100%|██████████| 20/20 [06:21<00:00, 19.05s/it, loss=-0.9183]


Epoch 55 finished. Avg loss: -0.9183
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 56/100: 100%|██████████| 20/20 [06:15<00:00, 18.76s/it, loss=-0.9190]


Epoch 56 finished. Avg loss: -0.9190
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 57/100: 100%|██████████| 20/20 [06:17<00:00, 18.88s/it, loss=-0.9139]


Epoch 57 finished. Avg loss: -0.9139
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 58/100: 100%|██████████| 20/20 [06:24<00:00, 19.23s/it, loss=-0.9146]


Epoch 58 finished. Avg loss: -0.9146
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 59/100: 100%|██████████| 20/20 [06:18<00:00, 18.92s/it, loss=-0.9131]


Epoch 59 finished. Avg loss: -0.9131
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 60/100: 100%|██████████| 20/20 [06:21<00:00, 19.06s/it, loss=-0.9122]


Epoch 60 finished. Avg loss: -0.9122
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 61/100: 100%|██████████| 20/20 [06:19<00:00, 18.98s/it, loss=-0.9176]


Epoch 61 finished. Avg loss: -0.9176
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 62/100: 100%|██████████| 20/20 [06:19<00:00, 18.97s/it, loss=-0.9130]


Epoch 62 finished. Avg loss: -0.9130
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 63/100: 100%|██████████| 20/20 [06:18<00:00, 18.93s/it, loss=-0.9147]


Epoch 63 finished. Avg loss: -0.9147
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 64/100: 100%|██████████| 20/20 [06:21<00:00, 19.07s/it, loss=-0.9130]


Epoch 64 finished. Avg loss: -0.9130
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 65/100: 100%|██████████| 20/20 [06:19<00:00, 18.99s/it, loss=-0.9130]


Epoch 65 finished. Avg loss: -0.9130
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 66/100: 100%|██████████| 20/20 [06:20<00:00, 19.02s/it, loss=-0.9187]


Epoch 66 finished. Avg loss: -0.9187
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 67/100: 100%|██████████| 20/20 [06:19<00:00, 18.97s/it, loss=-0.9150]


Epoch 67 finished. Avg loss: -0.9150
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 68/100: 100%|██████████| 20/20 [06:05<00:00, 18.28s/it, loss=-0.9201]


Epoch 68 finished. Avg loss: -0.9201
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 69/100: 100%|██████████| 20/20 [06:22<00:00, 19.14s/it, loss=-0.9135]


Epoch 69 finished. Avg loss: -0.9135
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 70/100: 100%|██████████| 20/20 [06:23<00:00, 19.18s/it, loss=-0.9123]


Epoch 70 finished. Avg loss: -0.9123
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 71/100: 100%|██████████| 20/20 [06:22<00:00, 19.10s/it, loss=-0.9177]


Epoch 71 finished. Avg loss: -0.9177
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 72/100: 100%|██████████| 20/20 [06:20<00:00, 19.03s/it, loss=-0.9154]


Epoch 72 finished. Avg loss: -0.9154
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 73/100: 100%|██████████| 20/20 [06:24<00:00, 19.22s/it, loss=-0.9192]


Epoch 73 finished. Avg loss: -0.9192
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 74/100: 100%|██████████| 20/20 [06:22<00:00, 19.13s/it, loss=-0.9154]


Epoch 74 finished. Avg loss: -0.9154
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 75/100: 100%|██████████| 20/20 [06:21<00:00, 19.06s/it, loss=-0.9158]


Epoch 75 finished. Avg loss: -0.9158
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 76/100: 100%|██████████| 20/20 [06:22<00:00, 19.13s/it, loss=-0.9128]


Epoch 76 finished. Avg loss: -0.9128
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 77/100: 100%|██████████| 20/20 [06:13<00:00, 18.69s/it, loss=-0.9191]


Epoch 77 finished. Avg loss: -0.9191
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 78/100: 100%|██████████| 20/20 [06:13<00:00, 18.65s/it, loss=-0.9230]


Epoch 78 finished. Avg loss: -0.9230
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 79/100: 100%|██████████| 20/20 [06:17<00:00, 18.88s/it, loss=-0.9172]


Epoch 79 finished. Avg loss: -0.9172
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 80/100: 100%|██████████| 20/20 [06:15<00:00, 18.77s/it, loss=-0.9178]


Epoch 80 finished. Avg loss: -0.9178
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 81/100: 100%|██████████| 20/20 [06:12<00:00, 18.60s/it, loss=-0.9160]


Epoch 81 finished. Avg loss: -0.9160
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 82/100: 100%|██████████| 20/20 [06:21<00:00, 19.06s/it, loss=-0.9195]


Epoch 82 finished. Avg loss: -0.9195
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 83/100: 100%|██████████| 20/20 [06:15<00:00, 18.76s/it, loss=-0.9179]


Epoch 83 finished. Avg loss: -0.9179
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 84/100: 100%|██████████| 20/20 [06:18<00:00, 18.92s/it, loss=-0.9208]


Epoch 84 finished. Avg loss: -0.9208
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 85/100: 100%|██████████| 20/20 [05:50<00:00, 17.52s/it, loss=-0.9239]


Epoch 85 finished. Avg loss: -0.9239
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 86/100: 100%|██████████| 20/20 [06:19<00:00, 18.96s/it, loss=-0.9157]


Epoch 86 finished. Avg loss: -0.9157
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 87/100: 100%|██████████| 20/20 [06:20<00:00, 19.01s/it, loss=-0.9111]


Epoch 87 finished. Avg loss: -0.9111
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 88/100: 100%|██████████| 20/20 [06:11<00:00, 18.60s/it, loss=-0.9194]


Epoch 88 finished. Avg loss: -0.9194
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 89/100: 100%|██████████| 20/20 [06:14<00:00, 18.72s/it, loss=-0.9169]


Epoch 89 finished. Avg loss: -0.9169
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 90/100: 100%|██████████| 20/20 [06:14<00:00, 18.71s/it, loss=-0.9176]


Epoch 90 finished. Avg loss: -0.9176
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 91/100: 100%|██████████| 20/20 [06:17<00:00, 18.87s/it, loss=-0.9189]


Epoch 91 finished. Avg loss: -0.9189
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 92/100: 100%|██████████| 20/20 [05:32<00:00, 16.64s/it, loss=-0.9194]


Epoch 92 finished. Avg loss: -0.9194
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 93/100: 100%|██████████| 20/20 [06:16<00:00, 18.81s/it, loss=-0.9203]


Epoch 93 finished. Avg loss: -0.9203
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 94/100: 100%|██████████| 20/20 [06:14<00:00, 18.73s/it, loss=-0.9212]


Epoch 94 finished. Avg loss: -0.9212
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 95/100: 100%|██████████| 20/20 [06:16<00:00, 18.82s/it, loss=-0.9223]


Epoch 95 finished. Avg loss: -0.9223
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 96/100: 100%|██████████| 20/20 [06:11<00:00, 18.57s/it, loss=-0.9198]


Epoch 96 finished. Avg loss: -0.9198
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 97/100: 100%|██████████| 20/20 [06:14<00:00, 18.74s/it, loss=-0.9226]


Epoch 97 finished. Avg loss: -0.9226
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 98/100: 100%|██████████| 20/20 [06:11<00:00, 18.56s/it, loss=-0.9148]


Epoch 98 finished. Avg loss: -0.9148
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 99/100: 100%|██████████| 20/20 [06:18<00:00, 18.95s/it, loss=-0.9190]


Epoch 99 finished. Avg loss: -0.9190
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth


Pretrain Epoch 100/100: 100%|██████████| 20/20 [06:15<00:00, 18.76s/it, loss=-0.9217]


Epoch 100 finished. Avg loss: -0.9217
Saved checkpoint: /kaggle/working/simsiam_task4/simsiam_latest.pth
Pretraining complete. Encoder saved to /kaggle/working/simsiam_task4/simsiam_encoder.pth


## Save Stage

In [11]:
# =========================
# Cell: Save & Export (run after Cell 9 finishes)
# Saves checkpoints, RNG states, manifests, encoder and zips OUT_DIR for download.
# =========================

import os, json, time, pickle, sys, shutil
import torch

OUT_DIR = "/kaggle/working/simsiam_task4"   # same as notebook config
archive_path = "/kaggle/working/simsiam_task4_archive.zip"

os.makedirs(OUT_DIR, exist_ok=True)

# 1) Save a reproducible checkpoint if model exists in memory
try:
    ck = {
        "timestamp": time.time(),
        "python_version": sys.version,
        "manifest": globals().get("manifest", None),
        "split_manifest": globals().get("split_manifest", None),
        "seed": globals().get("SEED", None),
    }
    # model/optimizer/scheduler/epoch if available
    if "model" in globals():
        ck["model_state"] = model.state_dict()
    if "optimizer" in globals():
        ck["optimizer_state"] = optimizer.state_dict()
    if "scheduler" in globals():
        try:
            ck["scheduler_state"] = scheduler.state_dict()
        except Exception:
            pass
    # epoch if defined
    ck["last_epoch"] = globals().get("epoch", globals().get("start_epoch", None))
    # RNG states
    import random, numpy as np
    ck["py_random_state"] = random.getstate()
    ck["np_random_state"] = np.random.get_state()
    ck["torch_cpu_rng"] = torch.get_rng_state()
    if torch.cuda.is_available():
        try:
            ck["torch_cuda_rng_all"] = torch.cuda.get_rng_state_all()
        except Exception:
            pass
    # save
    ck_path = os.path.join(OUT_DIR, "manual_checkpoint_after_pretrain.pth")
    torch.save(ck, ck_path)
    print("Saved manual checkpoint to:", ck_path)
except Exception as e:
    print("Warning: could not save model/optimizer states:", str(e))

# 2) Save encoder separately if present
try:
    if "model" in globals() and hasattr(model, "encoder"):
        enc_path = os.path.join(OUT_DIR, "simsiam_encoder_memory.pth")
        torch.save({"encoder_state_dict": model.encoder.state_dict(), "feat_dim": model.feat_dim}, enc_path)
        print("Saved in-memory encoder to:", enc_path)
    elif os.path.exists(os.path.join(OUT_DIR, "simsiam_encoder.pth")):
        print("Encoder checkpoint already on disk:", os.path.join(OUT_DIR, "simsiam_encoder.pth"))
except Exception as e:
    print("Warning saving encoder:", str(e))

# 3) Ensure manifests are saved to OUT_DIR (copy if present in /kaggle/working)
for fname in ["manifest.json", "split_manifest.json"]:
    src_candidates = [
        os.path.join(OUT_DIR, fname),
        os.path.join("/kaggle/working/simsiam_task4", fname),
        fname
    ]
    for s in src_candidates:
        if os.path.exists(s) and not os.path.exists(os.path.join(OUT_DIR, fname)):
            try:
                shutil.copy(s, os.path.join(OUT_DIR, fname))
                print("Copied manifest to OUT_DIR:", fname)
            except Exception:
                pass

# 4) Save a requirements snapshot
try:
    req_path = os.path.join(OUT_DIR, "requirements.txt")
    # pip freeze may be available on Kaggle
    import subprocess
    with open(req_path, "w") as f:
        subprocess.run([sys.executable, "-m", "pip", "freeze"], stdout=f)
    print("Saved package list to:", req_path)
except Exception as e:
    print("Warning saving requirements:", str(e))

# 5) Zip the OUT_DIR for download (overwrites existing)
try:
    if os.path.exists(archive_path):
        os.remove(archive_path)
    shutil.make_archive(base_name=archive_path.replace(".zip",""), format="zip", root_dir=OUT_DIR)
    print("Created archive:", archive_path)
    # list archive
    print("Archive size (bytes):", os.path.getsize(archive_path))
except Exception as e:
    print("Error creating archive:", str(e))

# 6) Quick listing of artifacts to confirm
print("\nSample files in OUT_DIR:")
for root, dirs, files in os.walk(OUT_DIR):
    sample = files[:10]
    print(root, "->", sample)
    break

print("\nDONE: Download the file 'simsiam_task4_archive.zip' from the Kaggle notebook output files (right panel).")

Saved manual checkpoint to: /kaggle/working/simsiam_task4/manual_checkpoint_after_pretrain.pth
Saved in-memory encoder to: /kaggle/working/simsiam_task4/simsiam_encoder_memory.pth
Saved package list to: /kaggle/working/simsiam_task4/requirements.txt
Created archive: /kaggle/working/simsiam_task4_archive.zip
Archive size (bytes): 350052858

Sample files in OUT_DIR:
/kaggle/working/simsiam_task4 -> ['split_manifest.json', 'requirements.txt', 'simsiam_latest.pth', 'simsiam_encoder_memory.pth', 'manual_checkpoint_after_pretrain.pth', 'manifest.json', 'simsiam_encoder.pth']

DONE: Download the file 'simsiam_task4_archive.zip' from the Kaggle notebook output files (right panel).


## Load Stage

In [12]:
# ====== Load & Restore (fixed for torch.load unpickling) ======
import os, json, shutil, torch, random, numpy as np

ARCHIVE_DIR = "/kaggle/input/simsiam-task4-archive"   # your extracted archive path
OUT_DIR = "/kaggle/working/simsiam_task4"
os.makedirs(OUT_DIR, exist_ok=True)

# Copy files from archive to OUT_DIR
for fname in os.listdir(ARCHIVE_DIR):
    src = os.path.join(ARCHIVE_DIR, fname)
    dst = os.path.join(OUT_DIR, fname)
    if not os.path.exists(dst):
        try:
            shutil.copy(src, dst)
        except:
            pass

print("Copied archive files to:", OUT_DIR)

# Load manifests
manifest = None
split_manifest = None
if os.path.exists(os.path.join(OUT_DIR,"manifest.json")):
    with open(os.path.join(OUT_DIR,"manifest.json"),"r") as f:
        manifest = json.load(f)
if os.path.exists(os.path.join(OUT_DIR,"split_manifest.json")):
    with open(os.path.join(OUT_DIR,"split_manifest.json"),"r") as f:
        split_manifest = json.load(f)

print("manifest loaded:", bool(manifest), "split_manifest loaded:", bool(split_manifest))

# Load manual checkpoint (FIXED)
ckpt_path = os.path.join(OUT_DIR, "manual_checkpoint_after_pretrain.pth")
ck = None
if os.path.exists(ckpt_path):
    print("Loading checkpoint with weights_only=False ...")
    ck = torch.load(ckpt_path, map_location="cpu", weights_only=False)
    print("Loaded checkpoint:", ckpt_path)

    # restore RNG states if present
    try:
        if "py_random_state" in ck: random.setstate(ck["py_random_state"])
        if "np_random_state" in ck: np.random.set_state(ck["np_random_state"])
        if "torch_cpu_rng" in ck: torch.set_rng_state(ck["torch_cpu_rng"])
        if torch.cuda.is_available() and "torch_cuda_rng_all" in ck:
            try: torch.cuda.set_rng_state_all(ck["torch_cuda_rng_all"])
            except: pass
    except Exception as e:
        print("Warning restoring RNG:", e)
else:
    print("WARNING: Checkpoint not found:", ckpt_path)

# Load encoder checkpoint
enc_ck = None
for p in [
    os.path.join(OUT_DIR,"simsiam_encoder_memory.pth"),
    os.path.join(OUT_DIR,"simsiam_encoder.pth")
]:
    if os.path.exists(p):
        enc_ck = torch.load(p, map_location="cpu", weights_only=False)
        print("Loaded encoder:", p)
        break

encoder = None
if enc_ck is not None and "SimSiam" in globals():
    try:
        BACKBONE = globals().get("BACKBONE", "resnet18")
        model = SimSiam(backbone=BACKBONE)
        model.encoder.load_state_dict(enc_ck["encoder_state_dict"])
        encoder = model.encoder
        print("Encoder restored in memory.")
    except Exception as e:
        print("Could not attach encoder to SimSiam model:", e)
else:
    print("Encoder available in enc_ck but SimSiam class not defined yet.")

globals().update({
    "manifest": manifest,
    "split_manifest": split_manifest,
    "checkpoint_dict": ck,
    "encoder_ck": enc_ck,
    "encoder": encoder
})

print("\nRestore summary:")
print(" manifest:", bool(manifest))
print(" split_manifest:", bool(split_manifest))
print(" checkpoint loaded:", bool(ck))
print(" encoder loaded:", encoder is not None)
print("You can now continue from Cell 10.")


Copied archive files to: /kaggle/working/simsiam_task4
manifest loaded: True split_manifest loaded: True
Loading checkpoint with weights_only=False ...
Loaded checkpoint: /kaggle/working/simsiam_task4/manual_checkpoint_after_pretrain.pth
Loaded encoder: /kaggle/working/simsiam_task4/simsiam_encoder_memory.pth
Encoder restored in memory.

Restore summary:
 manifest: True
 split_manifest: True
 checkpoint loaded: True
 encoder loaded: True
You can now continue from Cell 10.


## 2nd Loader

In [14]:
# ====== Restore train/val/test splits from split_manifest.json ======

import json, os

split_path = "/kaggle/working/simsiam_task4/split_manifest.json"

with open(split_path, "r") as f:
    split = json.load(f)

train_paths = split["train"]
train_labels = split["train_labels"]
val_paths   = split["val"]
val_labels  = split["val_labels"]
test_paths  = split["test"]
test_labels = split["test_labels"]

classes = split["classes"] if "classes" in split else ["Diseased", "Dried", "Healthy"]

print("Loaded splits:")
print("Train:", len(train_paths))
print("Val:", len(val_paths))
print("Test:", len(test_paths))


Loaded splits:
Train: 1296
Val: 144
Test: 360


CELL 10 — Feature extraction (frozen encoder) for downstream tasks

In [15]:
# =========================
# Feature extraction using frozen encoder saved above.
# Produces numpy files: train_feats.npy, val_feats.npy, test_feats.npy and corresponding label npys.
# =========================

# load encoder
enc_ckpt = os.path.join(OUT_DIR, "simsiam_encoder.pth")
if not os.path.exists(enc_ckpt):
    # if we just trained in memory, use model.encoder; else load from file
    if 'model' in globals() and hasattr(model, "encoder"):
        encoder = model.encoder
    else:
        raise FileNotFoundError("Encoder checkpoint not found and model not in memory.")
else:
    d = torch.load(enc_ckpt, map_location=DEVICE)
    encoder = SimSiam(backbone=BACKBONE).encoder  # dummy to get structure
    encoder.load_state_dict(d["encoder_state_dict"])
encoder = encoder.to(DEVICE)
encoder.eval()

# helper to extract and save
def extract_and_save(paths_list, labels_list, split_name):
    ds = ManifestDataset(paths_list, labels_list, eval_transform)
    loader = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    feats = []
    files = []
    with torch.no_grad():
        for imgs, lbls, ps in loader:
            imgs = imgs.to(DEVICE)
            h = encoder(imgs).view(imgs.size(0), -1).cpu().numpy()
            feats.append(h)
            files.extend(ps)
    feats = np.vstack(feats)
    np.save(os.path.join(OUT_DIR, f"{split_name}_feats.npy"), feats)
    np.save(os.path.join(OUT_DIR, f"{split_name}_labels.npy"), np.array(labels_list))
    print(f"Saved {split_name} features: {feats.shape} to {OUT_DIR}/{split_name}_feats.npy")
    return feats

train_feats = extract_and_save(train_paths, train_labels, "train")
val_feats = extract_and_save(val_paths, val_labels, "val")
test_feats = extract_and_save(test_paths, test_labels, "test")


Saved train features: (1296, 512) to /kaggle/working/simsiam_task4/train_feats.npy
Saved val features: (144, 512) to /kaggle/working/simsiam_task4/val_feats.npy
Saved test features: (360, 512) to /kaggle/working/simsiam_task4/test_feats.npy


CELL 11 — Linear probe + shallow heads evaluations (train classifiers on frozen features)

In [16]:
# =========================
# Train linear probe and several shallow heads on frozen features.
# Outputs models to OUT_DIR and prints metrics.
# =========================

# load feats if needed
train_feats = np.load(os.path.join(OUT_DIR, "train_feats.npy"))
train_lbls = np.load(os.path.join(OUT_DIR, "train_labels.npy"))
val_feats = np.load(os.path.join(OUT_DIR, "val_feats.npy"))
val_lbls = np.load(os.path.join(OUT_DIR, "val_labels.npy"))
test_feats = np.load(os.path.join(OUT_DIR, "test_feats.npy"))
test_lbls = np.load(os.path.join(OUT_DIR, "test_labels.npy"))

# list of models to train
classifiers = {
    "LogisticRegression": LogisticRegression(max_iter=2000),
    "SVM_RBF": SVC(kernel="rbf", probability=True),
    "RandomForest": RandomForestClassifier(n_estimators=100),
    "DecisionTree": DecisionTreeClassifier(),
    "MLP": MLPClassifier(hidden_layer_sizes=(512,), max_iter=500)
}

results = {}
for name, clf in classifiers.items():
    print("Training:", name)
    clf.fit(train_feats, train_lbls)
    val_pred = clf.predict(val_feats)
    val_acc = accuracy_score(val_lbls, val_pred)
    test_pred = clf.predict(test_feats)
    test_acc = accuracy_score(test_lbls, test_pred)
    print(f" {name} val_acc: {val_acc:.4f} test_acc: {test_acc:.4f}")
    results[name] = {"val_acc": float(val_acc), "test_acc": float(test_acc)}
    joblib.dump(clf, os.path.join(OUT_DIR, f"{name}.joblib"))

# Save results summary
with open(os.path.join(OUT_DIR, "probe_results.json"), "w") as f:
    json.dump(results, f, indent=2)
print("Probe results saved to", os.path.join(OUT_DIR, "probe_results.json"))


Training: LogisticRegression
 LogisticRegression val_acc: 0.8333 test_acc: 0.8167
Training: SVM_RBF
 SVM_RBF val_acc: 0.7917 test_acc: 0.7806
Training: RandomForest
 RandomForest val_acc: 0.8403 test_acc: 0.7972
Training: DecisionTree
 DecisionTree val_acc: 0.7431 test_acc: 0.6778
Training: MLP
 MLP val_acc: 0.8750 test_acc: 0.8528
Probe results saved to /kaggle/working/simsiam_task4/probe_results.json


In [17]:
# =========================
# Full fine-tune: build a classifier that uses the encoder and a linear head; unfreeze encoder
# Trains for FINETUNE_EPOCHS and saves best checkpoint by val accuracy.
# =========================

# =========================
# Cell 12 (REPLACED) — Full fine-tune with safe DataLoader, resume support, and interrupt handling
# Replace the original Cell 12 with this cell. It:
# - uses num_workers=0 to avoid worker hang after restoring
# - uses pin_memory when CUDA available
# - supports resuming from finetune_resume.pth or finetune_best.pth
# - periodically saves a resume checkpoint and best checkpoint
# - saves when KeyboardInterrupt is caught so you can continue later
# =========================

import os
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# --- Configurable small settings for stability ---
ft_num_workers = 0                      # Recommended 0 for Kaggle / resume stability
pin_memory = True if torch.cuda.is_available() else False
resume_ckpt_path = os.path.join(OUT_DIR, "finetune_resume.pth")
best_ckpt_path = os.path.join(OUT_DIR, "finetune_best.pth")
save_every_epoch = True                 # set False to save only best

# --- Recreate / ensure encoder is loaded ---
# If encoder variable not present but encoder checkpoint file exists, load it.
if 'encoder' not in globals() or encoder is None:
    enc_candidates = [
        os.path.join(OUT_DIR, "simsiam_encoder_memory.pth"),
        os.path.join(OUT_DIR, "simsiam_encoder.pth")
    ]
    found = None
    for p in enc_candidates:
        if os.path.exists(p):
            found = p
            break
    if found is None:
        raise FileNotFoundError("Encoder checkpoint not found in OUT_DIR. Run pretraining or restore archive.")
    enc_ck = torch.load(found, map_location="cpu", weights_only=False)
    # build a SimSiam model to host the encoder (SimSiam class must be defined in the notebook)
    BACKBONE = globals().get("BACKBONE", "resnet18")
    tmp_model = SimSiam(backbone=BACKBONE)
    tmp_model.encoder.load_state_dict(enc_ck["encoder_state_dict"])
    encoder = tmp_model.encoder
    del tmp_model

# Put encoder on device
encoder = encoder.to(DEVICE)

# --- Build fine-tune model wrapper ---
class FineTuneClassifier(nn.Module):
    def __init__(self, encoder, feat_dim, num_classes):
        super().__init__()
        self.encoder = encoder
        self.head = nn.Linear(feat_dim, num_classes)

    def forward(self, x):
        h = self.encoder(x).view(x.size(0), -1)
        return self.head(h)

# Determine feature dim from encoder output
# Forward a dummy tensor to compute feat_dim robustly (safe)
encoder.eval()
with torch.no_grad():
    dummy = torch.zeros(1, 3, RESOLUTION, RESOLUTION).to(DEVICE)
    try:
        out = encoder(dummy).view(1, -1)
        feat_dim = out.shape[1]
    except Exception:
        # fallback to known dims
        feat_dim = 512 if BACKBONE == "resnet18" else 2048

num_classes = len(classes)
ft_model = FineTuneClassifier(encoder, feat_dim, num_classes).to(DEVICE)

# --- Fine-tune dataset & loaders (use mild augment on train) ---
ft_train_transform = transforms.Compose([
    transforms.Resize(int(RESOLUTION*1.1)),
    transforms.CenterCrop(RESOLUTION),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])
ft_train_ds = ManifestDataset(train_paths, train_labels, ft_train_transform)
ft_val_ds = ManifestDataset(val_paths, val_labels, eval_transform)

ft_train_loader = DataLoader(ft_train_ds, batch_size=BATCH_SIZE, shuffle=True,
                             num_workers=ft_num_workers, pin_memory=pin_memory)
ft_val_loader   = DataLoader(ft_val_ds,  batch_size=BATCH_SIZE, shuffle=False,
                             num_workers=ft_num_workers, pin_memory=pin_memory)

# --- Optimizer / scheduler / loss ---
ft_optimizer = optim.SGD(ft_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
ft_scheduler = optim.lr_scheduler.StepLR(ft_optimizer, step_size=15, gamma=0.1)
criterion = nn.CrossEntropyLoss()

# --- Resume if possible ---
start_epoch = 0
best_val_acc = 0.0
if os.path.exists(resume_ckpt_path):
    try:
        ck = torch.load(resume_ckpt_path, map_location=DEVICE, weights_only=False)
        ft_model.load_state_dict(ck["model_state"])
        ft_optimizer.load_state_dict(ck["optimizer_state"])
        if "scheduler_state" in ck:
            try:
                ft_scheduler.load_state_dict(ck["scheduler_state"])
            except Exception:
                pass
        start_epoch = ck.get("epoch", 0) + 1
        best_val_acc = ck.get("val_acc", 0.0)
        print(f"Resumed fine-tune from resume checkpoint at epoch {start_epoch} (best val {best_val_acc:.4f})")
    except Exception as e:
        print("Could not resume from resume checkpoint:", e)

# If no resume but best exists, you may optionally load best as init
elif os.path.exists(best_ckpt_path):
    try:
        ck = torch.load(best_ckpt_path, map_location=DEVICE, weights_only=False)
        ft_model.load_state_dict(ck["model_state"])
        best_val_acc = ck.get("val_acc", 0.0)
        print("Initialized fine-tune from best checkpoint (val_acc={:.4f})".format(best_val_acc))
    except Exception as e:
        print("Could not load best checkpoint as init:", e)

# --- Main fine-tune loop with safe saving & interrupt handling ---
try:
    for epoch in range(start_epoch, FINETUNE_EPOCHS):
        ft_model.train()
        losses = []
        loop = tqdm(ft_train_loader, desc=f"Fine-tune Epoch {epoch+1}/{FINETUNE_EPOCHS}")
        for imgs, labels_batch, _ in loop:
            imgs = imgs.to(DEVICE, non_blocking=pin_memory)
            labels_batch = labels_batch.to(DEVICE, non_blocking=pin_memory)
            logits = ft_model(imgs)
            loss = criterion(logits, labels_batch)
            ft_optimizer.zero_grad()
            loss.backward()
            ft_optimizer.step()
            losses.append(loss.item())
            loop.set_postfix(train_loss=f"{np.mean(losses):.4f}")

        ft_scheduler.step()

        # Validation
        ft_model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for imgs, labels_batch, _ in ft_val_loader:
                imgs = imgs.to(DEVICE, non_blocking=pin_memory)
                logits = ft_model(imgs)
                preds = logits.argmax(dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels_batch.numpy())
        val_acc = accuracy_score(all_labels, all_preds)
        print(f"Fine-tune Epoch {epoch+1}/{FINETUNE_EPOCHS} - train_loss: {np.mean(losses):.4f} val_acc: {val_acc:.4f}")

        # Save best
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                "epoch": epoch,
                "model_state": ft_model.state_dict(),
                "optimizer_state": ft_optimizer.state_dict(),
                "val_acc": val_acc
            }, best_ckpt_path)
            print("Saved best fine-tune checkpoint:", best_ckpt_path)

        # Periodic resume checkpoint
        if save_every_epoch:
            torch.save({
                "epoch": epoch,
                "model_state": ft_model.state_dict(),
                "optimizer_state": ft_optimizer.state_dict(),
                "scheduler_state": ft_scheduler.state_dict(),
                "val_acc": val_acc
            }, resume_ckpt_path)

except KeyboardInterrupt:
    # Save resume checkpoint on interrupt so you can continue later
    print("KeyboardInterrupt caught — saving resume checkpoint...")
    torch.save({
        "epoch": epoch,
        "model_state": ft_model.state_dict(),
        "optimizer_state": ft_optimizer.state_dict(),
        "scheduler_state": ft_scheduler.state_dict(),
        "val_acc": best_val_acc
    }, resume_ckpt_path)
    print("Saved resume checkpoint to", resume_ckpt_path)
    raise

print("Fine-tune complete. Best val acc:", best_val_acc)
# End of replaced Cell 12



Fine-tune Epoch 1/50: 100%|██████████| 21/21 [10:33<00:00, 30.17s/it, train_loss=0.8706]


Fine-tune Epoch 1/50 - train_loss: 0.8706 val_acc: 0.3889
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 2/50: 100%|██████████| 21/21 [10:34<00:00, 30.23s/it, train_loss=0.6193]


Fine-tune Epoch 2/50 - train_loss: 0.6193 val_acc: 0.5694
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 3/50: 100%|██████████| 21/21 [10:30<00:00, 30.02s/it, train_loss=0.7477]


Fine-tune Epoch 3/50 - train_loss: 0.7477 val_acc: 0.7431
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 4/50: 100%|██████████| 21/21 [10:27<00:00, 29.87s/it, train_loss=0.4576]


Fine-tune Epoch 4/50 - train_loss: 0.4576 val_acc: 0.7778
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 5/50: 100%|██████████| 21/21 [10:29<00:00, 29.98s/it, train_loss=0.4598]


Fine-tune Epoch 5/50 - train_loss: 0.4598 val_acc: 0.8681
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 6/50: 100%|██████████| 21/21 [10:29<00:00, 29.97s/it, train_loss=0.3948]


Fine-tune Epoch 6/50 - train_loss: 0.3948 val_acc: 0.8611


Fine-tune Epoch 7/50: 100%|██████████| 21/21 [10:37<00:00, 30.33s/it, train_loss=0.3207]


Fine-tune Epoch 7/50 - train_loss: 0.3207 val_acc: 0.6250


Fine-tune Epoch 8/50: 100%|██████████| 21/21 [10:31<00:00, 30.07s/it, train_loss=0.3516]


Fine-tune Epoch 8/50 - train_loss: 0.3516 val_acc: 0.8958
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 9/50: 100%|██████████| 21/21 [10:33<00:00, 30.19s/it, train_loss=0.3168]


Fine-tune Epoch 9/50 - train_loss: 0.3168 val_acc: 0.8681


Fine-tune Epoch 10/50: 100%|██████████| 21/21 [10:34<00:00, 30.19s/it, train_loss=0.2560]


Fine-tune Epoch 10/50 - train_loss: 0.2560 val_acc: 0.8611


Fine-tune Epoch 11/50: 100%|██████████| 21/21 [10:39<00:00, 30.45s/it, train_loss=0.2551]


Fine-tune Epoch 11/50 - train_loss: 0.2551 val_acc: 0.8889


Fine-tune Epoch 12/50: 100%|██████████| 21/21 [10:44<00:00, 30.70s/it, train_loss=0.2649]


Fine-tune Epoch 12/50 - train_loss: 0.2649 val_acc: 0.8611


Fine-tune Epoch 13/50: 100%|██████████| 21/21 [10:29<00:00, 29.99s/it, train_loss=0.2212]


Fine-tune Epoch 13/50 - train_loss: 0.2212 val_acc: 0.8819


Fine-tune Epoch 14/50: 100%|██████████| 21/21 [10:25<00:00, 29.77s/it, train_loss=0.2540]


Fine-tune Epoch 14/50 - train_loss: 0.2540 val_acc: 0.8194


Fine-tune Epoch 15/50: 100%|██████████| 21/21 [10:26<00:00, 29.85s/it, train_loss=0.2133]


Fine-tune Epoch 15/50 - train_loss: 0.2133 val_acc: 0.8889


Fine-tune Epoch 16/50: 100%|██████████| 21/21 [10:37<00:00, 30.38s/it, train_loss=0.1158]


Fine-tune Epoch 16/50 - train_loss: 0.1158 val_acc: 0.9236
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 17/50: 100%|██████████| 21/21 [10:22<00:00, 29.66s/it, train_loss=0.0835]


Fine-tune Epoch 17/50 - train_loss: 0.0835 val_acc: 0.9306
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 18/50: 100%|██████████| 21/21 [10:35<00:00, 30.26s/it, train_loss=0.0748]


Fine-tune Epoch 18/50 - train_loss: 0.0748 val_acc: 0.9375
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 19/50: 100%|██████████| 21/21 [10:25<00:00, 29.78s/it, train_loss=0.0656]


Fine-tune Epoch 19/50 - train_loss: 0.0656 val_acc: 0.9306


Fine-tune Epoch 20/50: 100%|██████████| 21/21 [10:23<00:00, 29.68s/it, train_loss=0.0606]


Fine-tune Epoch 20/50 - train_loss: 0.0606 val_acc: 0.9306


Fine-tune Epoch 21/50: 100%|██████████| 21/21 [10:29<00:00, 29.99s/it, train_loss=0.0553]


Fine-tune Epoch 21/50 - train_loss: 0.0553 val_acc: 0.9306


Fine-tune Epoch 22/50: 100%|██████████| 21/21 [10:24<00:00, 29.73s/it, train_loss=0.0635]


Fine-tune Epoch 22/50 - train_loss: 0.0635 val_acc: 0.9306


Fine-tune Epoch 23/50: 100%|██████████| 21/21 [10:28<00:00, 29.93s/it, train_loss=0.0465]


Fine-tune Epoch 23/50 - train_loss: 0.0465 val_acc: 0.9375


Fine-tune Epoch 24/50: 100%|██████████| 21/21 [10:34<00:00, 30.22s/it, train_loss=0.0815]


Fine-tune Epoch 24/50 - train_loss: 0.0815 val_acc: 0.9236


Fine-tune Epoch 25/50: 100%|██████████| 21/21 [10:33<00:00, 30.15s/it, train_loss=0.0767]


Fine-tune Epoch 25/50 - train_loss: 0.0767 val_acc: 0.9375


Fine-tune Epoch 26/50: 100%|██████████| 21/21 [10:25<00:00, 29.81s/it, train_loss=0.0469]


Fine-tune Epoch 26/50 - train_loss: 0.0469 val_acc: 0.9444
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 27/50: 100%|██████████| 21/21 [10:33<00:00, 30.17s/it, train_loss=0.0623]


Fine-tune Epoch 27/50 - train_loss: 0.0623 val_acc: 0.9306


Fine-tune Epoch 28/50: 100%|██████████| 21/21 [10:34<00:00, 30.24s/it, train_loss=0.0466]


Fine-tune Epoch 28/50 - train_loss: 0.0466 val_acc: 0.9306


Fine-tune Epoch 29/50: 100%|██████████| 21/21 [10:31<00:00, 30.08s/it, train_loss=0.0395]


Fine-tune Epoch 29/50 - train_loss: 0.0395 val_acc: 0.9306


Fine-tune Epoch 30/50: 100%|██████████| 21/21 [10:35<00:00, 30.24s/it, train_loss=0.0448]


Fine-tune Epoch 30/50 - train_loss: 0.0448 val_acc: 0.9444


Fine-tune Epoch 31/50: 100%|██████████| 21/21 [10:38<00:00, 30.40s/it, train_loss=0.0296]


Fine-tune Epoch 31/50 - train_loss: 0.0296 val_acc: 0.9444


Fine-tune Epoch 32/50: 100%|██████████| 21/21 [10:43<00:00, 30.64s/it, train_loss=0.0322]


Fine-tune Epoch 32/50 - train_loss: 0.0322 val_acc: 0.9375


Fine-tune Epoch 33/50: 100%|██████████| 21/21 [10:29<00:00, 29.96s/it, train_loss=0.0306]


Fine-tune Epoch 33/50 - train_loss: 0.0306 val_acc: 0.9375


Fine-tune Epoch 34/50: 100%|██████████| 21/21 [10:34<00:00, 30.20s/it, train_loss=0.0329]


Fine-tune Epoch 34/50 - train_loss: 0.0329 val_acc: 0.9375


Fine-tune Epoch 35/50: 100%|██████████| 21/21 [10:33<00:00, 30.17s/it, train_loss=0.0472]


Fine-tune Epoch 35/50 - train_loss: 0.0472 val_acc: 0.9444


Fine-tune Epoch 36/50: 100%|██████████| 21/21 [10:37<00:00, 30.38s/it, train_loss=0.0243]


Fine-tune Epoch 36/50 - train_loss: 0.0243 val_acc: 0.9306


Fine-tune Epoch 37/50: 100%|██████████| 21/21 [10:33<00:00, 30.15s/it, train_loss=0.0286]


Fine-tune Epoch 37/50 - train_loss: 0.0286 val_acc: 0.9375


Fine-tune Epoch 38/50: 100%|██████████| 21/21 [10:38<00:00, 30.40s/it, train_loss=0.0308]


Fine-tune Epoch 38/50 - train_loss: 0.0308 val_acc: 0.9306


Fine-tune Epoch 39/50: 100%|██████████| 21/21 [10:33<00:00, 30.15s/it, train_loss=0.0283]


Fine-tune Epoch 39/50 - train_loss: 0.0283 val_acc: 0.9444


Fine-tune Epoch 40/50: 100%|██████████| 21/21 [10:43<00:00, 30.66s/it, train_loss=0.0349]


Fine-tune Epoch 40/50 - train_loss: 0.0349 val_acc: 0.9375


Fine-tune Epoch 41/50: 100%|██████████| 21/21 [10:31<00:00, 30.06s/it, train_loss=0.0262]


Fine-tune Epoch 41/50 - train_loss: 0.0262 val_acc: 0.9444


Fine-tune Epoch 42/50: 100%|██████████| 21/21 [10:40<00:00, 30.52s/it, train_loss=0.0232]


Fine-tune Epoch 42/50 - train_loss: 0.0232 val_acc: 0.9444


Fine-tune Epoch 43/50: 100%|██████████| 21/21 [10:38<00:00, 30.38s/it, train_loss=0.0282]


Fine-tune Epoch 43/50 - train_loss: 0.0282 val_acc: 0.9375


Fine-tune Epoch 44/50: 100%|██████████| 21/21 [10:30<00:00, 30.01s/it, train_loss=0.0302]


Fine-tune Epoch 44/50 - train_loss: 0.0302 val_acc: 0.9444


Fine-tune Epoch 45/50: 100%|██████████| 21/21 [10:23<00:00, 29.69s/it, train_loss=0.0270]


Fine-tune Epoch 45/50 - train_loss: 0.0270 val_acc: 0.9375


Fine-tune Epoch 46/50: 100%|██████████| 21/21 [10:35<00:00, 30.27s/it, train_loss=0.0399]


Fine-tune Epoch 46/50 - train_loss: 0.0399 val_acc: 0.9306


Fine-tune Epoch 47/50: 100%|██████████| 21/21 [10:31<00:00, 30.05s/it, train_loss=0.0271]


Fine-tune Epoch 47/50 - train_loss: 0.0271 val_acc: 0.9444


Fine-tune Epoch 48/50: 100%|██████████| 21/21 [10:33<00:00, 30.19s/it, train_loss=0.0240]


Fine-tune Epoch 48/50 - train_loss: 0.0240 val_acc: 0.9375


Fine-tune Epoch 49/50: 100%|██████████| 21/21 [10:40<00:00, 30.49s/it, train_loss=0.0271]


Fine-tune Epoch 49/50 - train_loss: 0.0271 val_acc: 0.9306


Fine-tune Epoch 50/50: 100%|██████████| 21/21 [10:31<00:00, 30.09s/it, train_loss=0.0240]


Fine-tune Epoch 50/50 - train_loss: 0.0240 val_acc: 0.9375
Fine-tune complete. Best val acc: 0.9444444444444444


CELL 12 — Full fine-tune: attach classification head and fine-tune entire encoder

In [14]:
# =========================
# Full fine-tune: build a classifier that uses the encoder and a linear head; unfreeze encoder
# Trains for FINETUNE_EPOCHS and saves best checkpoint by val accuracy.
# =========================

# =========================
# Cell 12 (REPLACED) — Full fine-tune with safe DataLoader, resume support, and interrupt handling
# Replace the original Cell 12 with this cell. It:
# - uses num_workers=0 to avoid worker hang after restoring
# - uses pin_memory when CUDA available
# - supports resuming from finetune_resume.pth or finetune_best.pth
# - periodically saves a resume checkpoint and best checkpoint
# - saves when KeyboardInterrupt is caught so you can continue later
# =========================

import os
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# --- Configurable small settings for stability ---
ft_num_workers = 0                      # Recommended 0 for Kaggle / resume stability
pin_memory = True if torch.cuda.is_available() else False
resume_ckpt_path = os.path.join(OUT_DIR, "finetune_resume.pth")
best_ckpt_path = os.path.join(OUT_DIR, "finetune_best.pth")
save_every_epoch = True                 # set False to save only best

# --- Recreate / ensure encoder is loaded ---
# If encoder variable not present but encoder checkpoint file exists, load it.
if 'encoder' not in globals() or encoder is None:
    enc_candidates = [
        os.path.join(OUT_DIR, "simsiam_encoder_memory.pth"),
        os.path.join(OUT_DIR, "simsiam_encoder.pth")
    ]
    found = None
    for p in enc_candidates:
        if os.path.exists(p):
            found = p
            break
    if found is None:
        raise FileNotFoundError("Encoder checkpoint not found in OUT_DIR. Run pretraining or restore archive.")
    enc_ck = torch.load(found, map_location="cpu", weights_only=False)
    # build a SimSiam model to host the encoder (SimSiam class must be defined in the notebook)
    BACKBONE = globals().get("BACKBONE", "resnet18")
    tmp_model = SimSiam(backbone=BACKBONE)
    tmp_model.encoder.load_state_dict(enc_ck["encoder_state_dict"])
    encoder = tmp_model.encoder
    del tmp_model

# Put encoder on device
encoder = encoder.to(DEVICE)

# --- Build fine-tune model wrapper ---
class FineTuneClassifier(nn.Module):
    def __init__(self, encoder, feat_dim, num_classes):
        super().__init__()
        self.encoder = encoder
        self.head = nn.Linear(feat_dim, num_classes)

    def forward(self, x):
        h = self.encoder(x).view(x.size(0), -1)
        return self.head(h)

# Determine feature dim from encoder output
# Forward a dummy tensor to compute feat_dim robustly (safe)
encoder.eval()
with torch.no_grad():
    dummy = torch.zeros(1, 3, RESOLUTION, RESOLUTION).to(DEVICE)
    try:
        out = encoder(dummy).view(1, -1)
        feat_dim = out.shape[1]
    except Exception:
        # fallback to known dims
        feat_dim = 512 if BACKBONE == "resnet18" else 2048

num_classes = len(classes)
ft_model = FineTuneClassifier(encoder, feat_dim, num_classes).to(DEVICE)

# --- Fine-tune dataset & loaders (use mild augment on train) ---
ft_train_transform = transforms.Compose([
    transforms.Resize(int(RESOLUTION*1.1)),
    transforms.CenterCrop(RESOLUTION),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])
ft_train_ds = ManifestDataset(train_paths, train_labels, ft_train_transform)
ft_val_ds = ManifestDataset(val_paths, val_labels, eval_transform)

ft_train_loader = DataLoader(ft_train_ds, batch_size=BATCH_SIZE, shuffle=True,
                             num_workers=ft_num_workers, pin_memory=pin_memory)
ft_val_loader   = DataLoader(ft_val_ds,  batch_size=BATCH_SIZE, shuffle=False,
                             num_workers=ft_num_workers, pin_memory=pin_memory)

# --- Optimizer / scheduler / loss ---
ft_optimizer = optim.SGD(ft_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
ft_scheduler = optim.lr_scheduler.StepLR(ft_optimizer, step_size=15, gamma=0.1)
criterion = nn.CrossEntropyLoss()

# --- Resume if possible ---
start_epoch = 0
best_val_acc = 0.0
if os.path.exists(resume_ckpt_path):
    try:
        ck = torch.load(resume_ckpt_path, map_location=DEVICE, weights_only=False)
        ft_model.load_state_dict(ck["model_state"])
        ft_optimizer.load_state_dict(ck["optimizer_state"])
        if "scheduler_state" in ck:
            try:
                ft_scheduler.load_state_dict(ck["scheduler_state"])
            except Exception:
                pass
        start_epoch = ck.get("epoch", 0) + 1
        best_val_acc = ck.get("val_acc", 0.0)
        print(f"Resumed fine-tune from resume checkpoint at epoch {start_epoch} (best val {best_val_acc:.4f})")
    except Exception as e:
        print("Could not resume from resume checkpoint:", e)

# If no resume but best exists, you may optionally load best as init
elif os.path.exists(best_ckpt_path):
    try:
        ck = torch.load(best_ckpt_path, map_location=DEVICE, weights_only=False)
        ft_model.load_state_dict(ck["model_state"])
        best_val_acc = ck.get("val_acc", 0.0)
        print("Initialized fine-tune from best checkpoint (val_acc={:.4f})".format(best_val_acc))
    except Exception as e:
        print("Could not load best checkpoint as init:", e)

# --- Main fine-tune loop with safe saving & interrupt handling ---
try:
    for epoch in range(start_epoch, FINETUNE_EPOCHS):
        ft_model.train()
        losses = []
        loop = tqdm(ft_train_loader, desc=f"Fine-tune Epoch {epoch+1}/{FINETUNE_EPOCHS}")
        for imgs, labels_batch, _ in loop:
            imgs = imgs.to(DEVICE, non_blocking=pin_memory)
            labels_batch = labels_batch.to(DEVICE, non_blocking=pin_memory)
            logits = ft_model(imgs)
            loss = criterion(logits, labels_batch)
            ft_optimizer.zero_grad()
            loss.backward()
            ft_optimizer.step()
            losses.append(loss.item())
            loop.set_postfix(train_loss=f"{np.mean(losses):.4f}")

        ft_scheduler.step()

        # Validation
        ft_model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for imgs, labels_batch, _ in ft_val_loader:
                imgs = imgs.to(DEVICE, non_blocking=pin_memory)
                logits = ft_model(imgs)
                preds = logits.argmax(dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels_batch.numpy())
        val_acc = accuracy_score(all_labels, all_preds)
        print(f"Fine-tune Epoch {epoch+1}/{FINETUNE_EPOCHS} - train_loss: {np.mean(losses):.4f} val_acc: {val_acc:.4f}")

        # Save best
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                "epoch": epoch,
                "model_state": ft_model.state_dict(),
                "optimizer_state": ft_optimizer.state_dict(),
                "val_acc": val_acc
            }, best_ckpt_path)
            print("Saved best fine-tune checkpoint:", best_ckpt_path)

        # Periodic resume checkpoint
        if save_every_epoch:
            torch.save({
                "epoch": epoch,
                "model_state": ft_model.state_dict(),
                "optimizer_state": ft_optimizer.state_dict(),
                "scheduler_state": ft_scheduler.state_dict(),
                "val_acc": val_acc
            }, resume_ckpt_path)

except KeyboardInterrupt:
    # Save resume checkpoint on interrupt so you can continue later
    print("KeyboardInterrupt caught — saving resume checkpoint...")
    torch.save({
        "epoch": epoch,
        "model_state": ft_model.state_dict(),
        "optimizer_state": ft_optimizer.state_dict(),
        "scheduler_state": ft_scheduler.state_dict(),
        "val_acc": best_val_acc
    }, resume_ckpt_path)
    print("Saved resume checkpoint to", resume_ckpt_path)
    raise

print("Fine-tune complete. Best val acc:", best_val_acc)
# End of replaced Cell 12



Fine-tune Epoch 1/50: 100%|██████████| 21/21 [10:45<00:00, 30.75s/it, train_loss=0.8789]


Fine-tune Epoch 1/50 - train_loss: 0.8789 val_acc: 0.6875
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 2/50: 100%|██████████| 21/21 [10:31<00:00, 30.05s/it, train_loss=0.5919]


Fine-tune Epoch 2/50 - train_loss: 0.5919 val_acc: 0.7222
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 3/50: 100%|██████████| 21/21 [10:14<00:00, 29.26s/it, train_loss=0.7003]


Fine-tune Epoch 3/50 - train_loss: 0.7003 val_acc: 0.6181


Fine-tune Epoch 4/50: 100%|██████████| 21/21 [10:24<00:00, 29.72s/it, train_loss=0.5374]


Fine-tune Epoch 4/50 - train_loss: 0.5374 val_acc: 0.7222


Fine-tune Epoch 5/50: 100%|██████████| 21/21 [10:24<00:00, 29.72s/it, train_loss=0.5268]


Fine-tune Epoch 5/50 - train_loss: 0.5268 val_acc: 0.5972


Fine-tune Epoch 6/50: 100%|██████████| 21/21 [10:42<00:00, 30.62s/it, train_loss=0.4216]


Fine-tune Epoch 6/50 - train_loss: 0.4216 val_acc: 0.8542
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 7/50: 100%|██████████| 21/21 [10:35<00:00, 30.27s/it, train_loss=0.3410]


Fine-tune Epoch 7/50 - train_loss: 0.3410 val_acc: 0.7153


Fine-tune Epoch 8/50: 100%|██████████| 21/21 [10:36<00:00, 30.31s/it, train_loss=0.3464]


Fine-tune Epoch 8/50 - train_loss: 0.3464 val_acc: 0.8750
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 9/50: 100%|██████████| 21/21 [10:36<00:00, 30.33s/it, train_loss=0.3185]


Fine-tune Epoch 9/50 - train_loss: 0.3185 val_acc: 0.8889
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 10/50: 100%|██████████| 21/21 [10:39<00:00, 30.45s/it, train_loss=0.2622]


Fine-tune Epoch 10/50 - train_loss: 0.2622 val_acc: 0.7569


Fine-tune Epoch 11/50: 100%|██████████| 21/21 [10:55<00:00, 31.20s/it, train_loss=0.2276]


Fine-tune Epoch 11/50 - train_loss: 0.2276 val_acc: 0.8403


Fine-tune Epoch 12/50: 100%|██████████| 21/21 [10:50<00:00, 30.98s/it, train_loss=0.2587]


Fine-tune Epoch 12/50 - train_loss: 0.2587 val_acc: 0.8958
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 13/50: 100%|██████████| 21/21 [10:53<00:00, 31.12s/it, train_loss=0.2122]


Fine-tune Epoch 13/50 - train_loss: 0.2122 val_acc: 0.9444
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 14/50: 100%|██████████| 21/21 [10:56<00:00, 31.27s/it, train_loss=0.2195]


Fine-tune Epoch 14/50 - train_loss: 0.2195 val_acc: 0.8958


Fine-tune Epoch 15/50: 100%|██████████| 21/21 [10:37<00:00, 30.35s/it, train_loss=0.2529]


Fine-tune Epoch 15/50 - train_loss: 0.2529 val_acc: 0.8958


Fine-tune Epoch 16/50: 100%|██████████| 21/21 [10:32<00:00, 30.14s/it, train_loss=0.1418]


Fine-tune Epoch 16/50 - train_loss: 0.1418 val_acc: 0.9375


Fine-tune Epoch 17/50: 100%|██████████| 21/21 [10:21<00:00, 29.59s/it, train_loss=0.1043]


Fine-tune Epoch 17/50 - train_loss: 0.1043 val_acc: 0.9375


Fine-tune Epoch 18/50: 100%|██████████| 21/21 [10:42<00:00, 30.59s/it, train_loss=0.0951]


Fine-tune Epoch 18/50 - train_loss: 0.0951 val_acc: 0.9306


Fine-tune Epoch 19/50: 100%|██████████| 21/21 [10:38<00:00, 30.40s/it, train_loss=0.0772]


Fine-tune Epoch 19/50 - train_loss: 0.0772 val_acc: 0.9306


Fine-tune Epoch 20/50: 100%|██████████| 21/21 [10:37<00:00, 30.38s/it, train_loss=0.0750]


Fine-tune Epoch 20/50 - train_loss: 0.0750 val_acc: 0.9375


Fine-tune Epoch 21/50: 100%|██████████| 21/21 [10:37<00:00, 30.36s/it, train_loss=0.0699]


Fine-tune Epoch 21/50 - train_loss: 0.0699 val_acc: 0.9444


Fine-tune Epoch 22/50: 100%|██████████| 21/21 [10:29<00:00, 29.96s/it, train_loss=0.0748]


Fine-tune Epoch 22/50 - train_loss: 0.0748 val_acc: 0.9375


Fine-tune Epoch 23/50: 100%|██████████| 21/21 [10:29<00:00, 29.96s/it, train_loss=0.0591]


Fine-tune Epoch 23/50 - train_loss: 0.0591 val_acc: 0.9444


Fine-tune Epoch 24/50: 100%|██████████| 21/21 [10:49<00:00, 30.93s/it, train_loss=0.0852]


Fine-tune Epoch 24/50 - train_loss: 0.0852 val_acc: 0.9375


Fine-tune Epoch 25/50: 100%|██████████| 21/21 [10:32<00:00, 30.10s/it, train_loss=0.0999]


Fine-tune Epoch 25/50 - train_loss: 0.0999 val_acc: 0.9306


Fine-tune Epoch 26/50: 100%|██████████| 21/21 [10:40<00:00, 30.49s/it, train_loss=0.0599]


Fine-tune Epoch 26/50 - train_loss: 0.0599 val_acc: 0.9375


Fine-tune Epoch 27/50: 100%|██████████| 21/21 [10:29<00:00, 29.97s/it, train_loss=0.0674]


Fine-tune Epoch 27/50 - train_loss: 0.0674 val_acc: 0.9444


Fine-tune Epoch 28/50: 100%|██████████| 21/21 [10:29<00:00, 29.98s/it, train_loss=0.0546]


Fine-tune Epoch 28/50 - train_loss: 0.0546 val_acc: 0.9514
Saved best fine-tune checkpoint: /kaggle/working/simsiam_task4/finetune_best.pth


Fine-tune Epoch 29/50: 100%|██████████| 21/21 [10:31<00:00, 30.06s/it, train_loss=0.0487]


Fine-tune Epoch 29/50 - train_loss: 0.0487 val_acc: 0.9375


Fine-tune Epoch 30/50: 100%|██████████| 21/21 [10:39<00:00, 30.45s/it, train_loss=0.0500]


Fine-tune Epoch 30/50 - train_loss: 0.0500 val_acc: 0.9444


Fine-tune Epoch 31/50: 100%|██████████| 21/21 [10:39<00:00, 30.43s/it, train_loss=0.0388]


Fine-tune Epoch 31/50 - train_loss: 0.0388 val_acc: 0.9375


Fine-tune Epoch 32/50: 100%|██████████| 21/21 [10:43<00:00, 30.62s/it, train_loss=0.0415]


Fine-tune Epoch 32/50 - train_loss: 0.0415 val_acc: 0.9375


Fine-tune Epoch 33/50: 100%|██████████| 21/21 [10:45<00:00, 30.74s/it, train_loss=0.0404]


Fine-tune Epoch 33/50 - train_loss: 0.0404 val_acc: 0.9444


Fine-tune Epoch 34/50: 100%|██████████| 21/21 [10:38<00:00, 30.39s/it, train_loss=0.0412]


Fine-tune Epoch 34/50 - train_loss: 0.0412 val_acc: 0.9236


Fine-tune Epoch 35/50: 100%|██████████| 21/21 [10:38<00:00, 30.39s/it, train_loss=0.0595]


Fine-tune Epoch 35/50 - train_loss: 0.0595 val_acc: 0.9375


Fine-tune Epoch 36/50: 100%|██████████| 21/21 [10:49<00:00, 30.91s/it, train_loss=0.0339]


Fine-tune Epoch 36/50 - train_loss: 0.0339 val_acc: 0.9375


Fine-tune Epoch 37/50: 100%|██████████| 21/21 [10:48<00:00, 30.90s/it, train_loss=0.0386]


Fine-tune Epoch 37/50 - train_loss: 0.0386 val_acc: 0.9444


Fine-tune Epoch 38/50: 100%|██████████| 21/21 [10:33<00:00, 30.17s/it, train_loss=0.0415]


Fine-tune Epoch 38/50 - train_loss: 0.0415 val_acc: 0.9375


Fine-tune Epoch 39/50: 100%|██████████| 21/21 [10:24<00:00, 29.73s/it, train_loss=0.0358]


Fine-tune Epoch 39/50 - train_loss: 0.0358 val_acc: 0.9375


Fine-tune Epoch 40/50: 100%|██████████| 21/21 [10:46<00:00, 30.78s/it, train_loss=0.0500]


Fine-tune Epoch 40/50 - train_loss: 0.0500 val_acc: 0.9375


Fine-tune Epoch 41/50: 100%|██████████| 21/21 [10:43<00:00, 30.64s/it, train_loss=0.0360]


Fine-tune Epoch 41/50 - train_loss: 0.0360 val_acc: 0.9375


Fine-tune Epoch 42/50: 100%|██████████| 21/21 [10:36<00:00, 30.30s/it, train_loss=0.0315]


Fine-tune Epoch 42/50 - train_loss: 0.0315 val_acc: 0.9375


Fine-tune Epoch 43/50: 100%|██████████| 21/21 [10:34<00:00, 30.20s/it, train_loss=0.0389]


Fine-tune Epoch 43/50 - train_loss: 0.0389 val_acc: 0.9375


Fine-tune Epoch 44/50: 100%|██████████| 21/21 [10:45<00:00, 30.76s/it, train_loss=0.0408]


Fine-tune Epoch 44/50 - train_loss: 0.0408 val_acc: 0.9375


Fine-tune Epoch 45/50: 100%|██████████| 21/21 [10:38<00:00, 30.43s/it, train_loss=0.0409]


Fine-tune Epoch 45/50 - train_loss: 0.0409 val_acc: 0.9375


Fine-tune Epoch 46/50: 100%|██████████| 21/21 [10:36<00:00, 30.32s/it, train_loss=0.0494]


Fine-tune Epoch 46/50 - train_loss: 0.0494 val_acc: 0.9375


Fine-tune Epoch 47/50: 100%|██████████| 21/21 [10:39<00:00, 30.43s/it, train_loss=0.0380]


Fine-tune Epoch 47/50 - train_loss: 0.0380 val_acc: 0.9444


Fine-tune Epoch 48/50: 100%|██████████| 21/21 [10:39<00:00, 30.45s/it, train_loss=0.0347]


Fine-tune Epoch 48/50 - train_loss: 0.0347 val_acc: 0.9514


Fine-tune Epoch 49/50: 100%|██████████| 21/21 [10:33<00:00, 30.18s/it, train_loss=0.0328]


Fine-tune Epoch 49/50 - train_loss: 0.0328 val_acc: 0.9375


Fine-tune Epoch 50/50: 100%|██████████| 21/21 [10:39<00:00, 30.45s/it, train_loss=0.0322]


Fine-tune Epoch 50/50 - train_loss: 0.0322 val_acc: 0.9375
Fine-tune complete. Best val acc: 0.9513888888888888


## Save File

In [2]:
# ===== Save All (run after Cell 12) =====
import os, time, json, sys, shutil, torch, random, numpy as np, subprocess

OUT_DIR = "/kaggle/working/simsiam_task4"   # same working folder used across notebook
ARCHIVE = "/kaggle/working/simsiam_cell12_allsave.zip"

os.makedirs(OUT_DIR, exist_ok=True)

# 1) Save manual checkpoint including RNG and manifests
try:
    ck = {
        "timestamp": time.time(),
        "python_version": sys.version,
        "manifest": globals().get("manifest", None),
        "split_manifest": globals().get("split_manifest", None),
        "seed": globals().get("SEED", None),
    }
    if "model" in globals():
        try: ck["model_state"] = model.state_dict()
        except Exception: pass
    if "optimizer" in globals():
        try: ck["optimizer_state"] = optimizer.state_dict()
        except Exception: pass
    if "scheduler" in globals():
        try: ck["scheduler_state"] = scheduler.state_dict()
        except Exception: pass

    # last epoch info if present
    ck["last_epoch_pretrain"] = globals().get("start_epoch", None)
    ck["last_epoch_finetune"] = globals().get("epoch", None)

    # RNG states
    ck["py_random_state"] = random.getstate()
    ck["np_random_state"] = np.random.get_state()
    ck["torch_cpu_rng"] = torch.get_rng_state()
    if torch.cuda.is_available():
        try: ck["torch_cuda_rng_all"] = torch.cuda.get_rng_state_all()
        except Exception: pass

    torch.save(ck, os.path.join(OUT_DIR, "manual_checkpoint_after_cell12.pth"))
    print("Saved manual checkpoint to:", os.path.join(OUT_DIR, "manual_checkpoint_after_cell12.pth"))
except Exception as e:
    print("Warning saving manual checkpoint:", e)

# 2) Save encoder (prefer current encoder or ft_model.encoder)
try:
    if "ft_model" in globals() and hasattr(ft_model, "encoder"):
        enc = ft_model.encoder
    elif "model" in globals() and hasattr(model, "encoder"):
        enc = model.encoder
    elif "encoder" in globals() and encoder is not None:
        enc = encoder
    else:
        enc = None

    if enc is not None:
        enc_path = os.path.join(OUT_DIR, "simsiam_encoder_after_cell12.pth")
        torch.save({"encoder_state_dict": enc.state_dict(), "feat_dim": getattr(enc, 'feat_dim', None)}, enc_path)
        print("Saved encoder to", enc_path)
    else:
        print("No encoder object in memory to save.")
except Exception as e:
    print("Warning saving encoder:", e)

# 3) Save finetune resume & best checkpoints if present on disk or in memory
try:
    # attempt to save in-memory ft_model as resume
    if "ft_model" in globals():
        resume_path = os.path.join(OUT_DIR, "finetune_resume.pth")
        torch.save({
            "epoch": globals().get("epoch", None),
            "model_state": ft_model.state_dict(),
            "optimizer_state": globals().get("ft_optimizer").state_dict() if "ft_optimizer" in globals() else None,
            "scheduler_state": globals().get("ft_scheduler").state_dict() if "ft_scheduler" in globals() else None,
            "val_acc": globals().get("best_val_acc", None)
        }, resume_path)
        print("Saved finetune resume checkpoint:", resume_path)
    # copy any existing checkpoints from OUT_DIR to ensure they are included
    for fname in ["finetune_best.pth", "simsiam_encoder.pth", "simsiam_encoder_memory.pth", "simsiam_encoder_after_finetune.pth", "simsiam_latest.pth"]:
        src = os.path.join(OUT_DIR, fname)
        if os.path.exists(src):
            print("Found existing:", src)
except Exception as e:
    print("Warning saving finetune checkpoints:", e)

# 4) Save features arrays if present in memory
try:
    import numpy as _np
    for name in ("train_feats","val_feats","test_feats"):
        if name in globals():
            path = os.path.join(OUT_DIR, f"{name}.npy")
            _np.save(path, globals()[name])
            print("Saved feature:", path)
except Exception as e:
    print("Warning saving features:", e)

# 5) Copy manifests to OUT_DIR and save any label arrays
try:
    if "manifest" in globals():
        with open(os.path.join(OUT_DIR, "manifest.json"), "w") as f:
            json.dump(manifest, f)
    if "split_manifest" in globals():
        with open(os.path.join(OUT_DIR, "split_manifest.json"), "w") as f:
            json.dump(split_manifest, f)
    print("Saved manifest(s).")
except Exception as e:
    print("Warning saving manifest:", e)

# 6) Save RNG states separately
try:
    rngp = {
        "py_random_state": random.getstate(),
        "np_random_state": np.random.get_state(),
        "torch_cpu_rng": torch.get_rng_state()
    }
    if torch.cuda.is_available():
        try: rngp["torch_cuda_rng_all"] = torch.cuda.get_rng_state_all()
        except: pass
    torch.save(rngp, os.path.join(OUT_DIR, "rng_states_after_cell12.pth"))
    print("Saved RNG states.")
except Exception as e:
    print("Warning saving RNG states:", e)

# 7) Save requirements
try:
    req_path = os.path.join(OUT_DIR, "requirements.txt")
    with open(req_path,"w") as f:
        subprocess.run([sys.executable, "-m", "pip", "freeze"], stdout=f)
    print("Saved requirements.txt")
except Exception as e:
    print("Warning saving requirements:", e)

# 8) Zip the entire OUT_DIR under the requested name
try:
    if os.path.exists(ARCHIVE):
        os.remove(ARCHIVE)
    shutil.make_archive(base_name=ARCHIVE.replace(".zip",""), format="zip", root_dir=OUT_DIR)
    print("Created archive:", ARCHIVE)
    print("Archive size (bytes):", os.path.getsize(ARCHIVE))
except Exception as e:
    print("Error creating archive:", e)

print("SAVE COMPLETE. Download simsiam_cell12_allsave.zip from the notebook output panel.")


Saved manual checkpoint to: /kaggle/working/simsiam_task4/manual_checkpoint_after_cell12.pth
No encoder object in memory to save.
Saved manifest(s).
Saved RNG states.
Saved requirements.txt
Created archive: /kaggle/working/simsiam_cell12_allsave.zip
Archive size (bytes): 29878
SAVE COMPLETE. Download simsiam_cell12_allsave.zip from the notebook output panel.


## Load Data

In [16]:
# ===== LOAD CELL (fixed path) =====
import os, shutil, json, torch, random, numpy as np

# --- Set paths (match your uploaded folder name) ---
ARCHIVE_DIR = "/kaggle/input/simsiam-cell12-allsave"   # <- your actual Kaggle folder
OUT_DIR = "/kaggle/working/simsiam_task4"
os.makedirs(OUT_DIR, exist_ok=True)

# --- Verify source exists ---
if not os.path.exists(ARCHIVE_DIR):
    raise FileNotFoundError(f"Archive folder not found: {ARCHIVE_DIR}")

# --- Copy files from input folder to working OUT_DIR (no unzip step needed) ---
for fn in os.listdir(ARCHIVE_DIR):
    src = os.path.join(ARCHIVE_DIR, fn)
    dst = os.path.join(OUT_DIR, fn)
    if not os.path.exists(dst):
        try:
            shutil.copy(src, dst)
        except Exception as e:
            print("Copy error for", src, "->", e)

print("Copied files to OUT_DIR:", OUT_DIR)
print("Sample files:", os.listdir(OUT_DIR)[:50])

# --- Load manifest and split_manifest if present ---
manifest = None
split_manifest = None
m_path = os.path.join(OUT_DIR, "manifest.json")
s_path = os.path.join(OUT_DIR, "split_manifest.json")
if os.path.exists(m_path):
    with open(m_path, "r") as f: manifest = json.load(f); print("Loaded manifest.json")
if os.path.exists(s_path):
    with open(s_path, "r") as f: split_manifest = json.load(f); print("Loaded split_manifest.json")

# --- Recreate dataset split variables if available ---
train_paths = train_labels = val_paths = val_labels = test_paths = test_labels = classes = None
if split_manifest:
    train_paths  = split_manifest.get("train", None)
    train_labels = split_manifest.get("train_labels", None)
    val_paths    = split_manifest.get("val", None)
    val_labels   = split_manifest.get("val_labels", None)
    test_paths   = split_manifest.get("test", None)
    test_labels  = split_manifest.get("test_labels", None)
    classes      = split_manifest.get("classes", ["Diseased","Dried","Healthy"])
    print(f"Restored splits: Train={len(train_paths) if train_paths else 0}, Val={len(val_paths) if val_paths else 0}, Test={len(test_paths) if test_paths else 0}")
else:
    print("No split_manifest.json found; you may need to recreate splits or provide split file.")

# --- Restore RNG states if present ---
rng_path = os.path.join(OUT_DIR, "rng_states_after_cell12.pth")
if os.path.exists(rng_path):
    try:
        rng_ck = torch.load(rng_path, map_location="cpu", weights_only=False)
        if "py_random_state" in rng_ck: random.setstate(rng_ck["py_random_state"])
        if "np_random_state" in rng_ck: np.random.set_state(rng_ck["np_random_state"])
        if "torch_cpu_rng" in rng_ck: torch.set_rng_state(rng_ck["torch_cpu_rng"])
        if torch.cuda.is_available() and "torch_cuda_rng_all" in rng_ck:
            try: torch.cuda.set_rng_state_all(rng_ck["torch_cuda_rng_all"])
            except: pass
        print("Restored RNG states from", rng_path)
    except Exception as e:
        print("Could not restore RNG states:", e)
else:
    print("No RNG state file found (ok).")

# --- Try loading encoder checkpoints if present (safe) ---
encoder_ck = None
encoder = None
encoder_candidates = [
    "simsiam_encoder_after_cell12.pth",
    "simsiam_encoder_memory.pth",
    "simsiam_encoder.pth"
]
for fname in encoder_candidates:
    p = os.path.join(OUT_DIR, fname)
    if os.path.exists(p):
        try:
            encoder_ck = torch.load(p, map_location="cpu", weights_only=False)
            print("Loaded encoder checkpoint:", p)
            break
        except Exception as e:
            print("Error loading encoder file", p, "->", e)

# If SimSiam class is defined in this notebook (Cell 7), attach encoder
if encoder_ck is not None:
    if "SimSiam" in globals():
        try:
            BACKBONE = globals().get("BACKBONE", "resnet18")
            tmp = SimSiam(backbone=BACKBONE)
            tmp.encoder.load_state_dict(encoder_ck["encoder_state_dict"])
            encoder = tmp.encoder.to("cuda" if torch.cuda.is_available() else "cpu")
            print("Encoder loaded into SimSiam model (in-memory).")
        except Exception as e:
            print("Could not attach encoder to SimSiam instance:", e)
    else:
        print("SimSiam class not defined in this notebook; encoder_ck available for later use.")

# --- Load optional manual checkpoint with metadata if present ---
manual_ckpt = os.path.join(OUT_DIR, "manual_checkpoint_after_cell12.pth")
checkpoint_dict = None
if os.path.exists(manual_ckpt):
    try:
        checkpoint_dict = torch.load(manual_ckpt, map_location="cpu", weights_only=False)
        print("Loaded manual checkpoint:", manual_ckpt)
    except Exception as e:
        print("Could not load manual checkpoint:", e)

# --- Expose variables to global scope so downstream cells can use them directly ---
_globals = {
    "OUT_DIR": OUT_DIR,
    "manifest": manifest,
    "split_manifest": split_manifest,
    "train_paths": train_paths,
    "train_labels": train_labels,
    "val_paths": val_paths,
    "val_labels": val_labels,
    "test_paths": test_paths,
    "test_labels": test_labels,
    "classes": classes,
    "encoder": encoder,
    "encoder_ck": encoder_ck,
    "checkpoint_dict": checkpoint_dict
}
globals().update({k:v for k,v in _globals.items() if v is not None})

print("\nLOAD COMPLETE. Continue from Cell 10 (feature extraction) or Cell 13 (embeddings).")


Copied files to OUT_DIR: /kaggle/working/simsiam_task4
Sample files: ['requirements.txt', 'LogisticRegression.joblib', 'val_labels.npy', 'val_feats.npy', 'probe_results.json', 'train_labels.npy', 'test_labels.npy', 'DecisionTree.joblib', 'simsiam_encoder.pth', 'manual_checkpoint_after_pretrain.pth', 'simsiam_encoder_memory.pth', 'MLP.joblib', 'SVM_RBF.joblib', 'test_feats.npy', 'simsiam_latest.pth', 'RandomForest.joblib', 'split_manifest.json', 'manual_checkpoint_after_cell12.pth', 'rng_states_after_cell12.pth', 'manifest.json', 'train_feats.npy']
Loaded manifest.json
Loaded split_manifest.json
Restored splits: Train=1296, Val=144, Test=360
Restored RNG states from /kaggle/working/simsiam_task4/rng_states_after_cell12.pth
Loaded encoder checkpoint: /kaggle/working/simsiam_task4/simsiam_encoder_memory.pth
Encoder loaded into SimSiam model (in-memory).
Loaded manual checkpoint: /kaggle/working/simsiam_task4/manual_checkpoint_after_cell12.pth

LOAD COMPLETE. Continue from Cell 10 (feature

CELL 13 — Embedding visualization: UMAP, t-SNE, PCA & silhouette

In [17]:
# ===== CELL 13 (Robust Embedding Viz: PCA, UMAP (safe), t-SNE (safe), silhouette) =====
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
try:
    from sklearn.manifold import TSNE
except Exception:
    TSNE = None

# Safe UMAP import may already be present; if not, set to None and skip gracefully.
try:
    import umap
except Exception:
    umap = None

# --- Load features and labels ---
OUT_DIR = globals().get("OUT_DIR", "/kaggle/working/simsiam_task4")
if not os.path.exists(OUT_DIR):
    os.makedirs(OUT_DIR, exist_ok=True)

def safe_load(npname):
    p = os.path.join(OUT_DIR, npname)
    if not os.path.exists(p):
        raise FileNotFoundError(f"Missing required file: {p}")
    return np.load(p, allow_pickle=False)

train_feats = safe_load("train_feats.npy")
val_feats   = safe_load("val_feats.npy")
test_feats  = safe_load("test_feats.npy")
train_lbls  = safe_load("train_labels.npy")
val_lbls    = safe_load("val_labels.npy")
test_lbls   = safe_load("test_labels.npy")

feats_all = np.vstack([train_feats, val_feats, test_feats]).astype(np.float32)
lbls_all  = np.concatenate([train_lbls, val_lbls, test_lbls]).astype(int)

classes = globals().get("classes", ["Diseased", "Dried", "Healthy"])
SEED = globals().get("SEED", 42)

# small helper to plot and save
def scatter_and_save(X2, labels, title, fname):
    plt.figure(figsize=(8,6))
    for c_idx, c_name in enumerate(classes):
        idx = labels == c_idx
        if np.any(idx):
            plt.scatter(X2[idx,0], X2[idx,1], label=c_name, s=10, alpha=0.6)
    plt.legend()
    plt.title(title)
    outpath = os.path.join(OUT_DIR, fname)
    plt.savefig(outpath, dpi=150, bbox_inches="tight")
    plt.close()
    print("Saved:", outpath)

# --- PCA (always) ---
pca = PCA(n_components=2, random_state=SEED)
proj_pca = pca.fit_transform(feats_all)
scatter_and_save(proj_pca, lbls_all, "PCA projection of features", "pca_proj.png")

# --- UMAP (try / fallback) ---
if umap is None:
    print("UMAP not installed or import failed; skipping UMAP projection.")
else:
    try:
        # convert to float32 (safer) and run UMAP
        reducer = umap.UMAP(n_components=2, random_state=SEED)
        proj_umap = reducer.fit_transform(feats_all)
        scatter_and_save(proj_umap, lbls_all, "UMAP projection of features", "umap_proj.png")
    except Exception as e:
        # compatibility error fallback
        print("UMAP failed:", repr(e))
        # fallback: save a PCA-based proxy for UMAP so downstream cells have a file
        scatter_and_save(proj_pca, lbls_all, "UMAP fallback (PCA proxy)", "umap_proj_fallback_pca.png")
        print("Saved PCA proxy as UMAP fallback.")

# --- t-SNE (try with subsample fallback to avoid very long runs) ---
if TSNE is None:
    print("TSNE not available; skipping t-SNE projection.")
else:
    try:
        n_samples = feats_all.shape[0]
        tsne_max = 2000  # limit to keep runtime reasonable on Kaggle
        if n_samples > tsne_max:
            # stratified subsample roughly balanced across classes
            idx_keep = []
            rng = np.random.RandomState(SEED)
            per_class = max(50, int(tsne_max / max(1, len(classes))))
            lbls_arr = lbls_all
            for c in range(len(classes)):
                positions = np.where(lbls_arr == c)[0]
                if len(positions) == 0:
                    continue
                k = min(len(positions), per_class)
                sel = rng.choice(positions, size=k, replace=False)
                idx_keep.extend(sel.tolist())
            idx_keep = np.array(sorted(idx_keep))
            feats_tsne = feats_all[idx_keep]
            labels_tsne = lbls_all[idx_keep]
            print(f"t-SNE: dataset too large ({n_samples}), subsampling to {len(idx_keep)} samples for speed.")
        else:
            feats_tsne = feats_all
            labels_tsne = lbls_all

        tsne = TSNE(n_components=2, perplexity=30, random_state=SEED, init='pca')
        proj_tsne = tsne.fit_transform(feats_tsne)
        # if subsampled, include suffix in filename
        fname = "tsne_proj.png" if feats_tsne.shape[0] == n_samples else "tsne_proj_subsample.png"
        scatter_and_save(proj_tsne, labels_tsne, "t-SNE projection of features", fname)
    except Exception as e:
        print("t-SNE failed or was interrupted:", repr(e))
        print("Skipping t-SNE.")

# --- silhouette score (only if valid) ---
try:
    # silhouette requires at least 2 classes and at least 2 samples per label
    unique, counts = np.unique(lbls_all, return_counts=True)
    if len(unique) < 2 or np.min(counts) < 2:
        print("Silhouette not computed: need >=2 classes and >=2 samples per class.")
        sil = None
    else:
        sil = silhouette_score(feats_all, lbls_all)
        print("Silhouette score (features):", sil)
        with open(os.path.join(OUT_DIR, "embedding_stats.txt"), "w") as f:
            f.write(f"Silhouette: {sil}\n")
except Exception as e:
    print("Silhouette computation failed:", repr(e))
    sil = None

print("Embedding viz cell finished. Check files in:", OUT_DIR)


Saved: /kaggle/working/simsiam_task4/pca_proj.png
UMAP failed: TypeError("check_array() got an unexpected keyword argument 'ensure_all_finite'")
Saved: /kaggle/working/simsiam_task4/umap_proj_fallback_pca.png
Saved PCA proxy as UMAP fallback.
Saved: /kaggle/working/simsiam_task4/tsne_proj.png
Silhouette score (features): -0.019335013
Embedding viz cell finished. Check files in: /kaggle/working/simsiam_task4


CELL 14 — Label-efficiency experiments (1%,5%,10%,25%,50% labeled) using linear probe

In [18]:
# =========================
# Label-efficiency curves: train logistic regression with limited labeled fractions of train set
# Saves accuracy for each fraction.
# =========================

fractions = [0.01, 0.05, 0.10, 0.25, 0.50, 1.0]
results = {}
total_train = len(train_feats)
for frac in fractions:
    n = max(1, int(total_train * frac))
    # subsample stratified by labels
    # simple stratified subsampling: sample per class proportionally
    subs_idx = []
    train_lbls_arr = np.array(train_lbls)
    for c in range(len(classes)):
        idxs = np.where(train_lbls_arr == c)[0]
        k = max(1, int(len(idxs) * frac))
        rng = np.random.RandomState(SEED)
        sel = rng.choice(idxs, size=k, replace=False)
        subs_idx.extend(sel.tolist())
    subs_idx = sorted(subs_idx)
    X_sub = train_feats[subs_idx]
    y_sub = train_lbls_arr[subs_idx]

    clf = LogisticRegression(max_iter=2000)
    clf.fit(X_sub, y_sub)
    test_pred = clf.predict(test_feats)
    acc = accuracy_score(test_lbls, test_pred)
    results[f"{int(frac*100)}%"] = float(acc)
    print(f"Fraction {int(frac*100)}% -> Test Acc: {acc:.4f}")

with open(os.path.join(OUT_DIR, "label_efficiency.json"), "w") as f:
    json.dump(results, f, indent=2)
print("Saved label-efficiency results to", os.path.join(OUT_DIR, "label_efficiency.json"))


Fraction 1% -> Test Acc: 0.6250
Fraction 5% -> Test Acc: 0.7472
Fraction 10% -> Test Acc: 0.7778
Fraction 25% -> Test Acc: 0.7889
Fraction 50% -> Test Acc: 0.8167
Fraction 100% -> Test Acc: 0.8167
Saved label-efficiency results to /kaggle/working/simsiam_task4/label_efficiency.json


CELL 16 — (optional for cisual check) Augmentation probe (visual check to ensure augmentations are not destroying diagnostic cues)

In [20]:
# =========================
# Augmentation probe: shows a few augmented views for several random images from each class/subfolder
# Run this cell to produce visuals; saves PNGs to OUT_DIR for your report.
# =========================

probe_samples = 3  # how many sample images per class to visualize
out_probe_dir = os.path.join(OUT_DIR, "augmentation_probe")
os.makedirs(out_probe_dir, exist_ok=True)

for cls in classes:
    # find an example image from either Controlled Environment or On Field for that class
    found = None
    for src in sources:
        p = Path(DATA_DIR) / src / cls
        imgs = list(p.glob("*"))
        if len(imgs) > 0:
            found = imgs[:probe_samples]
            break
    if found is None:
        continue
    for i, imgp in enumerate(found):
        outs = aug_probe_image(str(imgp), n=6)
        # plot and save
        fig, axs = plt.subplots(1, len(outs), figsize=(len(outs)*2, 2.4))
        for j, arr in enumerate(outs):
            axs[j].imshow(arr)
            axs[j].axis("off")
        plt.suptitle(f"Augmentation probe: class={cls} sample={i}")
        savepath = os.path.join(out_probe_dir, f"probe_{cls}_{i}.png")
        plt.savefig(savepath, dpi=150, bbox_inches="tight")
        plt.close()
        print("Saved augmentation probe:", savepath)

print("Augmentation probe images saved to", out_probe_dir)


Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Diseased_0.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Diseased_1.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Diseased_2.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Dried_0.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Dried_1.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Dried_2.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Healthy_0.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Healthy_1.png
Saved augmentation probe: /kaggle/working/simsiam_task4/augmentation_probe/probe_Healthy_2.png
Augmentation probe images saved to /kaggle/working/simsiam_task4/augmentation_probe


Task 4.4: Full Metrics Suite (Per-Class Metrics, Confusion Matrices, ROC Curves, Learning Curves, Best-Model Selection, ZIP Archiver)

In [2]:
# ============================================================
#            TASK 4 COMPLETION MEGA-CELL (WITH ZIP) all roc and stuff
# ============================================================

import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import csv
from sklearn.metrics import (
    precision_recall_fscore_support,
    accuracy_score,
    roc_curve,
    auc,
    confusion_matrix
)
from sklearn.model_selection import train_test_split
import joblib
import shutil

# ------------------------------------------------------------
# 1. LOAD FEATURES & LABELS
# ------------------------------------------------------------
BASE = "/kaggle/input/all-files/All Files"

train_feats = np.load(f"{BASE}/train_feats.npy")
train_labels = np.load(f"{BASE}/train_labels.npy")

val_feats = np.load(f"{BASE}/val_feats.npy")
val_labels = np.load(f"{BASE}/val_labels.npy")

test_feats = np.load(f"{BASE}/test_feats.npy")
test_labels = np.load(f"{BASE}/test_labels.npy")

# Load class names
with open("/kaggle/input/simsiam-task4-archive/manifest.json", "r") as f:
    manifest = json.load(f)
CLASSES = manifest["classes"]
NUM_CLASSES = len(CLASSES)

print("Classes:", CLASSES)

# ------------------------------------------------------------
# 2. LOAD CLASSIFIERS
# ------------------------------------------------------------
models = {
    "LogisticRegression": joblib.load(f"{BASE}/LogisticRegression.joblib"),
    "SVM_RBF": joblib.load(f"{BASE}/SVM_RBF.joblib"),
    "RandomForest": joblib.load(f"{BASE}/RandomForest.joblib"),
    "DecisionTree": joblib.load(f"{BASE}/DecisionTree.joblib"),
    "MLP": joblib.load(f"{BASE}/MLP.joblib"),
}

# ------------------------------------------------------------
# 3. OUTPUT DIRECTORY
# ------------------------------------------------------------
OUT = "/kaggle/working/task4_outputs"
os.makedirs(OUT, exist_ok=True)

# ------------------------------------------------------------
# 4. METRICS FUNCTION
# ------------------------------------------------------------
def compute_metrics(model, name):
    preds = model.predict(test_feats)
    acc = accuracy_score(test_labels, preds)

    precision, recall, f1, support = precision_recall_fscore_support(
        test_labels, preds, labels=list(range(NUM_CLASSES)), zero_division=0
    )

    cm = confusion_matrix(test_labels, preds)

    result = {
        "model": name,
        "overall_accuracy": float(acc),
        "per_class": {}
    }

    for i, cls in enumerate(CLASSES):
        result["per_class"][cls] = {
            "precision": float(precision[i]),
            "recall": float(recall[i]),
            "f1": float(f1[i]),
            "support": int(support[i])
        }

    return result, cm

all_metrics = {}

# ------------------------------------------------------------
# 5. GENERATE METRICS + CONFUSION MATRICES
# ------------------------------------------------------------
for name, clf in models.items():
    print(f"Computing metrics for {name}...")
    metrics, cm = compute_metrics(clf, name)

    # Save JSON
    with open(f"{OUT}/metrics_{name}.json", "w") as f:
        json.dump(metrics, f, indent=2)

    # Confusion matrix figure
    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=CLASSES, yticklabels=CLASSES)
    plt.title(f"Confusion Matrix - {name}")
    plt.savefig(f"{OUT}/confusion_{name}.png")
    plt.close()

    all_metrics[name] = metrics

# ------------------------------------------------------------
# 6. SAVE ALL METRICS AS CSV
# ------------------------------------------------------------
csv_path = f"{OUT}/per_class_metrics.csv"
with open(csv_path, "w", newline="") as f:
    writer = csv.writer(f)
    header = ["Model", "Class", "Precision", "Recall", "F1", "Support", "Overall Accuracy"]
    writer.writerow(header)

    for model_name, data in all_metrics.items():
        acc = data["overall_accuracy"]
        for cls, d in data["per_class"].items():
            writer.writerow([
                model_name,
                cls,
                d["precision"],
                d["recall"],
                d["f1"],
                d["support"],
                acc
            ])

print("Saved CSV:", csv_path)

# ------------------------------------------------------------
# 7. SAFE predict_proba wrapper
# ------------------------------------------------------------
def safe_predict_proba(model, feats):
    try:
        return model.predict_proba(feats)
    except:
        try:
            df = model.decision_function(feats)
            exp = np.exp(df - np.max(df, axis=1, keepdims=True))
            return exp / np.sum(exp, axis=1, keepdims=True)
        except:
            preds = model.predict(feats)
            one_hot = np.zeros((len(preds), NUM_CLASSES))
            for i, p in enumerate(preds):
                one_hot[i][p] = 1
            return one_hot

# ------------------------------------------------------------
# 8. ROC CURVES — PER CLASSIFIER
# ------------------------------------------------------------
for name, clf in models.items():
    probs = safe_predict_proba(clf, test_feats)

    plt.figure(figsize=(7,6))
    for i, cls in enumerate(CLASSES):
        fpr, tpr, _ = roc_curve(test_labels == i, probs[:, i])
        auc_score = auc(fpr, tpr)
        plt.plot(fpr, tpr, label=f"{cls} (AUC={auc_score:.3f})")

    plt.plot([0,1],[0,1],"k--")
    plt.title(f"ROC Curve - {name}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend()
    plt.savefig(f"{OUT}/roc_{name}.png")
    plt.close()

print("Saved per-model ROC curves.")

# ------------------------------------------------------------
# 9. ROC CURVES — PER CLASS COMPARISON
# ------------------------------------------------------------
for c_idx, cls in enumerate(CLASSES):
    plt.figure(figsize=(7,6))
    for name, clf in models.items():
        probs = safe_predict_proba(clf, test_feats)
        fpr, tpr, _ = roc_curve(test_labels == c_idx, probs[:, c_idx])
        auc_score = auc(fpr, tpr)
        plt.plot(fpr, tpr, label=f"{name} (AUC={auc_score:.3f})")

    plt.plot([0,1],[0,1], 'k--')
    plt.title(f"ROC Comparison – Class: {cls}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend()
    plt.savefig(f"{OUT}/roc_class_{cls}.png")
    plt.close()

print("Saved per-class ROC comparison plots.")

# ------------------------------------------------------------
# 10. LEARNING CURVES FOR ALL MODELS
# ------------------------------------------------------------
def learning_curve_plot(model, name):
    sizes = np.linspace(0.1, 1.0, 10)
    sizes = (sizes * len(train_feats)).astype(int)

    train_accs = []
    val_accs = []

    for s in sizes:
        X_s = train_feats[:s]
        y_s = train_labels[:s]

        model.fit(X_s, y_s)
        train_pred = model.predict(X_s)
        val_pred = model.predict(val_feats)

        train_accs.append(accuracy_score(y_s, train_pred))
        val_accs.append(accuracy_score(val_labels, val_pred))

    plt.figure(figsize=(7,6))
    plt.plot(sizes, train_accs, label="Train Acc", marker="o")
    plt.plot(sizes, val_accs, label="Val Acc", marker="o")
    plt.title(f"Learning Curve - {name}")
    plt.xlabel("Training Samples")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{OUT}/learning_curve_{name}.png")
    plt.close()

import copy
for name, clf in models.items():
    print(f"Learning curve: {name}")
    learning_curve_plot(copy.deepcopy(clf), name)

# ------------------------------------------------------------
# 11. BEST MODEL SELECTION
# ------------------------------------------------------------
best_model = None
best_acc = -1

for name, clf in models.items():
    preds = clf.predict(val_feats)
    acc = accuracy_score(val_labels, preds)
    if acc > best_acc:
        best_acc = acc
        best_model = name

with open(f"{OUT}/best_model.txt", "w") as f:
    f.write(f"Best model (by validation accuracy): {best_model} ({best_acc:.4f})\n")

print("Best model:", best_model)

# ------------------------------------------------------------
# 12. ZIP ALL OUTPUTS
# ------------------------------------------------------------
ZIP_PATH = "/kaggle/working/task4_outputs.zip"

# Remove existing zip if rerun
if os.path.exists(ZIP_PATH):
    os.remove(ZIP_PATH)

shutil.make_archive("/kaggle/working/task4_outputs", "zip", OUT)
print("ZIP file created:", ZIP_PATH)

print("\nALL TASK 4 OUTPUTS COMPLETE & ARCHIVED.")


Classes: ['Diseased', 'Dried', 'Healthy']
Computing metrics for LogisticRegression...
Computing metrics for SVM_RBF...
Computing metrics for RandomForest...
Computing metrics for DecisionTree...
Computing metrics for MLP...
Saved CSV: /kaggle/working/task4_outputs/per_class_metrics.csv
Saved per-model ROC curves.
Saved per-class ROC comparison plots.
Learning curve: LogisticRegression
Learning curve: SVM_RBF
Learning curve: RandomForest
Learning curve: DecisionTree
Learning curve: MLP
Best model: MLP
ZIP file created: /kaggle/working/task4_outputs.zip

ALL TASK 4 OUTPUTS COMPLETE & ARCHIVED.


CELL 15 — Ablation: varying pretraining epochs (100, 200, 400) — skeleton runner

In [None]:
# =========================
# Cell 15 (SAVE-EVERY-20-EPOCHS) — Ablation runner with periodic save (every N epochs)
# - Saves full checkpoint (weights+opt+sched+encoder+metadata) every `save_interval` epochs
# - Zips run dir on each save (for easy download)
# - Auto-resumes from latest saved epoch (which will be a multiple of save_interval)
# - Shows tqdm progress and prints epoch summaries
# =========================

import os, time, json, shutil
import torch
from torch import optim
from tqdm import tqdm

# ---------- Config ----------
ablation_epochs = [100, 200, 400]    # ablation configs to run
save_interval = 20                    # SAVE every N epochs (set 20 as you requested)
zip_every_k_backups = 1               # create zip each time we save (1 => on every save); 0 to disable
zip_output_name_template = "simsiam_ablation_epochs_{E}.zip"  # saved to /kaggle/working
# ----------------------------

# hyperparams (fallback to notebook globals)
learning_rate = globals().get("learning_rate", 0.05)
momentum = globals().get("momentum", 0.9)
weight_decay = globals().get("weight_decay", 1e-4)

# sanity checks
if "train_loader" not in globals():
    raise RuntimeError("train_loader not found — run the data preparation / pretraining cells first.")
if "SimSiam" not in globals():
    raise RuntimeError("SimSiam class not defined — run the model definition cell first (Cell 7).")

OUT_DIR = globals().get("OUT_DIR", "/kaggle/working/simsiam_task4")
os.makedirs(OUT_DIR, exist_ok=True)

device = globals().get("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
BACKBONE = globals().get("BACKBONE", "resnet18")

# negative cosine similarity helper (same as pretrain)
def negative_cosine_similarity(p, z):
    z = z.detach()
    p = torch.nn.functional.normalize(p, dim=1)
    z = torch.nn.functional.normalize(z, dim=1)
    return - (p * z).sum(dim=1).mean()

# save checkpoint helper (full save)
def save_full_checkpoint(model, optimizer, scheduler, run_dir, epoch_num, avg_loss):
    """Save full checkpoint for epoch_num (1-based)."""
    epoch_dir = os.path.join(run_dir, f"epoch_{epoch_num:03d}")
    os.makedirs(epoch_dir, exist_ok=True)
    ck = {
        "epoch": int(epoch_num),
        "timestamp": time.time(),
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict() if optimizer is not None else None,
        "scheduler_state": scheduler.state_dict() if scheduler is not None else None,
        "avg_loss": float(avg_loss),
        "manifest": globals().get("split_manifest", None)
    }
    ck_path = os.path.join(epoch_dir, "checkpoint.pth")
    torch.save(ck, ck_path)
    # encoder snapshot (convenience)
    try:
        enc_path = os.path.join(epoch_dir, "encoder.pth")
        torch.save({"encoder_state_dict": model.encoder.state_dict(), "feat_dim": model.feat_dim}, enc_path)
    except Exception:
        pass
    # metadata.json
    meta = {"epoch": int(epoch_num), "avg_loss": float(avg_loss), "saved_at": time.time()}
    with open(os.path.join(epoch_dir, "metadata.json"), "w") as f:
        json.dump(meta, f)
    return ck_path

# Main ablation loop (periodic save)
for E in ablation_epochs:
    run_dir = os.path.join(OUT_DIR, f"ablation_epochs_{E}")
    os.makedirs(run_dir, exist_ok=True)
    zip_out_path = os.path.join("/kaggle/working", zip_output_name_template.format(E=E))

    print(f"\n=== Ablation config: {E} epochs | saving every {save_interval} epochs | run_dir: {run_dir} ===")

    # Build model/optimizer/scheduler fresh (or will load later if resuming)
    model_ab = SimSiam(backbone=BACKBONE).to(device)
    opt_ab = optim.SGD(model_ab.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
    sched_ab = torch.optim.lr_scheduler.CosineAnnealingLR(opt_ab, T_max=E)

    # Detect last saved epoch (must be multiple of save_interval typically)
    existing_epochs = []
    if os.path.exists(run_dir):
        for name in os.listdir(run_dir):
            if name.startswith("epoch_"):
                try:
                    existing_epochs.append(int(name.split("_")[1]))
                except Exception:
                    pass
    last_saved = max(existing_epochs) if existing_epochs else 0
    # Resume from next epoch (1-based)
    start_epoch = last_saved + 1

    # If resuming, attempt to load the latest full checkpoint (best-effort)
    if last_saved > 0:
        ckpt_path = os.path.join(run_dir, f"epoch_{last_saved:03d}", "checkpoint.pth")
        if os.path.exists(ckpt_path):
            print(f"Resuming from saved checkpoint at epoch {last_saved}: {ckpt_path}")
            ck = torch.load(ckpt_path, map_location=device, weights_only=False)
            try:
                model_ab.load_state_dict(ck["model_state"])
            except Exception as e:
                print("Warning: model_state load partial/failed ->", e)
            try:
                if ck.get("optimizer_state") is not None:
                    opt_ab.load_state_dict(ck["optimizer_state"])
            except Exception as e:
                print("Warning: optimizer_state load failed ->", e)
            try:
                if ck.get("scheduler_state") is not None:
                    sched_ab.load_state_dict(ck["scheduler_state"])
            except Exception:
                pass
            print(f"Resumed. Next epoch will be {start_epoch} (1-based) out of {E}.")
        else:
            print("Found epoch folders but checkpoint.pth missing — starting from epoch 1.")
            start_epoch = 1
    else:
        start_epoch = 1

    # If the last_saved isn't exactly the last completed epoch before interruption,
    # you will lose progress between last_saved and the current interruption.
    if last_saved > 0:
        print(f"Note: last saved epoch = {last_saved}. Progress after that may be lost if run stopped earlier than next save point.")

    # Training loop: epochs are 1..E inclusive
    try:
        for epoch in range(start_epoch, E + 1):   # epoch is 1-based here
            model_ab.train()
            running_loss = 0.0
            n_steps = 0

            loop = tqdm(train_loader, desc=f"Ablation E={E} Epoch {epoch}/{E}", leave=False)
            for x1, x2, _, _ in loop:
                x1 = x1.to(device); x2 = x2.to(device)

                p1, p2, z1, z2 = model_ab(x1, x2)
                loss = 0.5 * negative_cosine_similarity(p1, z2) + 0.5 * negative_cosine_similarity(p2, z1)

                opt_ab.zero_grad()
                loss.backward()
                opt_ab.step()

                running_loss += loss.item()
                n_steps += 1
                loop.set_postfix(loss=f"{loss.item():.4f}")

            avg_loss = (running_loss / n_steps) if n_steps > 0 else 0.0
            sched_ab.step()
            print(f"Epoch {epoch}/{E} completed - Avg Loss: {avg_loss:.4f}")

            # Save only at save_interval multiples OR on final epoch
            to_save = (epoch % save_interval == 0) or (epoch == E)
            if to_save:
                try:
                    ck_path = save_full_checkpoint(model_ab, opt_ab, sched_ab, run_dir, epoch, avg_loss)
                    print("Saved full checkpoint for epoch:", epoch, "->", ck_path)
                except Exception as e:
                    print("Warning: could not save checkpoint:", e)

                # periodic zip backup if enabled
                if zip_every_k_backups and zip_every_k_backups > 0:
                    try:
                        # overwrite previous zip for same E
                        if os.path.exists(zip_out_path):
                            os.remove(zip_out_path)
                        shutil.make_archive(base_name=zip_out_path.replace(".zip",""), format="zip", root_dir=run_dir)
                        print("Saved ZIP backup to:", zip_out_path)
                    except Exception as e:
                        print("Warning: ZIP backup failed ->", e)

    except KeyboardInterrupt:
        # On interrupt, save the most recent multiple-of-save_interval checkpoint if possible
        print("KeyboardInterrupt caught — attempting to save resume checkpoint (if we are on a save point).")
        try:
            # if current epoch is a save point, save it; otherwise save last_saved again (best-effort)
            cur_epoch = epoch
            if (cur_epoch % save_interval == 0) or (cur_epoch == E):
                save_full_checkpoint(model_ab, opt_ab, sched_ab, run_dir, cur_epoch, avg_loss if 'avg_loss' in locals() else 0.0)
                print("Saved checkpoint for interrupted epoch:", cur_epoch)
            else:
                print(f"Current epoch {cur_epoch} not a save point. Last permanent save remains epoch {last_saved}.")
        except Exception as e:
            print("Could not save on interrupt:", e)
        raise

    # final zip after finishing all epochs for this E
    try:
        if zip_every_k_backups and zip_every_k_backups > 0:
            if os.path.exists(zip_out_path):
                os.remove(zip_out_path)
            shutil.make_archive(base_name=zip_out_path.replace(".zip",""), format="zip", root_dir=run_dir)
            print("Completed ablation run. Final ZIP:", zip_out_path)
    except Exception as e:
        print("Could not create final ZIP:", e)

print("\nAll ablation configs processed.")



=== Ablation config: 100 epochs | saving every 20 epochs | run_dir: /kaggle/working/simsiam_task4/ablation_epochs_100 ===


                                                                                         

Epoch 1/100 completed - Avg Loss: -0.3544


                                                                                         

Epoch 2/100 completed - Avg Loss: -0.6602


                                                                                         

Epoch 3/100 completed - Avg Loss: -0.8142


                                                                                         

Epoch 4/100 completed - Avg Loss: -0.8335


                                                                                         

Epoch 5/100 completed - Avg Loss: -0.8616


                                                                                         

Epoch 6/100 completed - Avg Loss: -0.8948


                                                                                         

Epoch 7/100 completed - Avg Loss: -0.8459


                                                                                         

Epoch 8/100 completed - Avg Loss: -0.8835


                                                                                         

Epoch 9/100 completed - Avg Loss: -0.9091


                                                                                          

Epoch 10/100 completed - Avg Loss: -0.9155


                                                                                          

Epoch 11/100 completed - Avg Loss: -0.9060


                                                                                          

Epoch 12/100 completed - Avg Loss: -0.9051


                                                                                          

Epoch 13/100 completed - Avg Loss: -0.9177


                                                                                          

Epoch 14/100 completed - Avg Loss: -0.9089


                                                                                          

Epoch 15/100 completed - Avg Loss: -0.9111


                                                                                          

Epoch 16/100 completed - Avg Loss: -0.9254


                                                                                          

Epoch 17/100 completed - Avg Loss: -0.9219


                                                                                          

Epoch 18/100 completed - Avg Loss: -0.9178


                                                                                          

Epoch 19/100 completed - Avg Loss: -0.8918


                                                                                          

Epoch 20/100 completed - Avg Loss: -0.9234
Saved full checkpoint for epoch: 20 -> /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_020/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip


                                                                                          

Epoch 21/100 completed - Avg Loss: -0.9221


                                                                                          

Epoch 22/100 completed - Avg Loss: -0.9404


                                                                                          

Epoch 23/100 completed - Avg Loss: -0.9371


                                                                                          

Epoch 24/100 completed - Avg Loss: -0.9062


                                                                                          

Epoch 25/100 completed - Avg Loss: -0.8888


                                                                                          

Epoch 26/100 completed - Avg Loss: -0.9240


                                                                                          

Epoch 27/100 completed - Avg Loss: -0.9341


                                                                                          

Epoch 28/100 completed - Avg Loss: -0.9380


                                                                                          

Epoch 29/100 completed - Avg Loss: -0.9371


                                                                                          

Epoch 30/100 completed - Avg Loss: -0.9390


                                                                                          

Epoch 31/100 completed - Avg Loss: -0.9326


                                                                                          

Epoch 32/100 completed - Avg Loss: -0.9433


                                                                                          

Epoch 33/100 completed - Avg Loss: -0.9424


                                                                                          

Epoch 34/100 completed - Avg Loss: -0.9477


                                                                                          

Epoch 35/100 completed - Avg Loss: -0.9463


                                                                                          

Epoch 36/100 completed - Avg Loss: -0.9468


                                                                                          

Epoch 37/100 completed - Avg Loss: -0.9433


                                                                                          

Epoch 38/100 completed - Avg Loss: -0.9434


                                                                                          

Epoch 39/100 completed - Avg Loss: -0.9445


                                                                                          

Epoch 40/100 completed - Avg Loss: -0.9481
Saved full checkpoint for epoch: 40 -> /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_040/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip


                                                                                          

Epoch 41/100 completed - Avg Loss: -0.9451


                                                                                          

Epoch 42/100 completed - Avg Loss: -0.9364


                                                                                          

Epoch 43/100 completed - Avg Loss: -0.9513


                                                                                          

Epoch 44/100 completed - Avg Loss: -0.9480


                                                                                          

Epoch 45/100 completed - Avg Loss: -0.9490


                                                                                          

Epoch 46/100 completed - Avg Loss: -0.9489


                                                                                          

Epoch 47/100 completed - Avg Loss: -0.9502


                                                                                          

Epoch 48/100 completed - Avg Loss: -0.9503


                                                                                          

Epoch 49/100 completed - Avg Loss: -0.9509


                                                                                          

Epoch 50/100 completed - Avg Loss: -0.9496


                                                                                          

Epoch 51/100 completed - Avg Loss: -0.9520


                                                                                          

Epoch 52/100 completed - Avg Loss: -0.9510


                                                                                          

Epoch 53/100 completed - Avg Loss: -0.9535


                                                                                          

Epoch 54/100 completed - Avg Loss: -0.9472


                                                                                          

Epoch 55/100 completed - Avg Loss: -0.9480


                                                                                          

Epoch 56/100 completed - Avg Loss: -0.9471


                                                                                          

Epoch 57/100 completed - Avg Loss: -0.9479


                                                                                          

Epoch 58/100 completed - Avg Loss: -0.9500


                                                                                          

Epoch 59/100 completed - Avg Loss: -0.9455


                                                                                          

Epoch 60/100 completed - Avg Loss: -0.9424
Saved full checkpoint for epoch: 60 -> /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_060/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip


                                                                                          

Epoch 61/100 completed - Avg Loss: -0.9420


                                                                                          

Epoch 62/100 completed - Avg Loss: -0.9426


                                                                                          

Epoch 63/100 completed - Avg Loss: -0.9363


                                                                                          

Epoch 64/100 completed - Avg Loss: -0.9406


                                                                                          

Epoch 65/100 completed - Avg Loss: -0.9366


                                                                                          

Epoch 66/100 completed - Avg Loss: -0.9419


                                                                                          

Epoch 67/100 completed - Avg Loss: -0.9453


                                                                                          

Epoch 68/100 completed - Avg Loss: -0.9445


                                                                                          

Epoch 69/100 completed - Avg Loss: -0.9445


                                                                                          

Epoch 71/100 completed - Avg Loss: -0.9411


                                                                                          

Epoch 72/100 completed - Avg Loss: -0.9436


                                                                                          

Epoch 73/100 completed - Avg Loss: -0.9392


                                                                                          

Epoch 74/100 completed - Avg Loss: -0.9386


                                                                                          

Epoch 75/100 completed - Avg Loss: -0.9390


                                                                                          

Epoch 76/100 completed - Avg Loss: -0.9381


                                                                                          

Epoch 77/100 completed - Avg Loss: -0.9356


                                                                                          

Epoch 78/100 completed - Avg Loss: -0.9323


Ablation E=100 Epoch 79/100:  30%|███       | 6/20 [01:43<03:30, 15.02s/it, loss=-0.9433]

In [19]:
# ===== Cell 15 (RESUME FROM 80 -> 100, save every 5 epochs) =====
import os, shutil, json, time, torch
from torch import optim
from tqdm import tqdm

# -------- CONFIG --------
E_target = 100
# Where to keep the run folders (where your epoch_xxx folders will live)
RUN_ROOT = "/kaggle/working/simsiam_task4"
RUN_DIR  = os.path.join(RUN_ROOT, f"ablation_epochs_{E_target}")   # target run dir
# Uploaded archive location you mentioned
INPUT_ARCHIVE = "/kaggle/input/simsiam-ablation-epochs-100"
# Save policy now
save_interval = 5   # save every 5 epochs (85,90,95,100)
ZIP_OUT = os.path.join("/kaggle/working", f"simsiam_ablation_epochs_{E_target}.zip")
# Device / model fallback values (use your notebook globals if set)
device = globals().get("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
BACKBONE = globals().get("BACKBONE", "resnet18")
learning_rate = globals().get("learning_rate", 0.05)
momentum = globals().get("momentum", 0.9)
weight_decay = globals().get("weight_decay", 1e-4)
# ------------------------

# Sanity: model and data must be defined earlier in the notebook
if "SimSiam" not in globals():
    raise RuntimeError("SimSiam not defined — run model-definition cell first.")
if "train_loader" not in globals():
    raise RuntimeError("train_loader not found — run data prep / pretraining cells first.")

os.makedirs(RUN_DIR, exist_ok=True)

# Helper loss (same as used)
def negative_cosine_similarity(p, z):
    z = z.detach()
    p = torch.nn.functional.normalize(p, dim=1)
    z = torch.nn.functional.normalize(z, dim=1)
    return - (p * z).sum(dim=1).mean()

# Helper save function
def save_full_checkpoint(model, optimizer, scheduler, run_dir, epoch_num, avg_loss):
    epoch_dir = os.path.join(run_dir, f"epoch_{epoch_num:03d}")
    os.makedirs(epoch_dir, exist_ok=True)
    ck = {
        "epoch": int(epoch_num),
        "timestamp": time.time(),
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict() if optimizer is not None else None,
        "scheduler_state": scheduler.state_dict() if scheduler is not None else None,
        "avg_loss": float(avg_loss),
        "manifest": globals().get("split_manifest", None)
    }
    ck_path = os.path.join(epoch_dir, "checkpoint.pth")
    torch.save(ck, ck_path)
    # encoder snapshot
    try:
        torch.save({"encoder_state_dict": model.encoder.state_dict(), "feat_dim": model.feat_dim},
                   os.path.join(epoch_dir, "encoder.pth"))
    except Exception:
        pass
    # metadata
    with open(os.path.join(epoch_dir, "metadata.json"), "w") as f:
        json.dump({"epoch": epoch_num, "avg_loss": float(avg_loss), "saved_at": time.time()}, f)
    return ck_path

# 1) Ensure we have the existing checkpoint folders in RUN_DIR
# Prefer already-present working dir, else copy from INPUT_ARCHIVE
def ensure_existing_checkpoints():
    # if run dir already has epoch folders, do nothing
    has_epochs = any(n.startswith("epoch_") for n in os.listdir(RUN_DIR)) if os.path.exists(RUN_DIR) else False
    if has_epochs:
        return True
    # else try copy from input archive
    if os.path.exists(INPUT_ARCHIVE):
        copied = 0
        for root, dirs, files in os.walk(INPUT_ARCHIVE):
            for d in dirs:
                if d.startswith("epoch_"):
                    src = os.path.join(root, d)
                    dst = os.path.join(RUN_DIR, d)
                    if not os.path.exists(dst):
                        try:
                            shutil.copytree(src, dst)
                            copied += 1
                        except Exception as e:
                            print("Copy error:", e)
        if copied > 0:
            print(f"Copied {copied} epoch folders from input archive -> {RUN_DIR}")
            return True
        else:
            print("No epoch_* folders found in input archive.")
            return False
    else:
        print("Input archive folder not present:", INPUT_ARCHIVE)
        return False

ok = ensure_existing_checkpoints()
if not ok:
    raise FileNotFoundError("Could not locate saved epoch folders in working or input archive. Place epoch_080 checkpoint into RUN_DIR or upload it.")

# 2) Detect highest saved epoch
saved_epochs = []
for name in os.listdir(RUN_DIR):
    if name.startswith("epoch_"):
        try:
            saved_epochs.append(int(name.split("_")[1]))
        except:
            pass
if not saved_epochs:
    raise RuntimeError("No epoch_XXX folders present in RUN_DIR after copy step.")
saved_epochs = sorted(saved_epochs)
print("Detected saved epochs:", saved_epochs)
last_saved = max(saved_epochs)
print("Last saved epoch:", last_saved)

# We expect last_saved >= 80; if not, you can set last_saved manually
if last_saved < 80:
    print("Warning: last_saved < 80; check your uploaded files. Continuing will start from last_saved+1.")
# We'll resume from last_saved+1 (so if last_saved==80 -> resume at 81)
start_epoch = last_saved + 1
if start_epoch > E_target:
    print("Target already reached. Exiting.")
else:
    print(f"Resume training from epoch {start_epoch} -> {E_target} (inclusive)")

# 3) Build model, optimizer, scheduler and try to load checkpoint for last_saved
model = SimSiam(backbone=BACKBONE).to(device)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=E_target)

ck_path = os.path.join(RUN_DIR, f"epoch_{last_saved:03d}", "checkpoint.pth")
if os.path.exists(ck_path):
    print("Loading checkpoint:", ck_path)
    ck = torch.load(ck_path, map_location=device, weights_only=False)
    # load model
    if ck.get("model_state") is not None:
        try:
            model.load_state_dict(ck["model_state"])
            print("Model state loaded.")
        except Exception as e:
            print("Model load warning:", e)
    # load optimizer
    if ck.get("optimizer_state") is not None:
        try:
            optimizer.load_state_dict(ck["optimizer_state"])
            print("Optimizer state loaded.")
        except Exception as e:
            print("Optimizer load warning:", e)
    # load scheduler
    if ck.get("scheduler_state") is not None:
        try:
            scheduler.load_state_dict(ck["scheduler_state"])
            print("Scheduler state loaded.")
        except Exception:
            pass
    # try restore rng states (if saved in checkpoint)
    if "py_random_state" in ck:
        import random, numpy as np
        try:
            random.setstate(ck["py_random_state"])
            np.random.set_state(ck["np_random_state"])
            torch.set_rng_state(ck["torch_cpu_rng"])
            if torch.cuda.is_available() and "torch_cuda_rng_all" in ck:
                try: torch.cuda.set_rng_state_all(ck["torch_cuda_rng_all"])
                except: pass
            print("Restored RNG states from checkpoint (if present).")
        except Exception:
            pass
else:
    print("No checkpoint.pth found for last_saved epoch, will continue from fresh weights.")

# 4) Resume loop: start_epoch .. E_target, print per-epoch, save every save_interval
try:
    for epoch in range(start_epoch, E_target + 1):
        model.train()
        running_loss = 0.0
        steps = 0
        loop = tqdm(train_loader, desc=f"Epoch {epoch}/{E_target}", leave=False)
        for x1, x2, _, _ in loop:
            x1 = x1.to(device); x2 = x2.to(device)
            p1, p2, z1, z2 = model(x1, x2)
            loss = 0.5 * negative_cosine_similarity(p1, z2) + 0.5 * negative_cosine_similarity(p2, z1)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += float(loss.item())
            steps += 1
            loop.set_postfix(loss=f"{loss.item():.4f}")
        scheduler.step()
        avg_loss = (running_loss / steps) if steps>0 else 0.0
        print(f"Epoch {epoch}/{E_target} completed - Avg Loss: {avg_loss:.4f}")

        # save at multiples of save_interval OR final epoch
        if (epoch % save_interval == 0) or (epoch == E_target):
            try:
                ck_out = save_full_checkpoint(model, optimizer, scheduler, RUN_DIR, epoch, avg_loss)
                print("Saved checkpoint:", ck_out)
            except Exception as e:
                print("Warning saving checkpoint:", e)
            # update zip
            try:
                if os.path.exists(ZIP_OUT):
                    os.remove(ZIP_OUT)
                shutil.make_archive(base_name=ZIP_OUT.replace(".zip",""), format="zip", root_dir=RUN_DIR)
                print("Updated ZIP:", ZIP_OUT)
            except Exception as e:
                print("Warning creating zip:", e)

except KeyboardInterrupt:
    print("Interrupted by user. Last permanent saved epoch is last existing epoch folder.")
    raise

print("Resume finished. Final folders in:", RUN_DIR)
print("Download the zip from Kaggle output:", ZIP_OUT)


Copied 4 epoch folders from input archive -> /kaggle/working/simsiam_task4/ablation_epochs_100
Detected saved epochs: [20, 40, 60, 80]
Last saved epoch: 80
Resume training from epoch 81 -> 100 (inclusive)




Loading checkpoint: /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_080/checkpoint.pth
Model state loaded.
Optimizer state loaded.
Scheduler state loaded.


                                                                           

Epoch 81/100 completed - Avg Loss: -0.9401


                                                                           

Epoch 82/100 completed - Avg Loss: -0.9332


                                                                           

Epoch 83/100 completed - Avg Loss: -0.9370


                                                                           

Epoch 84/100 completed - Avg Loss: -0.9354


                                                                           

Epoch 85/100 completed - Avg Loss: -0.9359
Saved checkpoint: /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_085/checkpoint.pth
Updated ZIP: /kaggle/working/simsiam_ablation_epochs_100.zip


                                                                           

Epoch 86/100 completed - Avg Loss: -0.9361


                                                                           

Epoch 87/100 completed - Avg Loss: -0.9341


                                                                           

Epoch 88/100 completed - Avg Loss: -0.9346


                                                                           

Epoch 89/100 completed - Avg Loss: -0.9357


                                                                           

Epoch 90/100 completed - Avg Loss: -0.9347
Saved checkpoint: /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_090/checkpoint.pth
Updated ZIP: /kaggle/working/simsiam_ablation_epochs_100.zip


                                                                           

Epoch 91/100 completed - Avg Loss: -0.9339


                                                                           

Epoch 92/100 completed - Avg Loss: -0.9330


                                                                           

Epoch 93/100 completed - Avg Loss: -0.9331


                                                                           

Epoch 94/100 completed - Avg Loss: -0.9349


                                                                           

Epoch 95/100 completed - Avg Loss: -0.9344
Saved checkpoint: /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_095/checkpoint.pth
Updated ZIP: /kaggle/working/simsiam_ablation_epochs_100.zip


                                                                           

Epoch 96/100 completed - Avg Loss: -0.9338


                                                                           

Epoch 97/100 completed - Avg Loss: -0.9341


                                                                           

Epoch 98/100 completed - Avg Loss: -0.9315


                                                                           

Epoch 99/100 completed - Avg Loss: -0.9322


                                                                            

Epoch 100/100 completed - Avg Loss: -0.9313
Saved checkpoint: /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_100/checkpoint.pth
Updated ZIP: /kaggle/working/simsiam_ablation_epochs_100.zip
Resume finished. Final folders in: /kaggle/working/simsiam_task4/ablation_epochs_100
Download the zip from Kaggle output: /kaggle/working/simsiam_ablation_epochs_100.zip


In [7]:
print("""
=== Ablation config: 100 epochs | saving every 20 epochs | run_dir: /kaggle/working/simsiam_task4/ablation_epochs_100 ===

Epoch 1/100 completed - Avg Loss: -0.3544
                                                                                         
Epoch 2/100 completed - Avg Loss: -0.6602
                                                                                         
Epoch 3/100 completed - Avg Loss: -0.8142
                                                                                         
Epoch 4/100 completed - Avg Loss: -0.8335
                                                                                         
Epoch 5/100 completed - Avg Loss: -0.8616
                                                                                         
Epoch 6/100 completed - Avg Loss: -0.8948
                                                                                         
Epoch 7/100 completed - Avg Loss: -0.8459
                                                                                         
Epoch 8/100 completed - Avg Loss: -0.8835
                                                                                         
Epoch 9/100 completed - Avg Loss: -0.9091
                                                                                          
Epoch 10/100 completed - Avg Loss: -0.9155
                                                                                          
Epoch 11/100 completed - Avg Loss: -0.9060
                                                                                          
Epoch 12/100 completed - Avg Loss: -0.9051
                                                                                          
Epoch 13/100 completed - Avg Loss: -0.9177
                                                                                          
Epoch 14/100 completed - Avg Loss: -0.9089
                                                                                          
Epoch 15/100 completed - Avg Loss: -0.9111
                                                                                          
Epoch 16/100 completed - Avg Loss: -0.9254
                                                                                          
Epoch 17/100 completed - Avg Loss: -0.9219
                                                                                          
Epoch 18/100 completed - Avg Loss: -0.9178
                                                                                          
Epoch 19/100 completed - Avg Loss: -0.8918
                                                                                          
Epoch 20/100 completed - Avg Loss: -0.9234
Saved full checkpoint for epoch: 20 -> /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_020/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
                                                                                          
Epoch 21/100 completed - Avg Loss: -0.9221
                                                                                          
Epoch 22/100 completed - Avg Loss: -0.9404
                                                                                          
Epoch 23/100 completed - Avg Loss: -0.9371
                                                                                          
Epoch 24/100 completed - Avg Loss: -0.9062
                                                                                          
Epoch 25/100 completed - Avg Loss: -0.8888
                                                                                          
Epoch 26/100 completed - Avg Loss: -0.9240
                                                                                          
Epoch 27/100 completed - Avg Loss: -0.9341
                                                                                          
Epoch 28/100 completed - Avg Loss: -0.9380
                                                                                          
Epoch 29/100 completed - Avg Loss: -0.9371
                                                                                          
Epoch 30/100 completed - Avg Loss: -0.9390
                                                                                          
Epoch 31/100 completed - Avg Loss: -0.9326
                                                                                          
Epoch 32/100 completed - Avg Loss: -0.9433
                                                                                          
Epoch 33/100 completed - Avg Loss: -0.9424
                                                                                          
Epoch 34/100 completed - Avg Loss: -0.9477
                                                                                          
Epoch 35/100 completed - Avg Loss: -0.9463
                                                                                          
Epoch 36/100 completed - Avg Loss: -0.9468
                                                                                          
Epoch 37/100 completed - Avg Loss: -0.9433
                                                                                          
Epoch 38/100 completed - Avg Loss: -0.9434
                                                                                          
Epoch 39/100 completed - Avg Loss: -0.9445
                                                                                          
Epoch 40/100 completed - Avg Loss: -0.9481
Saved full checkpoint for epoch: 40 -> /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_040/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
                                                                                          
Epoch 41/100 completed - Avg Loss: -0.9451
                                                                                          
Epoch 42/100 completed - Avg Loss: -0.9364
                                                                                          
Epoch 43/100 completed - Avg Loss: -0.9513
                                                                                          
Epoch 44/100 completed - Avg Loss: -0.9480
                                                                                          
Epoch 45/100 completed - Avg Loss: -0.9490
                                                                                          
Epoch 46/100 completed - Avg Loss: -0.9489
                                                                                          
Epoch 47/100 completed - Avg Loss: -0.9502
                                                                                          
Epoch 48/100 completed - Avg Loss: -0.9503
                                                                                          
Epoch 49/100 completed - Avg Loss: -0.9509
                                                                                          
Epoch 50/100 completed - Avg Loss: -0.9496
                                                                                          
Epoch 51/100 completed - Avg Loss: -0.9520
                                                                                          
Epoch 52/100 completed - Avg Loss: -0.9510
                                                                                          
Epoch 53/100 completed - Avg Loss: -0.9535
                                                                                          
Epoch 54/100 completed - Avg Loss: -0.9472
                                                                                          
Epoch 55/100 completed - Avg Loss: -0.9480
                                                                                          
Epoch 56/100 completed - Avg Loss: -0.9471
                                                                                          
Epoch 57/100 completed - Avg Loss: -0.9479
                                                                                          
Epoch 58/100 completed - Avg Loss: -0.9500
                                                                                          
Epoch 59/100 completed - Avg Loss: -0.9455
                                                                                          
Epoch 60/100 completed - Avg Loss: -0.9424
Saved full checkpoint for epoch: 60 -> /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_060/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
                                                                                          
Epoch 61/100 completed - Avg Loss: -0.9420
                                                                                          
Epoch 62/100 completed - Avg Loss: -0.9426
                                                                                          
Epoch 63/100 completed - Avg Loss: -0.9363
                                                                                          
Epoch 64/100 completed - Avg Loss: -0.9406
                                                                                          
Epoch 65/100 completed - Avg Loss: -0.9366
                                                                                          
Epoch 66/100 completed - Avg Loss: -0.9419
                                                                                          
Epoch 67/100 completed - Avg Loss: -0.9453
                                                                                          
Epoch 68/100 completed - Avg Loss: -0.9445
                                                                                          
Epoch 69/100 completed - Avg Loss: -0.9445
                                                                                          
Epoch 71/100 completed - Avg Loss: -0.9411
                                                                                          
Epoch 72/100 completed - Avg Loss: -0.9436
                                                                                          
Epoch 73/100 completed - Avg Loss: -0.9392
                                                                                          
Epoch 74/100 completed - Avg Loss: -0.9386
                                                                                          
Epoch 75/100 completed - Avg Loss: -0.9390
                                                                                          
Epoch 76/100 completed - Avg Loss: -0.9381
                                                                                          
Epoch 77/100 completed - Avg Loss: -0.9356
                                                                                          
Epoch 78/100 completed - Avg Loss: -0.9323
                                                                                          
Epoch 79/100 completed - Avg Loss: -0.9391       
                                                                                          
Epoch 80/100 completed - Avg Loss: -0.9388

Epoch 81/100 completed - Avg Loss: -0.9401
                                                                           
Epoch 82/100 completed - Avg Loss: -0.9332
                                                                           
Epoch 83/100 completed - Avg Loss: -0.9370
                                                                           
Epoch 84/100 completed - Avg Loss: -0.9354
                                                                           
Epoch 85/100 completed - Avg Loss: -0.9359
Saved full checkpoint for epoch: 85 ->  /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_085/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
                                                                           
Epoch 86/100 completed - Avg Loss: -0.9361
                                                                           
Epoch 87/100 completed - Avg Loss: -0.9341
                                                                           
Epoch 88/100 completed - Avg Loss: -0.9346
                                                                           
Epoch 89/100 completed - Avg Loss: -0.9357
                                                                           
Epoch 90/100 completed - Avg Loss: -0.9347
Saved full checkpoint for epoch: 90 ->  /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_090/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
                                                                           
Epoch 91/100 completed - Avg Loss: -0.9339
                                                                           
Epoch 92/100 completed - Avg Loss: -0.9330
                                                                           
Epoch 93/100 completed - Avg Loss: -0.9331
                                                                           
Epoch 94/100 completed - Avg Loss: -0.9349
                                                                           
Epoch 95/100 completed - Avg Loss: -0.9344
Saved full checkpoint for epoch: 95 ->  /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_095/checkpoint.pth
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
                                                                           
Epoch 96/100 completed - Avg Loss: -0.9338
                                                                           
Epoch 97/100 completed - Avg Loss: -0.9341
                                                                           
Epoch 98/100 completed - Avg Loss: -0.9315
                                                                           
Epoch 99/100 completed - Avg Loss: -0.9322
                                                                            
Epoch 100/100 completed - Avg Loss: -0.9313
Saved full checkpoint for epoch: 100 ->  /kaggle/working/simsiam_task4/ablation_epochs_100/epoch_100/checkpoint.pth       
Saved ZIP backup to: /kaggle/working/simsiam_ablation_epochs_100.zip
""")



=== Ablation config: 100 epochs | saving every 20 epochs | run_dir: /kaggle/working/simsiam_task4/ablation_epochs_100 ===

Epoch 1/100 completed - Avg Loss: -0.3544
                                                                                         
Epoch 2/100 completed - Avg Loss: -0.6602
                                                                                         
Epoch 3/100 completed - Avg Loss: -0.8142
                                                                                         
Epoch 4/100 completed - Avg Loss: -0.8335
                                                                                         
Epoch 5/100 completed - Avg Loss: -0.8616
                                                                                         
Epoch 6/100 completed - Avg Loss: -0.8948
                                                                                         
Epoch 7/100 completed - Avg Loss: -0.8459
                                          

In [2]:
# ===== Manifest path-fixer cell =====
# Run this cell BEFORE the Ablation / Ratio cells to fix broken file paths in split_manifest.json.
import os, json, glob
from pathlib import Path

# CONFIG - edit only if your files live elsewhere
ORIGINAL_SPLIT_MANIFEST = "/kaggle/input/simsiam-task4-archive/split_manifest.json"
DATASET_ROOT = "/kaggle/input/betel-leaf"   # where the dataset images live
OUTPUT_FIXED_MANIFEST = "/kaggle/working/corrected_split_manifest.json"
MISSING_LIST = "/kaggle/working/missing_files.txt"

print("Loading manifest:", ORIGINAL_SPLIT_MANIFEST)
with open(ORIGINAL_SPLIT_MANIFEST, "r") as f:
    manifest = json.load(f)

# Expect lists named "train","val","test" and label lists "train_labels", etc.
splits = ["train", "val", "test"]
label_keys = {"train":"train_labels", "val":"val_labels", "test":"test_labels"}

fixed = {k: [] for k in manifest if isinstance(manifest.get(k), list)}
missing = []

# Build a fast index of available files under DATASET_ROOT by basename -> fullpath (first match wins)
print("Indexing dataset files under", DATASET_ROOT, " (this may take a few seconds)...")
all_files = list(Path(DATASET_ROOT).rglob("*.*"))
basename_index = {}
for p in all_files:
    b = p.name
    if b not in basename_index:
        basename_index[b] = str(p)

print("Indexed", len(all_files), "files. Unique basenames:", len(basename_index))

for s in splits:
    paths = manifest.get(s, [])
    labels = manifest.get(label_keys[s], [])
    if len(labels) != len(paths):
        print(f"Warning: split {s} has mismatched counts: {len(paths)} paths vs {len(labels)} labels. Will align by min length.")
    new_paths = []
    new_labels = []
    for i, p in enumerate(paths):
        if p is None: 
            missing.append((s, p))
            continue
        # If absolute path exists as-is, keep
        if os.path.exists(p):
            new_paths.append(p)
            if i < len(labels):
                new_labels.append(labels[i])
            continue
        # Try common adjustments: sometimes manifest paths are missing a prefix. Try joining with DATASET_ROOT
        candidate1 = os.path.join(DATASET_ROOT, p) if not p.startswith(DATASET_ROOT) else p
        if os.path.exists(candidate1):
            new_paths.append(candidate1)
            if i < len(labels):
                new_labels.append(labels[i])
            continue
        # Try basename lookup in dataset
        b = os.path.basename(p)
        if b in basename_index:
            found = basename_index[b]
            new_paths.append(found)
            if i < len(labels):
                new_labels.append(labels[i])
            continue
        # Try searching for similar basenames with partial match (rare) - look for any file containing the basename fragment
        candidates = [str(pp) for pp in all_files if b.split(".")[0] in pp.name]
        if len(candidates) == 1:
            new_paths.append(candidates[0])
            if i < len(labels):
                new_labels.append(labels[i])
            continue
        # Couldn't find it
        missing.append((s, p))
    fixed[s] = new_paths
    fixed[label_keys[s]] = new_labels

# Keep other keys intact (classes, any metadata)
for k,v in manifest.items():
    if k not in fixed:
        fixed[k] = v

# Save corrected manifest
with open(OUTPUT_FIXED_MANIFEST, "w") as f:
    json.dump(fixed, f, indent=2)

print("Saved corrected manifest to:", OUTPUT_FIXED_MANIFEST)
print("Total missing references that could not be resolved:", len(missing))
if len(missing) > 0:
    print("Writing missing list to", MISSING_LIST)
    with open(MISSING_LIST, "w") as fm:
        for s,p in missing:
            fm.write(f"{s}\t{p}\n")
    print("Sample of missing entries (first 20):")
    for e in missing[:20]:
        print("-", e)

# Helpful tips for next steps
print("\nNext steps:")
print("1) Inspect", MISSING_LIST, "to check whether files are truly missing or misnamed.")
print("2) If many files are missing, you may need to re-upload dataset files or ask teammates.")
print("3) Re-run the Ablation cell after replacing SPLIT_MANIFEST path with this corrected manifest path.")
print("\nIf you want, I can automatically update the Ablation/Ratio cells to use this corrected manifest path (/kaggle/working/corrected_split_manifest.json).")


Loading manifest: /kaggle/input/simsiam-task4-archive/split_manifest.json
Indexing dataset files under /kaggle/input/betel-leaf  (this may take a few seconds)...
Indexed 1800 files. Unique basenames: 1800
Saved corrected manifest to: /kaggle/working/corrected_split_manifest.json
Total missing references that could not be resolved: 0

Next steps:
1) Inspect /kaggle/working/missing_files.txt to check whether files are truly missing or misnamed.
2) If many files are missing, you may need to re-upload dataset files or ask teammates.
3) Re-run the Ablation cell after replacing SPLIT_MANIFEST path with this corrected manifest path.

If you want, I can automatically update the Ablation/Ratio cells to use this corrected manifest path (/kaggle/working/corrected_split_manifest.json).


In [3]:
# ===== CELL A: Ablation Evaluation (epoch_020 -> epoch_100) =====
# Self-contained cell — paste into a new notebook (after runtime GPU selection).
import os, json, time, shutil, glob
from pathlib import Path
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms, models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.metrics import precision_recall_fscore_support, silhouette_score
from sklearn.preprocessing import label_binarize
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

# -------- Config (edit if needed) --------
BACKBONE = "resnet18"
RESOLUTION = 224
BATCH_SIZE = 64
NUM_WORKERS = 2
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
OUT_ROOT = "/kaggle/working/task7_ablation"   # outputs saved here
os.makedirs(OUT_ROOT, exist_ok=True)

# Uploaded locations (from your Kaggle dataset structure)
SPLIT_MANIFEST = "/kaggle/working/corrected_split_manifest.json"   # your split manifest. :contentReference[oaicite:6]{index=6}
ABLATION_BASE = "/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100"  # folder with epoch_* subfolders. :contentReference[oaicite:7]{index=7}

# Classifier list (same as your probe list)
CLASSIFIERS = {
    "LogisticRegression": LogisticRegression(max_iter=2000),
    "SVM_RBF": SVC(kernel="rbf", probability=True),
    "RandomForest": RandomForestClassifier(n_estimators=100),
    "DecisionTree": DecisionTreeClassifier(),
    "MLP": MLPClassifier(hidden_layer_sizes=(512,), max_iter=500)
}

# Use these flags to control heavy operations
DO_FINETUNE = False   # set True only if you want to fine-tune encoder for each checkpoint (very heavy)
RUN_LABEL_EFFICIENCY = True  # compute label-efficiency curves per checkpoint (costly but useful)

# -------- Helpers --------
def build_encoder(backbone="resnet18"):
    if backbone == "resnet18":
        base = models.resnet18(weights=None)
        feat_dim = 512
    elif backbone == "resnet50":
        base = models.resnet50(weights=None)
        feat_dim = 2048
    else:
        raise ValueError("Unsupported backbone")
    modules = list(base.children())[:-1]
    encoder = nn.Sequential(*modules)
    encoder.feat_dim = feat_dim
    return encoder

def try_load_encoder(encoder, path):
    ck = torch.load(path, map_location="cpu")
    # many checkpoint formats: try to find encoder state dict
    if isinstance(ck, dict):
        # common keys
        for key in ["encoder_state_dict", "encoder", "model_state", "model", "state_dict"]:
            if key in ck:
                st = ck[key]
                break
        else:
            st = ck
    else:
        st = ck
    # try direct load; if fails, try mapping prefixes
    try:
        encoder.load_state_dict(st)
        return True
    except Exception:
        mapped = {}
        for k,v in st.items():
            newk = k
            if k.startswith("encoder."):
                newk = k.replace("encoder.", "")
            if k.startswith("module.encoder."):
                newk = k.replace("module.encoder.", "")
            mapped[newk] = v
        try:
            encoder.load_state_dict(mapped)
            return True
        except Exception as e:
            print("Failed to load encoder weights from", path, "error:", e)
            return False

# Dataset wrapper using split_manifest paths
class ManifestDataset(Dataset):
    def __init__(self, paths, labels, transform=None):
        self.paths = paths
        self.labels = labels
        self.transform = transform
    def __len__(self):
        return len(self.paths)
    def __getitem__(self, idx):
        p = self.paths[idx]
        lbl = self.labels[idx]
        img = Image.open(p).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, lbl, p

eval_transform = transforms.Compose([
    transforms.Resize(int(RESOLUTION * 1.1)),
    transforms.CenterCrop(RESOLUTION),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

def extract_features(encoder, paths, batch_size=64, workers=2, save_path=None):
    ds = ManifestDataset(paths, [0]*len(paths), transform=eval_transform)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=workers)
    feats = []
    encoder.eval()
    enc = encoder.to(DEVICE)
    with torch.no_grad():
        for imgs, _, _ in loader:
            imgs = imgs.to(DEVICE)
            h = enc(imgs).view(imgs.size(0), -1).cpu().numpy()
            feats.append(h)
    feats = np.vstack(feats)
    if save_path:
        np.save(save_path, feats)
    return feats

def train_and_eval_probes(train_feats, train_labels, val_feats, val_labels, test_feats, test_labels, out_prefix):
    results = {}
    for name, clf in CLASSIFIERS.items():
        print("Training probe:", name)
        # clone classifier instance to avoid scikit state reuse
        import copy
        clf_local = copy.deepcopy(clf)
        clf_local.fit(train_feats, train_labels)
        # save model
        joblib.dump(clf_local, out_prefix + f"_{name}.joblib")
        # metrics
        y_pred = clf_local.predict(test_feats)
        acc = accuracy_score(test_labels, y_pred)
        prec, rec, f1, supp = precision_recall_fscore_support(test_labels, y_pred, average=None, zero_division=0)
        # per-class auc if predict_proba available
        aucs = {}
        try:
            if hasattr(clf_local, "predict_proba"):
                probs = clf_local.predict_proba(test_feats)
                y_bin = label_binarize(test_labels, classes=list(range(np.max(test_labels)+1)))
                # compute per-class AUC safely
                for i in range(y_bin.shape[1]):
                    try:
                        aucs[i] = float(roc_auc_score(y_bin[:,i], probs[:,i]))
                    except Exception:
                        aucs[i] = None
                macro = roc_auc_score(y_bin, probs, average="macro")
                micro = roc_auc_score(y_bin, probs, average="micro")
            else:
                aucs = None
                macro = None; micro = None
        except Exception as e:
            aucs = None; macro = None; micro = None
        results[name] = {
            "accuracy": float(acc),
            "per_class_prec": prec.tolist(),
            "per_class_rec": rec.tolist(),
            "per_class_f1": f1.tolist(),
            "per_class_support": supp.tolist(),
            "per_class_auc": aucs,
            "macro_auc": float(macro) if macro is not None else None,
            "micro_auc": float(micro) if micro is not None else None
        }
    return results

# -------- Load split manifest (train/val/test lists) --------
with open(SPLIT_MANIFEST, "r") as f:
    split = json.load(f)
train_paths = split["train"]
train_labels = split["train_labels"]
val_paths = split["val"]
val_labels = split["val_labels"]
test_paths = split["test"]
test_labels = split["test_labels"]
classes = split.get("classes", None)
if classes is None:
    classes = [str(i) for i in range(max(train_labels)+1)]

# -------- Find ablation checkpoint subfolders --------
epoch_dirs = sorted(glob.glob(os.path.join(ABLATION_BASE, "epoch_*")))
print("Found ablation epoch dirs:", epoch_dirs)

ablation_summary = []
for ed in epoch_dirs:
    try:
        epoch_name = os.path.basename(ed)
        enc_path = os.path.join(ed, "encoder.pth")
        if not os.path.exists(enc_path):
            print("No encoder.pth in", ed, "skipping")
            continue
        print("Processing", epoch_name)
        out_dir = os.path.join(OUT_ROOT, epoch_name)
        os.makedirs(out_dir, exist_ok=True)
        # Build encoder and load weights
        encoder = build_encoder(BACKBONE)
        ok = try_load_encoder(encoder, enc_path)
        if not ok:
            print("Failed to load encoder for", epoch_name)
            continue

        # Extract features if not cached
        train_feat_path = os.path.join(out_dir, "train_feats.npy")
        val_feat_path = os.path.join(out_dir, "val_feats.npy")
        test_feat_path = os.path.join(out_dir, "test_feats.npy")

        if not (os.path.exists(train_feat_path) and os.path.exists(val_feat_path) and os.path.exists(test_feat_path)):
            print("Extracting features for", epoch_name)
            tr_feats = extract_features(encoder, train_paths, batch_size=BATCH_SIZE, workers=NUM_WORKERS, save_path=train_feat_path)
            v_feats = extract_features(encoder, val_paths, batch_size=BATCH_SIZE, workers=NUM_WORKERS, save_path=val_feat_path)
            te_feats = extract_features(encoder, test_paths, batch_size=BATCH_SIZE, workers=NUM_WORKERS, save_path=test_feat_path)
        else:
            print("Loading cached features for", epoch_name)
            tr_feats = np.load(train_feat_path)
            v_feats = np.load(val_feat_path)
            te_feats = np.load(test_feat_path)

        # compute silhouette on concatenated features (quick)
        try:
            feats_all = np.vstack([tr_feats, v_feats, te_feats])
            lbls_all = np.array(train_labels + val_labels + test_labels)
            sil = silhouette_score(feats_all, lbls_all) if len(np.unique(lbls_all))>1 else None
        except Exception as e:
            print("Silhouette failed:", e)
            sil = None

        # Train probes and get results
        probe_results = train_and_eval_probes(tr_feats, train_labels, v_feats, val_labels, te_feats, test_labels, out_prefix=os.path.join(out_dir, "probe"))

        # Label-efficiency (optional) - train logistic on fractions
        label_eff = {}
        if RUN_LABEL_EFFICIENCY:
            fractions = [0.01, 0.05, 0.10, 0.25, 0.50, 1.0]
            total = tr_feats.shape[0]
            for frac in fractions:
                n = max(1, int(total * frac))
                # simple stratified subsample
                idxs = np.arange(total)
                rng = np.random.RandomState(42)
                # naive selection, better to per-class sample but for brevity:
                sel = rng.choice(idxs, size=n, replace=False)
                clf = LogisticRegression(max_iter=2000)
                clf.fit(tr_feats[sel], np.array(train_labels)[sel])
                pred = clf.predict(te_feats)
                acc = accuracy_score(test_labels, pred)
                label_eff[f"{int(frac*100)}%"] = float(acc)

            # save
            with open(os.path.join(out_dir, "label_efficiency.json"), "w") as f:
                json.dump(label_eff, f, indent=2)

        # Save summary for this epoch
        summary = {
            "epoch_dir": epoch_name,
            "enc_path": enc_path,
            "silhouette": float(sil) if sil is not None else None,
            "probe_results": probe_results,
            "label_efficiency": label_eff
        }
        ablation_summary.append(summary)
        with open(os.path.join(out_dir, "ablation_summary.json"), "w") as f:
            json.dump(summary, f, indent=2)

        # Save confusion matrix plots for each probe
        for name in probe_results.keys():
            # load saved model if exists
            try:
                clf = joblib.load(os.path.join(out_dir, f"probe_{name}.joblib"))
            except Exception:
                clf = None
            if clf is None: continue
            y_pred = clf.predict(te_feats)
            cm = confusion_matrix(test_labels, y_pred)
            plt.figure(figsize=(6,5))
            sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes)
            plt.title(f"Confusion Matrix - {epoch_name} - {name}")
            plt.savefig(os.path.join(out_dir, f"confusion_{name}.png"))
            plt.close()

    except Exception as e:
        print("Error processing", ed, e)

# Save aggregated ablation summary
with open(os.path.join(OUT_ROOT, "ablation_results.json"), "w") as f:
    json.dump(ablation_summary, f, indent=2)

# Create CSV summary table
import csv
csv_path = os.path.join(OUT_ROOT, "ablation_table.csv")
with open(csv_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["epoch_dir","silhouette","probe_name","accuracy","macro_auc","micro_auc"])
    for s in ablation_summary:
        ed = s["epoch_dir"]
        sil = s["silhouette"]
        for probe_name, pr in s["probe_results"].items():
            writer.writerow([ed, sil, probe_name, pr.get("accuracy", None), pr.get("macro_auc", None), pr.get("micro_auc", None)])

# Zip outputs
zipname = os.path.join("/kaggle/working", "task7_ablation_outputs")
if os.path.exists(zipname + ".zip"):
    os.remove(zipname + ".zip")
shutil.make_archive(base_name=zipname, format="zip", root_dir=OUT_ROOT)
print("Ablation outputs zipped to", zipname + ".zip")
print("Done. Outputs in", OUT_ROOT)


Found ablation epoch dirs: ['/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_020', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_040', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_060', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_080', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_085', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_090', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_095', '/kaggle/input/all-files/All Files/simsiam_ablation_epochs_100/epoch_100']
Processing epoch_020
Extracting features for epoch_020
Training probe: LogisticRegression
Training probe: SVM_RBF
Training probe: RandomForest
Training probe: DecisionTree
Training probe: MLP
Processing epoch_040
Extracting features for epoch_040
Training probe: LogisticRegression
Training probe: SVM_RBF
Training probe: RandomForest
Training probe: DecisionTree
Training

In [9]:
# ===== FIXED CELL B (final): Ratio Sweep Evaluation (robust integer split) =====
# Paste into a notebook. This replaces previous CELL B and fixes the "sum > N" error.
import os, json, time, shutil
from pathlib import Path
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, precision_recall_fscore_support
from sklearn.preprocessing import label_binarize
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy.random as npr

# -------- Config --------
BACKBONE = "resnet18"
RESOLUTION = 224
BATCH_SIZE = 64
NUM_WORKERS = 2
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
OUT_ROOT = "/kaggle/working/task7_ratios"
os.makedirs(OUT_ROOT, exist_ok=True)

# Files (uploaded)
SPLIT_MANIFEST = "/kaggle/input/simsiam-task4-archive/split_manifest.json"
BASE_FEATURES_DIR = "/kaggle/input/all-files/All Files"
BASE_TRAIN_FEATS = os.path.join(BASE_FEATURES_DIR, "train_feats.npy")
BASE_TRAIN_LABELS = os.path.join(BASE_FEATURES_DIR, "train_labels.npy")
BASE_VAL_FEATS = os.path.join(BASE_FEATURES_DIR, "val_feats.npy")
BASE_VAL_LABELS = os.path.join(BASE_FEATURES_DIR, "val_labels.npy")
BASE_TEST_FEATS = os.path.join(BASE_FEATURES_DIR, "test_feats.npy")
BASE_TEST_LABELS = os.path.join(BASE_FEATURES_DIR, "test_labels.npy")

CLASSIFIERS = {
    "LogisticRegression": LogisticRegression(max_iter=2000),
    "SVM_RBF": SVC(kernel="rbf", probability=True),
    "RandomForest": RandomForestClassifier(n_estimators=100),
    "DecisionTree": DecisionTreeClassifier(),
    "MLP": MLPClassifier(hidden_layer_sizes=(512,), max_iter=500)
}

# -------- Helpers (feature extraction kept for fallback) --------
class ImageDataset(Dataset):
    def __init__(self, paths, labels, transform=None):
        self.paths = paths
        self.labels = labels
        self.transform = transform
    def __len__(self):
        return len(self.paths)
    def __getitem__(self, idx):
        p = self.paths[idx]
        lbl = self.labels[idx]
        img = Image.open(p).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, lbl, p

eval_transform = transforms.Compose([
    transforms.Resize(int(RESOLUTION * 1.1)),
    transforms.CenterCrop(RESOLUTION),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

def build_encoder(backbone="resnet18"):
    if backbone == "resnet18":
        base = models.resnet18(weights=None)
        feat_dim = 512
    elif backbone == "resnet50":
        base = models.resnet50(weights=None)
        feat_dim = 2048
    else:
        raise ValueError("Unsupported backbone")
    modules = list(base.children())[:-1]
    encoder = nn.Sequential(*modules)
    encoder.feat_dim = feat_dim
    return encoder

def load_encoder_weights(encoder, ckpt_path):
    ck = torch.load(ckpt_path, map_location="cpu")
    if isinstance(ck, dict):
        for key in ["encoder_state_dict","encoder","model_state","state_dict","model"]:
            if key in ck:
                state = ck[key]; break
        else:
            state = ck
    else:
        state = ck
    try:
        encoder.load_state_dict(state)
        return True
    except Exception:
        mapped = {}
        for k,v in state.items():
            newk = k
            if k.startswith("encoder."): newk = k.replace("encoder.","")
            if k.startswith("module.encoder."): newk = k.replace("module.encoder.","")
            mapped[newk] = v
        try:
            encoder.load_state_dict(mapped)
            return True
        except Exception as e:
            print("Failed to load encoder:", e)
            return False

def extract_features_from_paths(encoder, paths, batch_size=64, workers=2, save_path=None):
    ds = ImageDataset(paths, [0]*len(paths), transform=eval_transform)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=workers)
    feats = []
    encoder.eval()
    enc = encoder.to(DEVICE)
    with torch.no_grad():
        for imgs, _, _ in loader:
            imgs = imgs.to(DEVICE)
            h = enc(imgs).view(imgs.size(0), -1).cpu().numpy()
            feats.append(h)
    feats = np.vstack(feats)
    if save_path: np.save(save_path, feats)
    return feats

# Robust stratified subsampler for label-efficiency
def stratified_subsample_indices(y, k, random_state=42):
    y = np.array(y)
    n = len(y)
    unique, counts = np.unique(y, return_counts=True)
    n_classes = len(unique)
    if n_classes < 2:
        return None
    if k < 2:
        return None
    rng = npr.RandomState(random_state)
    try:
        # attempt stratified train_test_split with train_size=k
        train_idx, _ = train_test_split(np.arange(n), train_size=k, stratify=y, random_state=random_state)
        if len(np.unique(y[train_idx])) >= 2:
            return train_idx
    except Exception:
        pass
    # fallback proportional per-class sampling
    prop = counts / counts.sum()
    desired = np.floor(prop * k).astype(int)
    for i, uc in enumerate(unique):
        if desired[i] == 0 and k >= n_classes and counts[i] > 0:
            desired[i] = 1
    rem = int(k - desired.sum())
    if rem > 0:
        leftover = (prop * k) - desired
        order = np.argsort(-leftover)
        for idx in order:
            if rem <= 0:
                break
            desired[idx] += 1
            rem -= 1
    selected = []
    for cls_idx, cls in enumerate(unique):
        cls_inds = np.where(y == cls)[0]
        cnt = desired[cls_idx]
        if cnt <= 0:
            continue
        if cnt > len(cls_inds):
            cnt = len(cls_inds)
        chosen = rng.choice(cls_inds, size=cnt, replace=False)
        selected.extend(chosen.tolist())
    selected = np.array(selected, dtype=int)
    if len(selected) < 2 or len(np.unique(y[selected])) < 2:
        return None
    if len(selected) > k:
        selected = selected[:k]
    return selected

# -------- Read master manifest to get full file list and labels --------
with open(SPLIT_MANIFEST, "r") as f:
    sm = json.load(f)

# Build feats_all & labels_all using available cached files; fallback to extracting from images if needed
feats_all = None
labels_all = None

have_train = os.path.exists(BASE_TRAIN_FEATS) and os.path.exists(BASE_TRAIN_LABELS)
have_val = os.path.exists(BASE_VAL_FEATS) and os.path.exists(BASE_VAL_LABELS)
have_test = os.path.exists(BASE_TEST_FEATS) and os.path.exists(BASE_TEST_LABELS)

if have_train and have_val and have_test:
    tr = np.load(BASE_TRAIN_FEATS); tr_lbl = np.load(BASE_TRAIN_LABELS)
    v = np.load(BASE_VAL_FEATS); v_lbl = np.load(BASE_VAL_LABELS)
    te = np.load(BASE_TEST_FEATS); te_lbl = np.load(BASE_TEST_LABELS)
    feats_all = np.vstack([tr, v, te])
    labels_all = np.hstack([tr_lbl, v_lbl, te_lbl]).astype(int)
elif have_train and have_test:
    tr = np.load(BASE_TRAIN_FEATS); tr_lbl = np.load(BASE_TRAIN_LABELS)
    te = np.load(BASE_TEST_FEATS); te_lbl = np.load(BASE_TEST_LABELS)
    feats_all = np.vstack([tr, te])
    labels_all = np.hstack([tr_lbl, te_lbl]).astype(int)
else:
    # fallback: build list of image paths from manifest and extract features using encoder
    print("No adequate cached features found — will extract features from images. This will take longer.")
    all_paths = sm["train"] + sm["val"] + sm["test"]
    all_labels = sm["train_labels"] + sm["val_labels"] + sm["test_labels"]
    BASE_ENCODER_CKPT = "/kaggle/input/simsiam-task4-archive/simsiam_encoder.pth"
    encoder = build_encoder(BACKBONE)
    if not load_encoder_weights(encoder, BASE_ENCODER_CKPT):
        raise RuntimeError("Cannot load encoder weights from " + BASE_ENCODER_CKPT)
    feats_all = extract_features_from_paths(encoder, all_paths, batch_size=BATCH_SIZE, workers=NUM_WORKERS, save_path=os.path.join(OUT_ROOT,"feats_all.npy"))
    labels_all = np.array(all_labels).astype(int)
    np.save(os.path.join(OUT_ROOT,"labels_all.npy"), labels_all)

# Final check
if feats_all is None or labels_all is None:
    raise RuntimeError("Failed to prepare features and labels. Check feature files or manifest.")

N = feats_all.shape[0]
print("Total samples used for ratio sweep:", N)

# Save composed arrays for reproducibility
np.save(os.path.join(OUT_ROOT,"feats_all.npy"), feats_all)
np.save(os.path.join(OUT_ROOT,"labels_all.npy"), labels_all)

# -------- Ratio sweep: loop through ratios (use integer n_train to avoid rounding errors) --------
ratios = [0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1]  # train fraction (train:test)
ratio_results = []

for train_frac in ratios:
    # compute integer train size robustly
    n_train = int(round(train_frac * N))
    # clamp to [1, N-1]
    if n_train >= N:
        n_train = N - 1
    if n_train < 1:
        n_train = 1
    n_test = N - n_train
    print(f"Running ratio: train {int(train_frac*100)}% -> n_train={n_train}, n_test={n_test}")
    # Use stratified train_test_split with integer train_size
    idxs = np.arange(N)
    try:
        train_idx, test_idx = train_test_split(idxs, train_size=n_train, stratify=labels_all, random_state=42)
    except Exception as e:
        # fallback: use random split (should be rare)
        print("Stratified split failed, falling back to random split:", e)
        train_idx, test_idx = train_test_split(idxs, train_size=n_train, random_state=42)
    X_train = feats_all[train_idx]; y_train = labels_all[train_idx]
    X_test = feats_all[test_idx]; y_test = labels_all[test_idx]
    # carve out val 10% of train (integer)
    val_portion = max(1, int(round(0.10 * len(X_train))))
    # pick val indices via stratify if possible
    try:
        tr_sub_idx, val_sub_idx = train_test_split(np.arange(len(X_train)), test_size=val_portion, stratify=y_train, random_state=42)
    except Exception:
        tr_sub_idx, val_sub_idx = train_test_split(np.arange(len(X_train)), test_size=val_portion, random_state=42)
    X_tr = X_train[tr_sub_idx]; y_tr = y_train[tr_sub_idx]
    X_val = X_train[val_sub_idx]; y_val = y_train[val_sub_idx]

    # Train classifiers on X_tr
    probes_res = {}
    for name, clf in CLASSIFIERS.items():
        import copy
        clf_local = copy.deepcopy(clf)
        if len(np.unique(y_tr)) < 2:
            probes_res[name] = {"accuracy": None, "macro_auc": None, "micro_auc": None, "per_class_f1": None, "skipped_reason": "only_one_class_in_train"}
            continue
        clf_local.fit(X_tr, y_tr)
        joblib.dump(clf_local, os.path.join(OUT_ROOT, f"{int(train_frac*100)}pct_{name}.joblib"))
        y_pred = clf_local.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        prec, rec, f1, sup = precision_recall_fscore_support(y_test, y_pred, average=None, zero_division=0)
        # AUC if probabilities exist
        try:
            if hasattr(clf_local, "predict_proba"):
                probs = clf_local.predict_proba(X_test)
                y_bin = label_binarize(y_test, classes=list(range(np.max(labels_all)+1)))
                macro = roc_auc_score(y_bin, probs, average="macro")
                micro = roc_auc_score(y_bin, probs, average="micro")
            else:
                macro = None; micro = None
        except Exception:
            macro = None; micro = None
        probes_res[name] = {"accuracy": float(acc), "macro_auc": macro, "micro_auc": micro, "per_class_f1": f1.tolist()}

    # label-efficiency robust (LogReg only)
    label_eff = {}
    total = X_tr.shape[0]
    for frac in [0.01,0.05,0.10,0.25,0.50,1.0]:
        k = max(1, int(total * frac))
        sel = stratified_subsample_indices(y_tr, k, random_state=42)
        if sel is None:
            label_eff[f"{int(frac*100)}%"] = {"accuracy": None, "skipped": True, "reason": "insufficient_class_diversity_for_k"}
            continue
        clf = LogisticRegression(max_iter=2000)
        clf.fit(X_tr[sel], y_tr[sel])
        score = accuracy_score(y_test, clf.predict(X_test))
        label_eff[f"{int(frac*100)}%"] = {"accuracy": float(score), "skipped": False, "k_used": int(len(sel))}

    ratio_results.append({
        "train_frac": train_frac,
        "n_train": int(n_train),
        "n_val": int(len(X_val)),
        "n_test": int(n_test),
        "probes": probes_res,
        "label_efficiency": label_eff
    })

    # Save intermediate results
    with open(os.path.join(OUT_ROOT, f"ratio_{int(train_frac*100)}.json"), "w") as f:
        json.dump(ratio_results[-1], f, indent=2)

# Save full results and CSV table
with open(os.path.join(OUT_ROOT, "ratio_results.json"), "w") as f:
    json.dump(ratio_results, f, indent=2)

import csv
csv_path = os.path.join(OUT_ROOT, "ratio_table.csv")
with open(csv_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # header
    header = ["train_pct", "n_train", "n_val", "n_test"]
    probe_names = list(CLASSIFIERS.keys())
    header += [pn + "_acc" for pn in probe_names]
    writer.writerow(header)
    for rr in ratio_results:
        row = [int(rr["train_frac"]*100), rr["n_train"], rr["n_val"], rr["n_test"]]
        for pn in probe_names:
            acc_val = rr["probes"][pn]["accuracy"] if rr["probes"][pn].get("accuracy") is not None else ""
            row.append(acc_val)
        writer.writerow(row)

# Zip outputs
zipname = os.path.join("/kaggle/working", "task7_ratio_outputs")
if os.path.exists(zipname + ".zip"):
    os.remove(zipname + ".zip")
shutil.make_archive(base_name=zipname, format="zip", root_dir=OUT_ROOT)
print("Ratio sweep outputs zipped to", zipname + ".zip")
print("Done. Outputs in", OUT_ROOT)


Total samples used for ratio sweep: 1800
Running ratio: train 90% -> n_train=1620, n_test=180
Running ratio: train 80% -> n_train=1440, n_test=360
Running ratio: train 70% -> n_train=1260, n_test=540
Running ratio: train 60% -> n_train=1080, n_test=720
Running ratio: train 50% -> n_train=900, n_test=900
Running ratio: train 40% -> n_train=720, n_test=1080
Running ratio: train 30% -> n_train=540, n_test=1260
Running ratio: train 20% -> n_train=360, n_test=1440
Running ratio: train 10% -> n_train=180, n_test=1620
Ratio sweep outputs zipped to /kaggle/working/task7_ratio_outputs.zip
Done. Outputs in /kaggle/working/task7_ratios


In [11]:
# ===== FIXED CELL C: Statistical Tests (produces stats_report.json & significance_table.txt) =====
# Self-contained. Run after ablation + ratio cells produced outputs.
import os, json, itertools, math, warnings
from pathlib import Path
import numpy as np
from scipy import stats

# Try optional libs
try:
    import scikit_posthocs as sp
    HAVE_SPPH = True
except Exception:
    HAVE_SPPH = False

try:
    from statsmodels.stats.contingency_tables import mcnemar
    HAVE_STATSMODELS = True
except Exception:
    HAVE_STATSMODELS = False

# Paths (edit if needed)
ABLATION_DIR = "/kaggle/working/task7_ablation"   # outputs from CELL A
RATIO_DIR = "/kaggle/working/task7_ratios"       # outputs from CELL B
OUT_DIR = "/kaggle/working/task7_stats"
os.makedirs(OUT_DIR, exist_ok=True)

# Initialize report with consistent structure
stats_report = {
    "friedman_ratio": None,
    "posthoc_ratio": None,
    "pairwise_tests_ratio": [],   # standardized entries: {"pair":(...), "test": "wilcoxon"|"ttest", "stat":..., "pvalue":...}
    "ablation_mcnemar": [],
    "ablation_pairwise_wilcoxon": []
}

# Helper to safely append a pairwise test record
def append_pairwise(pair, test_name, stat_val, pval):
    stats_report["pairwise_tests_ratio"].append({
        "pair": pair,
        "test": test_name,
        "stat": None if stat_val is None else float(stat_val),
        "pvalue": None if pval is None else float(pval)
    })

# --------- Load ratio results and prepare matrix for Friedman test ---------
ratio_json = os.path.join(RATIO_DIR, "ratio_results.json")
if not os.path.exists(ratio_json):
    print("WARNING: ratio_results.json not found at", ratio_json)
    ratio_results = None
else:
    with open(ratio_json,"r") as f:
        ratio_results = json.load(f)

if ratio_results:
    # build matrix: rows = ratios, cols = probe names (consistent order)
    probe_names = list(ratio_results[0]["probes"].keys())
    ratios = [r.get("train_frac", r.get("train_pct", None)) for r in ratio_results]
    # build accuracy matrix, ensuring numeric or nan
    acc_matrix = np.array([[ (r["probes"][p].get("accuracy") if isinstance(r["probes"][p].get("accuracy"), (int,float)) else np.nan)
                              for p in probe_names] for r in ratio_results], dtype=float)
    # drop columns (probes) that are all NaN
    valid_cols = ~np.all(np.isnan(acc_matrix), axis=0)
    probe_names_valid = [pn for pn,ok in zip(probe_names, valid_cols) if ok]
    acc_matrix_valid = acc_matrix[:, valid_cols]

    # Friedman test across probes using ratios as blocks (requires no NaNs)
    try:
        if np.isnan(acc_matrix_valid).any():
            # if there are NaNs, remove rows that contain any NaN (blocks with missing probe)
            good_rows = ~np.isnan(acc_matrix_valid).any(axis=1)
            arr = acc_matrix_valid[good_rows]
            if arr.shape[0] >= 2 and arr.shape[1] >= 2:
                fried_stat, fried_p = stats.friedmanchisquare(*[arr[:,j] for j in range(arr.shape[1])])
            else:
                fried_stat, fried_p = None, None
        else:
            fried_stat, fried_p = stats.friedmanchisquare(*[acc_matrix_valid[:,j] for j in range(acc_matrix_valid.shape[1])])
        stats_report["friedman_ratio"] = {"statistic": None if fried_stat is None else float(fried_stat),
                                         "pvalue": None if fried_p is None else float(fried_p),
                                         "n_blocks": int(acc_matrix_valid.shape[0]),
                                         "n_probes": int(acc_matrix_valid.shape[1]),
                                         "probe_names": probe_names_valid}
        print("Friedman test on ratio sweep probes:", stats_report["friedman_ratio"])
    except Exception as e:
        print("Friedman test failed:", e)
        stats_report["friedman_ratio"] = {"error": str(e)}

    # Post-hoc: try Nemenyi if scikit-posthocs available
    if HAVE_SPPH and acc_matrix_valid.shape[0] >= 2 and acc_matrix_valid.shape[1] >= 2:
        try:
            pvals = sp.posthoc_nemenyi_friedman(acc_matrix_valid)
            stats_report["posthoc_ratio"] = {"method": "nemenyi", "pvals_matrix": pvals.values.tolist(), "probe_names": probe_names_valid}
            print("Nemenyi post-hoc completed.")
        except Exception as e:
            print("Nemenyi failed:", e)
            stats_report["posthoc_ratio"] = {"error": str(e)}
    else:
        # fallback: pairwise Wilcoxon signed-rank tests with Holm correction
        from itertools import combinations
        p_list = []
        pairs = []
        for i,j in combinations(range(acc_matrix_valid.shape[1]),2):
            a = acc_matrix_valid[:, i]
            b = acc_matrix_valid[:, j]
            # drop rows where either is nan
            mask = ~np.isnan(a) & ~np.isnan(b)
            a2 = a[mask]; b2 = b[mask]
            if len(a2) < 2:
                # not enough paired observations — skip
                append_pairwise((probe_names_valid[i], probe_names_valid[j]), "insufficient_data", None, None)
                p_list.append(np.nan); pairs.append((probe_names_valid[i], probe_names_valid[j], None))
                continue
            try:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=RuntimeWarning)
                    stat, p = stats.wilcoxon(a2, b2)
                append_pairwise((probe_names_valid[i], probe_names_valid[j]), "wilcoxon", stat, p)
                p_list.append(p); pairs.append((probe_names_valid[i], probe_names_valid[j], float(p)))
            except Exception:
                # fallback to paired t-test
                try:
                    stat_t, p_t = stats.ttest_rel(a2, b2)
                    append_pairwise((probe_names_valid[i], probe_names_valid[j]), "ttest_rel", stat_t, p_t)
                    p_list.append(p_t); pairs.append((probe_names_valid[i], probe_names_valid[j], float(p_t)))
                except Exception:
                    append_pairwise((probe_names_valid[i], probe_names_valid[j]), "error", None, None)
                    p_list.append(np.nan); pairs.append((probe_names_valid[i], probe_names_valid[j], None))

        # Holm-Bonferroni correction on valid p-values
        valid_p_idx = [idx for idx,p in enumerate(p_list) if not (p is None or np.isnan(p))]
        m = len(valid_p_idx)
        holm_results = []
        if m > 0:
            sorted_idx = sorted(valid_p_idx, key=lambda k: p_list[k])
            for rank, idx in enumerate(sorted_idx, start=1):
                p = p_list[idx]
                adj_thresh = 0.05/(m - rank + 1)
                pair = pairs[idx]
                holm_results.append({"pair": (pair[0], pair[1]), "raw_p": float(p), "adj_threshold": adj_thresh, "significant": float(p) <= adj_thresh})
        stats_report["posthoc_ratio"] = {"method": "wilcoxon_holm", "results": holm_results}

    # Also run pairwise paired t-tests (for reference) and store consistently
    for i,j in combinations(range(acc_matrix_valid.shape[1]),2):
        a = acc_matrix_valid[:, i]; b = acc_matrix_valid[:, j]
        mask = ~np.isnan(a) & ~np.isnan(b)
        a2 = a[mask]; b2 = b[mask]
        if len(a2) < 2:
            append_pairwise((probe_names_valid[i], probe_names_valid[j]), "ttest_rel_insufficient", None, None)
            continue
        tstat, p = stats.ttest_rel(a2, b2)
        append_pairwise((probe_names_valid[i], probe_names_valid[j]), "ttest_rel", tstat, p)

# --------- Ablation: pairwise McNemar (if predictions available) & Wilcoxon on probe accuracies across epochs ---------
ablation_json = os.path.join(ABLATION_DIR, "ablation_results.json")
if not os.path.exists(ablation_json):
    print("WARNING: ablation_results.json not found at", ablation_json)
    ablation_results = None
else:
    with open(ablation_json,"r") as f:
        ablation_results = json.load(f)

if ablation_results:
    # Extract per-epoch accuracies for each probe
    epoch_names = [s["epoch_dir"] for s in ablation_results]
    probe_accs = {}
    for s in ablation_results:
        for probe_name, pr in s["probe_results"].items():
            probe_accs.setdefault(probe_name, []).append(pr.get("accuracy", None))
    # pairwise Wilcoxon across epochs per probe
    from itertools import combinations
    for probe_name, accs in probe_accs.items():
        pairwise = []
        # compare every epoch pair
        for i,j in combinations(range(len(accs)),2):
            a = accs[i]; b = accs[j]
            if a is None or b is None:
                pairwise.append({"epoch_pair": (epoch_names[i], epoch_names[j]), "stat": None, "pvalue": None})
                continue
            try:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=RuntimeWarning)
                    stat, p = stats.wilcoxon([a,b]) if (a==b) else stats.wilcoxon([a,b])
                pairwise.append({"epoch_pair": (epoch_names[i], epoch_names[j]), "stat": None if stat is None else float(stat), "pvalue": None if p is None else float(p)})
            except Exception:
                try:
                    stat_t, p_t = stats.ttest_rel([a],[b])
                    pairwise.append({"epoch_pair": (epoch_names[i], epoch_names[j]), "stat": None if stat_t is None else float(stat_t), "pvalue": None if p_t is None else float(p_t)})
                except Exception:
                    pairwise.append({"epoch_pair": (epoch_names[i], epoch_names[j]), "stat": None, "pvalue": None})
        stats_report["ablation_pairwise_wilcoxon"].append({"probe": probe_name, "pairwise": pairwise})

    # McNemar pairwise if per-epoch probe joblibs and test features/labels exist
    global_test_labels = "/kaggle/input/all-files/All Files/test_labels.npy"
    for s in ablation_results:
        epoch_dir = s["epoch_dir"]
        epoch_path = os.path.join(ABLATION_DIR, epoch_dir)
        test_feats_path = os.path.join(epoch_path, "test_feats.npy")
        if os.path.exists(test_feats_path) and os.path.exists(global_test_labels):
            y_true = np.load(global_test_labels)
            models_found = []
            for model_name in probe_accs.keys():
                model_file = os.path.join(epoch_path, f"probe_{model_name}.joblib")
                if os.path.exists(model_file):
                    models_found.append((model_name, model_file))
            for (m1, f1), (m2, f2) in combinations(models_found, 2):
                try:
                    import joblib
                    clf1 = joblib.load(f1)
                    clf2 = joblib.load(f2)
                    X_test = np.load(test_feats_path)
                    y1 = clf1.predict(X_test)
                    y2 = clf2.predict(X_test)
                    b01 = int(((y1==y_true) & (y2!=y_true)).sum())
                    b10 = int(((y1!=y_true) & (y2==y_true)).sum())
                    if HAVE_STATSMODELS:
                        table = [[int(((y1==y_true)&(y2==y_true)).sum()), b01],
                                 [b10, int(((y1!=y_true)&(y2!=y_true)).sum())]]
                        res = mcnemar(table, exact=False)
                        pval = float(res.pvalue)
                        stats_report["ablation_mcnemar"].append({"epoch": epoch_dir, "model_pair": (m1,m2), "b01": b01, "b10": b10, "pvalue": pval})
                    else:
                        n = b01 + b10
                        if n == 0:
                            pval = 1.0
                        else:
                            pval = 2.0 * min(stats.binom.cdf(min(b01,b10), n, 0.5), 1 - stats.binom.cdf(max(b01,b10)-1, n, 0.5))
                        stats_report["ablation_mcnemar"].append({"epoch": epoch_dir, "model_pair": (m1,m2), "b01": b01, "b10": b10, "pvalue": float(pval)})
                except Exception as e:
                    print("McNemar pairwise failed for", epoch_dir, m1, m2, e)
                    continue

# Save stats report
with open(os.path.join(OUT_DIR, "stats_report.json"), "w") as f:
    json.dump(stats_report, f, indent=2)

# Also create a human-readable significance table text
lines = []
lines.append("STATISTICAL TESTS SUMMARY\n")
if stats_report.get("friedman_ratio"):
    fr = stats_report["friedman_ratio"]
    lines.append(f"Friedman (ratio sweep) statistic={fr.get('statistic')} p={fr.get('pvalue')} probes={fr.get('probe_names')}\n")
if stats_report.get("posthoc_ratio"):
    ph = stats_report["posthoc_ratio"]
    lines.append(f"Post-hoc method: {ph.get('method')}\n")
    if ph.get("method") == "wilcoxon_holm":
        for item in ph.get("results", []):
            lines.append(f"Pair {item['pair'][0]} vs {item['pair'][1]} raw_p={item['raw_p']:.4f} adj_thresh={item['adj_threshold']:.4f} significant={item['significant']}\n")
    elif ph.get("method") == "nemenyi":
        lines.append("Nemenyi p-value matrix (rows/cols = probes):\n")
        for row in ph.get("pvals_matrix", []):
            lines.append(", ".join([f"{x:.4f}" for x in row]) + "\n")

# pairwise tests (robust printing)
if stats_report.get("pairwise_tests_ratio"):
    lines.append("\nPairwise tests (ratio sweep):\n")
    for item in stats_report["pairwise_tests_ratio"]:
        pair = item.get("pair")
        test = item.get("test")
        stat = item.get("stat")
        p = item.get("pvalue")
        lines.append(f"{pair[0]} vs {pair[1]}  test={test}  stat={stat}  p={p}\n")

if stats_report.get("ablation_mcnemar"):
    lines.append("\nAblation McNemar pairwise results:\n")
    for item in stats_report["ablation_mcnemar"]:
        lines.append(f"Epoch {item['epoch']} pair {item['model_pair'][0]} vs {item['model_pair'][1]} b01={item['b01']} b10={item['b10']} p={item['pvalue']:.6f}\n")

if stats_report.get("ablation_pairwise_wilcoxon"):
    lines.append("\nAblation pairwise Wilcoxon (per-probe):\n")
    for item in stats_report["ablation_pairwise_wilcoxon"]:
        lines.append(f"Probe: {item['probe']}\n")
        for p in item["pairwise"]:
            lines.append(f"  {p['epoch_pair'][0]} vs {p['epoch_pair'][1]} p={p.get('pvalue')}\n")

with open(os.path.join(OUT_DIR, "significance_table.txt"), "w") as f:
    f.writelines(lines)

print("Stats saved to", os.path.join(OUT_DIR, "stats_report.json"))
print("Human-readable table saved to", os.path.join(OUT_DIR, "significance_table.txt"))


Friedman test on ratio sweep probes: {'statistic': 31.55555555555557, 'pvalue': 2.3579392529254347e-06, 'n_blocks': 9, 'n_probes': 5, 'probe_names': ['LogisticRegression', 'SVM_RBF', 'RandomForest', 'DecisionTree', 'MLP']}
Stats saved to /kaggle/working/task7_stats/stats_report.json
Human-readable table saved to /kaggle/working/task7_stats/significance_table.txt


In [12]:
# ===== CELL D: Zip all Task 7 outputs into task7_results.zip =====
import os, shutil

OUT_ZIP = "/kaggle/working/task7_results"
# directories to include
dirs = [
    "/kaggle/working/task7_ablation",
    "/kaggle/working/task7_ratios",
    "/kaggle/working/task7_stats"
]
# create combined folder if you want; but we'll directly archive each dir into one zip root
# remove existing zip
if os.path.exists(OUT_ZIP + ".zip"):
    os.remove(OUT_ZIP + ".zip")

# create a temporary folder to aggregate (safe)
tmp_agg = "/kaggle/working/task7_aggregate_tmp"
if os.path.exists(tmp_agg):
    shutil.rmtree(tmp_agg)
os.makedirs(tmp_agg, exist_ok=True)

for d in dirs:
    if os.path.exists(d):
        # copy into tmp_agg preserving folder name
        folder_name = os.path.basename(d)
        dst = os.path.join(tmp_agg, folder_name)
        shutil.copytree(d, dst)
    else:
        print("Warning: directory not found, skipping:", d)

# create zip
shutil.make_archive(base_name=OUT_ZIP, format="zip", root_dir=tmp_agg)
print("Created zip:", OUT_ZIP + ".zip")
# cleanup tmp
# shutil.rmtree(tmp_agg)  # uncomment to remove tmp after creation
print("All Task 7 outputs archived.")


Created zip: /kaggle/working/task7_results.zip
All Task 7 outputs archived.
