In [1]:
!pip uninstall -y numpy
!pip install "numpy<2"
!pip install --upgrade pip setuptools wheel

# Optional: install CPU torch & torchvision (Kaggle often has torch; this ensures CPU wheel)
!pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
!pip install torchvision

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy<2
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m71.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
datasets 4.4.1 requires pyarrow>=21.0.0, but you have pyarrow 19.0.1 which is incompatible.


In [3]:
# CELL 0 — copy first non-empty /kaggle/input dataset into writable /kaggle/working/dataset_copy
import shutil, os
from pathlib import Path

INPUT_ROOT = Path("/kaggle/input")
WORK_COPY = Path("/kaggle/working/dataset_copy").resolve()
# Written only after copytree() returns, so its absence means "no copy yet" or
# "a previous copy was interrupted" — in both cases the copy has to run.
COPY_MARKER = WORK_COPY / ".copied_from"
# Child folder names that mark their parent as the root of a classification dataset.
SPLIT_DIR_NAMES = ("train", "valid", "val", "classification_dataset")


def _has_split_child(folder):
    """Return True when `folder` directly contains a sub-directory named like a dataset split."""
    return any(x.name.lower() in SPLIT_DIR_NAMES for x in folder.iterdir() if x.is_dir())


# find first non-empty input subfolder (sorted, so the choice is deterministic)
cand = None
if INPUT_ROOT.is_dir():  # iterdir() would raise if /kaggle/input is absent
    for p in sorted(INPUT_ROOT.iterdir()):
        try:
            if p.is_dir() and any(p.rglob("*")):
                cand = p
                break
        except OSError as err:
            # Unreadable entry: report it instead of silently skipping, then keep looking.
            print("Skipping unreadable input entry:", p, "-", err)
            continue

if cand is None:
    print("No dataset found under /kaggle/input. If your data is already in /kaggle/working, you can skip copying.")
else:
    print("Found input folder to copy:", cand)
    # prefer nested classification-like folder if obvious; scan shallowest folders first
    # (then by name) so the pick does not depend on filesystem enumeration order
    nested = None
    subdirs = sorted(
        (d for d in cand.rglob("*") if d.is_dir()),
        key=lambda d: (len(d.parts), str(d)),
    )
    for sub in subdirs:
        try:
            if _has_split_child(sub):
                nested = sub
                break
        except OSError as err:
            print("Skipping unreadable folder:", sub, "-", err)
            continue
    src = nested if nested is not None else cand
    print("Using source folder:", src)
    if COPY_MARKER.exists():
        print("Destination already exists:", WORK_COPY, "- skipping copy (delete to force refresh).")
    else:
        if WORK_COPY.exists():
            # Folder is there but the completion marker is not: treat it as a partial copy.
            print("Destination exists without a completion marker; re-copying into:", WORK_COPY)
        else:
            print("Copying to writable working folder:", WORK_COPY)
        # dirs_exist_ok lets the re-copy case overwrite/merge into the partial folder.
        shutil.copytree(src, WORK_COPY, dirs_exist_ok=True)
        COPY_MARKER.write_text(str(src))
        print("Copy complete. WORK_COPY contents top-level:", list(WORK_COPY.iterdir())[:20])


Found input folder to copy: /kaggle/input/classification-zip
Using source folder: /kaggle/input/classification-zip/classification_dataset
Destination already exists: /kaggle/working/dataset_copy - skipping copy (delete to force refresh).


In [4]:
# CELL 1 — dataset check
from pathlib import Path
import os

# Use the writable copy created by CELL 0
COPY_ROOT = Path("/kaggle/working/dataset_copy")
WORK_ROOT = COPY_ROOT / "classification_dataset"


def _has_splits(folder):
    """Return True when `folder` contains both a 'train' and a 'valid' entry."""
    return (folder / "train").exists() and (folder / "valid").exists()


# If the expected nested folder is missing, look for the dataset elsewhere under the copy.
if not WORK_ROOT.exists() and COPY_ROOT.exists():
    # prefer a sub-folder of dataset_copy that contains 'train' and 'valid'
    nested = next(
        (p for p in sorted(COPY_ROOT.iterdir()) if p.is_dir() and _has_splits(p)),
        None,
    )
    if nested is not None:
        WORK_ROOT = nested
    elif _has_splits(COPY_ROOT):
        # final fallback, used ONLY when no sub-folder qualified: dataset_copy itself
        # holds train/valid (the dataset was copied without extra nesting)
        WORK_ROOT = COPY_ROOT

ARTIFACT_DIR = Path("/kaggle/working/artifacts")
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)
CHECKPOINT = ARTIFACT_DIR / "classification_model.pt"  # existing model you trained earlier

print("WORK_ROOT:", WORK_ROOT)
assert WORK_ROOT.exists(), f"WORK_ROOT not found: {WORK_ROOT}. If your dataset is under a different folder, inspect /kaggle/working/dataset_copy."

# List the top-level entries so the layout can be checked by eye.
for p in sorted(WORK_ROOT.iterdir()):
    print(p.name, "(dir)" if p.is_dir() else "")

# check train/valid presence
train_dir = WORK_ROOT / "train"
val_dir = WORK_ROOT / "valid"
if not train_dir.exists():
    # try the alternative training-folder names; if none exists train_dir keeps
    # pointing at the missing 'train' path and is reported as such below
    for alt in ["train_images","training"]:
        if (WORK_ROOT/alt).exists():
            train_dir = WORK_ROOT/alt
            break

print("train_dir:", train_dir, "exists:", train_dir.exists())
print("val_dir:", val_dir, "exists:", val_dir.exists())


WORK_ROOT: /kaggle/working/dataset_copy
.copied_from 
test (dir)
train (dir)
valid (dir)
train_dir: /kaggle/working/dataset_copy/train exists: True
val_dir: /kaggle/working/dataset_copy/valid exists: True


In [5]:
# CELL 2 — dataloaders and classes
import json
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from pathlib import Path
import torch

# Seed torch's global RNG so batch shuffling and the random flips repeat across runs.
SEED = 42
torch.manual_seed(SEED)

WORK_ROOT = Path("/kaggle/working/dataset_copy/classification_dataset")  # canonical dataset copy root
# fallback: if above not right, try top-level copy
if not WORK_ROOT.exists():
    WORK_ROOT = Path("/kaggle/working/dataset_copy")

train_dir = WORK_ROOT / "train"
val_dir = WORK_ROOT / "valid"

# transforms (simple): fixed 224x224 resize; random horizontal flip on the training split only
train_tf = T.Compose([T.Resize((224,224)), T.RandomHorizontalFlip(), T.ToTensor()])
val_tf   = T.Compose([T.Resize((224,224)), T.ToTensor()])

# Use ImageFolder (assumes folder-per-class under train/ and valid/)
train_ds = ImageFolder(str(train_dir), transform=train_tf)
val_ds = ImageFolder(str(val_dir), transform=val_tf)

# Each ImageFolder numbers its classes from its OWN sub-folders. If the two splits do not
# contain the same class folders, label indices mean different things in train and valid
# and the validation accuracy would be silently wrong — fail loudly instead.
if val_ds.classes != train_ds.classes:
    raise ValueError(
        f"Class folders differ between splits: train={train_ds.classes} valid={val_ds.classes}"
    )

print("Detected classes:", train_ds.classes)
print("Num classes:", len(train_ds.classes))
print("Train samples:", len(train_ds), "Val samples:", len(val_ds))

# Dataloaders (num_workers=0 keeps loading in the main process)
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=0)

# Save class names to artifact for reproducibility (list order == label index order)
ARTIFACT_DIR = Path("/kaggle/working/artifacts"); ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)
with open(ARTIFACT_DIR/"class_names.json","w",encoding="utf-8") as f:
    json.dump(train_ds.classes, f)
print("Saved /kaggle/working/artifacts/class_names.json")


Detected classes: ['bird', 'drone']
Num classes: 2
Train samples: 2662 Val samples: 442
Saved /kaggle/working/artifacts/class_names.json


In [6]:
# CELL 3 — model construction & robust checkpoint load
import json
import torch
from torchvision import models
from pathlib import Path
import os

ARTIFACT_DIR = Path("/kaggle/working/artifacts")
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)
CHECKPOINT = ARTIFACT_DIR / "classification_model.pt"  # existing checkpoint (may have wrong fc size)
device = "cpu"

# number of classes, read back from the class list saved as class_names.json
# (context manager so the file handle is closed deterministically)
with open(ARTIFACT_DIR/"class_names.json","r",encoding="utf-8") as f:
    class_names = json.load(f)
num_classes = len(class_names)
if num_classes < 1:
    # a zero-output classification head cannot be trained; stop with a clear message
    raise ValueError(f"class_names.json contains no classes: {ARTIFACT_DIR/'class_names.json'}")
print("num_classes:", num_classes, "class_names:", class_names)

# build fresh model (no pretrained weights) and remember the input width of its head
model = models.resnet18(weights=None)
num_ftrs = model.fc.in_features

# Attempt to load checkpoint state dict robustly
def extract_state_dict(maybe_dict):
    """Locate the model state dict inside a loaded checkpoint object.

    Lookup order:
      1. a dict stored under one of the common wrapper keys
         ('model_state_dict', 'state_dict', 'net', 'state', 'model');
      2. `maybe_dict` itself, when every value looks like a tensor/array;
      3. the largest nested dict that holds at least one tensor.

    If every key of the chosen dict starts with 'module.' (the prefix added by
    nn.DataParallel / DistributedDataParallel wrappers), a copy with that prefix
    removed is returned so the keys line up with an unwrapped model. Otherwise the
    chosen dict is returned as-is (same object, not a copy).

    Args:
        maybe_dict: object returned by torch.load.

    Returns:
        The state dict, or None when `maybe_dict` is not a dict or nothing
        resembling a state dict is found.
    """
    if not isinstance(maybe_dict, dict):
        return None

    def _looks_like_weight(value):
        # tensors, plus array-likes exposing .shape; nested dicts never count
        return isinstance(value, torch.Tensor) or (hasattr(value, 'shape') and not isinstance(value, dict))

    def _strip_module_prefix(sd):
        prefix = "module."
        # only rewrite when EVERY key carries the prefix; a partial match is left untouched
        if sd and all(isinstance(k, str) and k.startswith(prefix) for k in sd):
            return {k[len(prefix):]: v for k, v in sd.items()}
        return sd

    # common keys: 'model_state_dict', 'state_dict', 'net', 'state', 'model'
    for key in ("model_state_dict", "state_dict", "net", "state", "model"):
        inner = maybe_dict.get(key)
        if isinstance(inner, dict):
            return _strip_module_prefix(inner)

    # heuristics: if values look like tensors assume it's a state dict
    if all(_looks_like_weight(v) for v in maybe_dict.values()):
        return _strip_module_prefix(maybe_dict)

    # otherwise take the largest dict-valued entry that contains tensors
    # (max() keeps the first one on ties)
    candidates = [
        v for v in maybe_dict.values()
        if isinstance(v, dict) and any(isinstance(x, torch.Tensor) for x in v.values())
    ]
    if candidates:
        return _strip_module_prefix(max(candidates, key=len))
    return None

# Warm-start the backbone from an earlier checkpoint when one exists. The classification
# head ('fc.*') is never taken from the checkpoint: its size may not match num_classes,
# and it is replaced by a freshly initialised layer at the end of this cell anyway.
import torch.nn as nn

loaded = False
if CHECKPOINT.exists():
    # NOTE(review): torch.load unpickles the file — only point CHECKPOINT at files you trust.
    ck = torch.load(CHECKPOINT, map_location=device)
    state_dict = extract_state_dict(ck) or ck
    try:
        if not isinstance(state_dict, dict):
            raise TypeError(f"checkpoint payload is {type(state_dict).__name__}, expected a state dict")
        # Drop the head so a differently sized fc layer can never cause a shape error.
        backbone_sd = {k: v for k, v in state_dict.items() if not str(k).startswith("fc.")}
        # strict=False tolerates missing/unexpected keys (shape mismatches still raise)
        load_res = model.load_state_dict(backbone_sd, strict=False)
        # keys that were in the checkpoint AND exist in the model
        n_matched = len(backbone_sd) - len(load_res.unexpected_keys)
        print("Checkpoint keys matched:", n_matched,
              "| missing in checkpoint:", len(load_res.missing_keys),
              "| unexpected in checkpoint:", len(load_res.unexpected_keys))
        if n_matched > 0:
            loaded = True
            print("Backbone weights copied from checkpoint (fc skipped or mismatched).")
        else:
            # strict=False "succeeds" even when nothing lines up; do not report that as a load
            print("Warning: checkpoint shares no parameter names with this model — proceeding with randomly initialized model.")
    except Exception as e:
        # best-effort by design: a bad checkpoint must not stop the notebook
        print("Final load attempt failed — proceeding with randomly initialized model. Error:", e)
else:
    print("No existing checkpoint found at", CHECKPOINT, "; proceeding from scratch.")

# Replace final fc with new layer for correct num_classes (random init)
model.fc = nn.Linear(num_ftrs, num_classes)
model.to(device)
print("Model ready. Final fc shape:", model.fc.weight.shape)


num_classes: 2 class_names: ['bird', 'drone']
No existing checkpoint found at /kaggle/working/artifacts/classification_model.pt ; proceeding from scratch.
Model ready. Final fc shape: torch.Size([2, 512])


In [7]:
# ---------- PREFLIGHT TEST (single batch forward+backward) ----------
import copy
import torch, time
from pathlib import Path
from torchvision import models
from torch import nn, optim

# Relies on train_loader / val_loader (CELL 2) and on model / class_names (CELL 3).
print("Sanity: train_loader size:", len(train_loader), "val:", len(val_loader))

device = "cpu"
# Run the smoke test on a throw-away deep copy of the model prepared in CELL 3.
# Rebuilding `model` here would discard the checkpoint-initialised weights, and stepping
# an optimizer on `model` itself would alter the weights the training cell starts from.
probe_model = copy.deepcopy(model).to(device)
assert probe_model.fc.out_features == len(class_names), (
    f"model head has {probe_model.fc.out_features} outputs but there are {len(class_names)} classes"
)
print("Model built with final fc:", probe_model.fc.weight.shape)

# one optimization step on one batch
probe_opt = optim.Adam(probe_model.parameters(), lr=1e-4)
probe_criterion = nn.CrossEntropyLoss()

# get a single batch (the timer deliberately includes data loading)
start = time.time()
imgs, labels = next(iter(train_loader))
print("Batch shapes:", imgs.shape, labels.shape)
imgs, labels = imgs.to(device), labels.to(device)

probe_model.train()
probe_opt.zero_grad()
out = probe_model(imgs)
loss = probe_criterion(out, labels)
print("Loss on single batch:", float(loss.item()))
loss.backward()
probe_opt.step()
print("Single-step backward completed in {:.1f}s".format(time.time()-start))
# Done - model is trainable on one batch
print("PREFLIGHT SUCCESS: data pipeline + model forward & backward OK.")


Sanity: train_loader size: 333 val: 28
Model built with final fc: torch.Size([2, 512])
Batch shapes: torch.Size([8, 3, 224, 224]) torch.Size([8])
Loss on single batch: 0.7018393278121948
Single-step backward completed in 1.3s
PRELIGHT SUCCESS: data pipeline + model forward & backward OK.


In [8]:
# CELL 4 — Fine-tune training loop (CPU)
import os
from pathlib import Path  # used for best_path below; imported here so the cell is self-contained

import torch
from torch import nn, optim
from tqdm import tqdm

# reuse objects created earlier: model, train_loader, val_loader, class_names
device = "cpu"
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
EPOCHS = 5
best_val_acc = 0.0
best_path = Path("/kaggle/working/artifacts/classification_model_finetuned.pt")
best_path.parent.mkdir(parents=True, exist_ok=True)  # torch.save does not create folders
# Tracks whether THIS run has written a checkpoint yet. Without it, a run whose accuracy
# never rises above 0.0 would save nothing and leave a stale (or no) file at best_path.
saved_this_run = False

for epoch in range(1, EPOCHS+1):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch} train"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # mean of the per-batch losses; max(1, ...) guards against an empty loader
    avg_loss = running_loss / max(1, len(train_loader))
    print(f"Epoch {epoch} training loss: {avg_loss:.4f}")

    # ---- validation pass (no gradients, eval-mode layers) ----
    model.eval()
    correct = 0; total = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            out = model(imgs)
            preds = out.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    val_acc = correct / max(1, total)
    print(f"Epoch {epoch} validation accuracy: {val_acc:.4f}")

    # save best: always after the first epoch of this run, afterwards only on improvement
    if not saved_this_run or val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_path)
        saved_this_run = True
        print("Saved new best model to", best_path)

print("Fine-tune done. Best val acc:", best_val_acc)
print("Best model path:", best_path)


Epoch 1 train: 100%|██████████| 333/333 [04:34<00:00,  1.21it/s]


Epoch 1 training loss: 0.5254
Epoch 1 validation accuracy: 0.6493
Saved new best model to /kaggle/working/artifacts/classification_model_finetuned.pt


Epoch 2 train: 100%|██████████| 333/333 [04:34<00:00,  1.21it/s]


Epoch 2 training loss: 0.3857
Epoch 2 validation accuracy: 0.7534
Saved new best model to /kaggle/working/artifacts/classification_model_finetuned.pt


Epoch 3 train: 100%|██████████| 333/333 [04:39<00:00,  1.19it/s]


Epoch 3 training loss: 0.3382
Epoch 3 validation accuracy: 0.7692
Saved new best model to /kaggle/working/artifacts/classification_model_finetuned.pt


Epoch 4 train: 100%|██████████| 333/333 [04:36<00:00,  1.21it/s]


Epoch 4 training loss: 0.2855
Epoch 4 validation accuracy: 0.8281
Saved new best model to /kaggle/working/artifacts/classification_model_finetuned.pt


Epoch 5 train: 100%|██████████| 333/333 [04:41<00:00,  1.18it/s]


Epoch 5 training loss: 0.2211
Epoch 5 validation accuracy: 0.8258
Fine-tune done. Best val acc: 0.8280542986425339
Best model path: /kaggle/working/artifacts/classification_model_finetuned.pt


In [9]:
# CELL 5 — load best model and run a single-sample prediction (prints label)
import json
import torch, random
from torchvision import transforms as T
from torchvision import models
from PIL import Image
from pathlib import Path
import os

IMAGE_EXTS = ('.jpg', '.jpeg', '.png')


def _first_image(folder):
    """Return the first image file found under `folder` (os.walk order), or None if there is none."""
    for root, dirs, files in os.walk(folder):
        for f in files:
            if f.lower().endswith(IMAGE_EXTS):
                return Path(root) / f
    return None


BEST = Path("/kaggle/working/artifacts/classification_model_finetuned.pt")
if not BEST.exists():
    BEST = Path("/kaggle/working/artifacts/classification_model.pt")  # fallback
if not BEST.exists():
    raise FileNotFoundError(f"No model checkpoint found (last path tried: {BEST}). Run the training cell first.")

# reconstruct model architecture then load
with open("/kaggle/working/artifacts/class_names.json","r",encoding="utf-8") as f:
    class_names = json.load(f)
num_classes = len(class_names)
model_eval = models.resnet18(weights=None)
num_ftrs = model_eval.fc.in_features
model_eval.fc = torch.nn.Linear(num_ftrs, num_classes)
# strict load: raises if the checkpoint does not match this architecture / head size
model_eval.load_state_dict(torch.load(BEST, map_location="cpu"))
model_eval.eval()

# find a sample image from valid
WORK_ROOT = Path("/kaggle/working/dataset_copy/classification_dataset")
if not WORK_ROOT.exists():
    WORK_ROOT = Path("/kaggle/working/dataset_copy")

sample_img = _first_image(WORK_ROOT / "valid")
if sample_img is None:
    # fallback: any image
    sample_img = _first_image(WORK_ROOT)
if sample_img is None:
    raise FileNotFoundError(f"No .jpg/.jpeg/.png image found under {WORK_ROOT}")

print("Sample image:", sample_img)
# same deterministic preprocessing as validation: resize to 224x224, convert to tensor
tf = T.Compose([T.Resize((224,224)), T.ToTensor()])
with Image.open(sample_img) as im:  # close the file handle once the pixels are read
    img = tf(im.convert("RGB")).unsqueeze(0)  # add the batch dimension
with torch.no_grad():
    out = model_eval(img)
    pred = int(out.argmax(dim=1).item())
print("Predicted index:", pred, "Predicted label:", class_names[pred])


Sample image: /kaggle/working/dataset_copy/valid/bird/0527b2c8cde80736_jpg.rf.e7cfd4cdde3117b1c4797fe2a669281f.jpg
Predicted index: 0 Predicted label: bird
