<a href="https://colab.research.google.com/github/ErangaOttachchige/Final-Year-Research-Project/blob/main/02_stage1_detection_filter_training_OPTIMIZED_for_Colab_Free_T4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Stage 1 Detection/Filter — Ultra-Optimized for Colab Free T4

### Key optimizations

1. Parallel caching with a progress bar (8 workers)
2. Reduced batch size with gradient accumulation
3. Conservative DataLoader worker configuration (no RAM crash)
4. Optional: skip caching and train directly from Drive (for smaller datasets)
5. Memory-efficient data loading
6. Faster evaluation

**Stage 1 binary filter**

- **Label:** `animal` vs `non_animal` (where `non_animal` = empty + car)
- **Fast:** trains from the cached tensors (no local copy of the raw images, no Drive rsync)
- **Stable:** low RAM usage, no crashes
- **Saves to Drive:** model checkpoint + label mapping
- **Progress bars:** train / val / test

In [None]:
# Mount Google Drive and derive the dataset paths used by the later cells
import os

from google.colab import drive

drive.mount("/content/drive")

DRIVE_CCT = "/content/drive/MyDrive/datasets/cct20"
PROC_DIR = os.path.join(DRIVE_CCT, "processed")
CSV_STAGE1 = os.path.join(PROC_DIR, "cct20_stage1_imagelevel.csv")

# Sanity check: show what is in the processed folder and confirm the Stage 1 CSV is there
for _tag, _value in (
    ("‚úì PROC_DIR files:", os.listdir(PROC_DIR)),
    ("‚úì Stage1 CSV exists:", os.path.exists(CSV_STAGE1)),
):
    print(_tag, _value)


Mounted at /content/drive
‚úì PROC_DIR files: ['cct20_species_annotations.csv', 'cct20_stage1_imagelevel.csv', 'cct20_stage2_species_imagelevel.csv', 'stage2_best_species_efficientnet_b0_optimized.pt', 'stage2_label_mapping.json', 'stage1_best_efficientnet_b0.pt', 'stage1_label_mapping.json', 'stage1_best_binary_efficientnet_b0.pt', 'stage1_binary_label_mapping.json']
‚úì Stage1 CSV exists: True


In [None]:
# Install packages + check GPU
!pip -q install timm torchmetrics pandas numpy scikit-learn pillow tqdm

import torch
print("‚úì CUDA:", torch.cuda.is_available())
print("‚úì GPU :", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")


[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/983.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m983.0/983.2 kB[0m [31m32.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m983.2/983.2 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25h‚úì CUDA: True
‚úì GPU : Tesla T4


In [None]:
# Load Stage1 CSV + create binary labels
import pandas as pd
import os

df = pd.read_csv(CSV_STAGE1)

# Collapse the Stage 1 labels to two classes:
# "animal" is kept, every other value (car, empty) becomes "non_animal"
df["label_bin"] = [
    "animal" if stage1_label == "animal" else "non_animal"
    for stage1_label in df["label_stage1"]
]

# Count rows whose image file cannot be found on disk
missing = len(df) - df["path"].map(os.path.exists).sum()
print(f"‚úì Rows: {len(df)}, Missing paths: {missing}")

# Distribution of rows per split and per binary label
for _title, _column in (("Split counts", "split"), ("Binary label counts", "label_bin")):
    print(f"\n{_title}:\n", df[_column].value_counts())


‚úì Rows: 57864, Missing paths: 1

Split counts:
 split
test_trans    23275
test_cis      15827
train         13553
val_cis        3484
val_trans      1725
Name: count, dtype: int64

Binary label counts:
 label_bin
animal        51237
non_animal     6627
Name: count, dtype: int64


In [None]:
# Ultra-fast caching with optimizations
import glob, hashlib
from PIL import Image
import torchvision.transforms as T
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import torch
import os

# Directory that holds one cached tensor file per image. It sits under /content
# but outside the /content/drive mount point, i.e. it is not stored on Google Drive.
CACHE_DIR = "/content/stage1_bin_cache_224"
os.makedirs(CACHE_DIR, exist_ok=True)  # exist_ok: re-running the cell is harmless

def cache_path(img_path):
    """Return the cache file location for ``img_path``.

    The file name is the MD5 hex digest of the path string itself (not of the
    image contents) followed by ``.pt``, placed inside ``CACHE_DIR``.
    """
    digest = hashlib.md5(img_path.encode()).hexdigest()
    return os.path.join(CACHE_DIR, f"{digest}.pt")

# Attach the cache file location of every image to the frame
df["cache_path"] = df["path"].map(cache_path)

# Count the .pt files currently present in the cache directory
cached_count = len(glob.glob(os.path.join(CACHE_DIR, "*.pt")))
print(f"‚úì Cached tensors: {cached_count} / {len(df)}")

# Preprocessing applied once per image before it is cached:
# resize to 224x224, then convert the image to a tensor
pre_tf = T.Compose(
    [
        T.Resize((224, 224), antialias=True),
        T.ToTensor(),
    ]
)

def process_one(row):
    """Resize one image and store it as a cached tensor, if not cached yet.

    Parameters
    ----------
    row : mapping
        Must provide ``row["path"]`` (source image) and ``row["cache_path"]``
        (destination ``.pt`` file).

    Returns
    -------
    None
        The function works through its side effect on disk.

    Notes
    -----
    * Best effort: an image that cannot be read or converted is skipped, but
      the failure is reported via ``tqdm.write`` instead of being swallowed.
    * The tensor is written to a temporary file and then moved into place with
      ``os.replace``, so an interrupted write never leaves a truncated file at
      ``cache_path`` that the ``os.path.exists`` checks would accept.
    """
    import threading  # local import: only used to build a per-thread temp name

    cp = row["cache_path"]
    if os.path.exists(cp):
        return

    # The temp name must not end in ".pt": the cache is counted with a "*.pt" glob
    tmp = f"{cp}.{os.getpid()}.{threading.get_ident()}.tmp"
    try:
        # Context manager releases the file handle as soon as the tensor is built
        with Image.open(row["path"]) as img:
            x = pre_tf(img.convert("RGB"))
        torch.save(x, tmp)
        os.replace(tmp, cp)
    except Exception as exc:
        # Remove a half-written temp file; cleanup failures are not worth raising
        try:
            if os.path.exists(tmp):
                os.remove(tmp)
        except OSError:
            pass
        tqdm.write(f"[cache] skipped {row['path']}: {exc!r}")

# Cache every image that is still missing. The work list comes from the files
# actually present on disk rather than from a "95% cached" threshold, so a
# partially filled cache is always completed instead of having its uncached
# rows dropped by the filter below.
missing_mask = ~df["cache_path"].map(os.path.exists)
rows_to_process = df.loc[missing_mask, ["path", "cache_path"]].to_dict("records")

if rows_to_process:
    print("üîÑ Caching tensors (8 workers)...")
    print(f"   Processing {len(rows_to_process)} new images...")

    with ThreadPoolExecutor(max_workers=8) as ex:
        # list(...) drains the lazy map so the progress bar advances and the
        # cell does not continue until every image has been handled
        list(tqdm(
            ex.map(process_one, rows_to_process),
            total=len(rows_to_process),
            desc="Caching Stage1(bin)"
        ))

    print("‚úì Cache done:", len(glob.glob(CACHE_DIR + "/*.pt")))
else:
    print("‚úì Cache already exists ‚úÖ")

# Keep only rows that have a cache file (safety), and say so when rows are lost
df["has_cache"] = df["cache_path"].apply(os.path.exists)
rows_before = len(df)
df = df[df["has_cache"]].reset_index(drop=True)
if len(df) < rows_before:
    print(f"   Dropped {rows_before - len(df)} rows without a cached tensor")
print("‚úì Usable rows:", len(df))

‚úì Cached tensors: 0 / 57864
üîÑ Caching tensors (8 workers)...
   Processing 57864 new images...


Caching Stage1(bin): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 57864/57864 [50:16<00:00, 19.18it/s]


‚úì Cache done: 57864
‚úì Usable rows: 57864


In [None]:
# Dataset + DataLoaders (balanced sampling + stable RAM)
import numpy as np
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.metrics import f1_score, accuracy_score
import torchvision.transforms as T
from tqdm import tqdm
import torch

# Binary label mapping: class name <-> integer index
classes = ["animal", "non_animal"]
class_to_idx = dict(zip(classes, range(len(classes))))
idx_to_class = dict(enumerate(classes))
df["y"] = df["label_bin"].map(class_to_idx)


def _split_frame(split_name):
    """Return the rows of ``df`` whose ``split`` equals ``split_name``, re-indexed from 0."""
    return df.loc[df["split"] == split_name].reset_index(drop=True)


# One frame per split
train_df = _split_frame("train")
val_df = _split_frame("val_cis")
valT_df = _split_frame("val_trans")
test_cis_df = _split_frame("test_cis")
test_trans_df = _split_frame("test_trans")

_split_report = (
    ("train:", train_df),
    ("val_cis:", val_df),
    ("val_trans:", valT_df),
    ("test_cis:", test_cis_df),
    ("test_trans:", test_trans_df),
)
print(*[part for tag, frame in _split_report for part in (tag, len(frame))])
print("\nTrain label counts:\n", train_df["label_bin"].value_counts())

# Augmentation pipeline, applied to the cached tensors of the training split:
# random horizontal flip plus a mild colour jitter
aug_tf = T.Compose(
    [
        T.RandomHorizontalFlip(p=0.5),
        T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1),
    ]
)

class CachedDS(Dataset):
    """Dataset that serves cached image tensors together with their labels.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide a ``cache_path`` column (files readable by ``torch.load``)
        and a ``y`` column whose values convert to ``int``.
    augment : bool, default False
        When true, the module-level ``aug_tf`` pipeline is applied to every
        loaded tensor.

    Each item is a ``(tensor, int_label)`` pair.

    NOTE(review): tensors are returned exactly as cached; this class applies no
    mean/std normalisation -- confirm that matches what the pretrained
    backbone expects.
    """

    def __init__(self, frame, augment=False):
        self.df = frame.reset_index(drop=True)
        self.augment = augment
        # Snapshot both columns as plain Python lists once, so that fetching a
        # sample does not build a pandas Series (``iloc``) on every call.
        self._paths = self.df["cache_path"].tolist()
        self._labels = [int(v) for v in self.df["y"].tolist()]

    def __len__(self):
        return len(self._paths)

    def __getitem__(self, i):
        x = torch.load(self._paths[i])
        if self.augment:
            x = aug_tf(x)
        return x, self._labels[i]

# Wrap each split in a dataset; only the training split is augmented
train_ds = CachedDS(train_df, augment=True)
val_ds, valT_ds, test_cis_ds, test_trans_ds = (
    CachedDS(split_frame, augment=False)
    for split_frame in (val_df, valT_df, test_cis_df, test_trans_df)
)

# Balanced sampler: every training row is drawn with probability inversely
# proportional to the size of its class (with replacement, one epoch = len(train) draws)
counts = train_df["y"].value_counts().sort_index()
w_class = 1.0 / counts
w_sample = train_df["y"].map(w_class).values
sampler = WeightedRandomSampler(
    weights=torch.tensor(w_sample, dtype=torch.double),
    num_samples=len(w_sample),
    replacement=True,
)

# Per-class loss weights: total / (n_classes * class_count)
# NOTE(review): imbalance is compensated here through sampling and again through
# `class_weight`, which the training cell passes to the loss -- confirm both are intended.
cw = (counts.sum() / (counts * len(classes))).values
class_weight = torch.tensor(cw, dtype=torch.float32)
print("\nCounts:", counts.to_dict())
print("Loss weights:", class_weight)

# Loader settings
BATCH_TRAIN = 32
BATCH_EVAL = 64
NUM_WORKERS = 2

_loader_common = dict(num_workers=NUM_WORKERS, pin_memory=True)


def _eval_dl(dataset):
    """Build a sequential (unshuffled) evaluation loader over ``dataset``."""
    return DataLoader(dataset, batch_size=BATCH_EVAL, shuffle=False, **_loader_common)


train_loader = DataLoader(train_ds, batch_size=BATCH_TRAIN, sampler=sampler, **_loader_common)
val_loader = _eval_dl(val_ds)
valT_loader = _eval_dl(valT_ds)
test_cis_loader = _eval_dl(test_cis_ds)
test_trans_loader = _eval_dl(test_trans_ds)


train: 13553 val_cis: 3484 val_trans: 1725 test_cis: 15827 test_trans: 23275

Train label counts:
 label_bin
animal        12885
non_animal      668
Name: count, dtype: int64

Counts: {0: 12885, 1: 668}
Loss weights: tensor([ 0.5259, 10.1445])


In [None]:
# Train EfficientNet-B0 (binary) + save best to Drive
import timm
import torch.nn as nn
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm
import torch

# Seed torch's RNG so the classifier-head initialisation and the weighted
# sampling order are repeatable across runs (GPU kernels may still introduce
# small numerical differences).
SEED = 42
torch.manual_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("‚úì device:", device)

# Pretrained EfficientNet-B0 with a fresh head sized for the binary problem
model = timm.create_model("efficientnet_b0", pretrained=True, num_classes=len(classes)).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weight.to(device))
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

# torch.amp.GradScaler("cuda", ...) replaces the deprecated torch.cuda.amp.GradScaler;
# with enabled=False (CPU run) the scaler is a pass-through.
scaler = torch.amp.GradScaler("cuda", enabled=(device=="cuda"))
ACCUM_STEPS = 2  # micro-batches accumulated per optimizer step

def eval_loader(loader, name="eval"):
    """Score the global ``model`` on every batch of ``loader``.

    Parameters
    ----------
    loader : iterable of ``(inputs, labels)`` tensor batches
    name : str
        Label shown on the progress bar.

    Returns
    -------
    (float, float)
        Accuracy and macro-averaged F1 over all samples of ``loader``.

    Side effect: leaves ``model`` in eval mode.
    """
    model.eval()
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast
    amp_device = torch.device(device).type
    ys, ps = [], []
    with torch.no_grad():
        for x, y in tqdm(loader, desc=name, leave=False):
            x = x.to(device, non_blocking=True)
            with torch.autocast(device_type=amp_device, enabled=(amp_device == "cuda")):
                logits = model(x)
            ps.extend(logits.argmax(1).cpu().tolist())
            # Labels are only collected, so they never need to visit the device
            ys.extend(y.tolist())
    acc = accuracy_score(ys, ps)
    mf1 = f1_score(ys, ps, average="macro")
    return acc, mf1

SAVE_PATH = f"{PROC_DIR}/stage1_best_binary_efficientnet_b0.pt"
best = -1.0   # best val_cis macro-F1 seen so far
EPOCHS = 5

# torch.autocast replaces the deprecated torch.cuda.amp.autocast
amp_device = torch.device(device).type
use_amp = (amp_device == "cuda")

for ep in range(1, EPOCHS+1):
    model.train()
    optimizer.zero_grad(set_to_none=True)
    running = 0.0   # sum of per-sample losses over the epoch
    seen = 0        # number of samples actually processed
    n_batches = len(train_loader)

    pbar = tqdm(train_loader, desc=f"Epoch {ep}/{EPOCHS} [train]")
    for i, (x, y) in enumerate(pbar, 1):
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.autocast(device_type=amp_device, enabled=use_amp):
            logits = model(x)
            # Divide so that ACCUM_STEPS accumulated micro-batches average out
            loss = criterion(logits, y) / ACCUM_STEPS

        scaler.scale(loss).backward()

        # Step every ACCUM_STEPS micro-batches, and also on the last batch so
        # gradients of a trailing partial group are applied, not discarded
        if i % ACCUM_STEPS == 0 or i == n_batches:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)

        # One .item() call per step (each call synchronises with the GPU)
        batch_loss = loss.item() * ACCUM_STEPS
        running += batch_loss * x.size(0)
        seen += x.size(0)
        pbar.set_postfix({"loss": f"{batch_loss:.4f}"})

    train_loss = running / max(seen, 1)

    val_acc, val_mf1 = eval_loader(val_loader, "val_cis")
    print(f"\nEpoch {ep}: train_loss={train_loss:.4f} | val_cis acc={val_acc:.3f} macroF1={val_mf1:.3f}")

    # val_trans is reported for information only; it does not drive model selection
    if len(valT_df) > 0:
        vt_acc, vt_mf1 = eval_loader(valT_loader, "val_trans")
        print(f"          val_trans acc={vt_acc:.3f} macroF1={vt_mf1:.3f}")

    # Keep the checkpoint with the highest val_cis macro-F1
    if val_mf1 > best:
        best = val_mf1
        torch.save(model.state_dict(), SAVE_PATH)
        print(f"üíæ SAVED BEST ‚Üí {SAVE_PATH} (macroF1={best:.3f})")


‚úì device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

  scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 1/5 [train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 424/424 [01:33<00:00,  4.53it/s, loss=0.0000]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 1: train_loss=0.0371 | val_cis acc=0.726 macroF1=0.495


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.963 macroF1=0.559
üíæ SAVED BEST ‚Üí /content/drive/MyDrive/datasets/cct20/processed/stage1_best_binary_efficientnet_b0.pt (macroF1=0.495)


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 2/5 [train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 424/424 [01:07<00:00,  6.27it/s, loss=0.0000]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 2: train_loss=0.0023 | val_cis acc=0.721 macroF1=0.477


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.901 macroF1=0.540


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 3/5 [train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 424/424 [01:11<00:00,  5.96it/s, loss=0.0000]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 3: train_loss=0.0023 | val_cis acc=0.725 macroF1=0.490


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.835 macroF1=0.499


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 4/5 [train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 424/424 [01:02<00:00,  6.77it/s, loss=0.1304]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 4: train_loss=0.0032 | val_cis acc=0.714 macroF1=0.456


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.961 macroF1=0.490


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 5/5 [train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 424/424 [01:02<00:00,  6.75it/s, loss=0.0000]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 5: train_loss=0.0043 | val_cis acc=0.716 macroF1=0.459


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
                                                          

          val_trans acc=0.965 macroF1=0.507




In [None]:

# Final test evaluation (CIS vs TRANS)
from sklearn.metrics import classification_report
import torch

# Restore the checkpoint stored at SAVE_PATH (the training cell writes it whenever
# val_cis macro-F1 improves) so the test reports do not use the last-epoch weights.
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state_dict contains.
model.load_state_dict(torch.load(SAVE_PATH, map_location=device, weights_only=True))

def full_eval(loader, name="eval"):
    """Collect labels and predictions of the global ``model`` over ``loader``.

    Parameters
    ----------
    loader : iterable of ``(inputs, labels)`` tensor batches
    name : str
        Label shown on the progress bar.

    Returns
    -------
    (list[int], list[int])
        True labels and arg-max predictions, in loader order.

    Side effect: leaves ``model`` in eval mode.
    """
    model.eval()
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast
    amp_device = torch.device(device).type
    ys, ps = [], []
    with torch.no_grad():
        for x, y in tqdm(loader, desc=name):
            x = x.to(device, non_blocking=True)
            with torch.autocast(device_type=amp_device, enabled=(amp_device == "cuda")):
                logits = model(x)
            ps.extend(logits.argmax(1).cpu().tolist())
            # Labels are only collected, so they never need to visit the device
            ys.extend(y.tolist())
    return ys, ps

# Run the restored model over both test splits
cis_y, cis_p = full_eval(test_cis_loader, "test_cis")
tr_y, tr_p = full_eval(test_trans_loader, "test_trans")

# Per-class precision / recall / F1 for each split
for _title, _y_true, _y_pred in (("CIS", cis_y, cis_p), ("TRANS", tr_y, tr_p)):
    print(f"\n--- {_title} REPORT ---")
    print(classification_report(_y_true, _y_pred, target_names=classes))


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
test_cis: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 248/248 [01:19<00:00,  3.14it/s]
test_trans: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 364/364 [01:54<00:00,  3.18it/s]



--- CIS REPORT ---
              precision    recall  f1-score   support

      animal       0.93      1.00      0.96     13856
  non_animal       0.95      0.43      0.59      1971

    accuracy                           0.93     15827
   macro avg       0.94      0.71      0.78     15827
weighted avg       0.93      0.93      0.91     15827


--- TRANS REPORT ---
              precision    recall  f1-score   support

      animal       0.92      0.98      0.95     20384
  non_animal       0.74      0.43      0.54      2891

    accuracy                           0.91     23275
   macro avg       0.83      0.70      0.75     23275
weighted avg       0.90      0.91      0.90     23275



In [None]:
# Save label mapping (Drive)
import json

# Write the class list and the name -> index mapping as JSON into PROC_DIR
out_json = f"{PROC_DIR}/stage1_binary_label_mapping.json"
mapping = dict(classes=classes, class_to_idx=class_to_idx)
with open(out_json, "w") as fh:
    fh.write(json.dumps(mapping, indent=2))

for _tag, _target in (("‚úì Saved:", out_json), ("‚úì Saved model:", SAVE_PATH)):
    print(_tag, _target)


‚úì Saved: /content/drive/MyDrive/datasets/cct20/processed/stage1_binary_label_mapping.json
‚úì Saved model: /content/drive/MyDrive/datasets/cct20/processed/stage1_best_binary_efficientnet_b0.pt
