<a href="https://colab.research.google.com/github/ErangaOttachchige/Final-Year-Research-Project/blob/main/01_Stage_2_Species_Classification_OPTIMIZED_for_Colab_Free_T4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stage 2 Species Classification - OPTIMIZED for Colab Free T4
# Key improvements:
# - Faster image copying (shutil > rsync)
# - Preprocessed dataset (cached 224x224 tensors)
# - Smart worker config (no RAM crash)
# - Gradient accumulation for effective larger batches
# - Progress tracking


In [10]:
# ============================================================================
# SETUP: Mount Drive + Paths
# ============================================================================
from google.colab import drive
drive.mount("/content/drive")

import os
DRIVE_CCT = "/content/drive/MyDrive/datasets/cct20"
IMG_DIR   = f"{DRIVE_CCT}/eccv_18_all_images_sm"
PROC_DIR  = f"{DRIVE_CCT}/processed"
CSV_STAGE2 = f"{PROC_DIR}/cct20_stage2_species_imagelevel.csv"

print("âœ“ PROC_DIR files:", os.listdir(PROC_DIR))
print("âœ“ Stage2 CSV exists:", os.path.exists(CSV_STAGE2))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
âœ“ PROC_DIR files: ['cct20_species_annotations.csv', 'cct20_stage1_imagelevel.csv', 'cct20_stage2_species_imagelevel.csv']
âœ“ Stage2 CSV exists: True


In [11]:
# ============================================================================
# INSTALL PACKAGES
# ============================================================================
!pip -q install timm torchmetrics pandas numpy scikit-learn pillow tqdm

import torch
print("âœ“ CUDA:", torch.cuda.is_available())
print("âœ“ GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")

âœ“ CUDA: True
âœ“ GPU: Tesla T4


In [12]:
# ============================================================================
# LOAD CSV + CREATE CACHE DIRECTLY FROM DRIVE
# ============================================================================
import pandas as pd
import glob
import torch
import hashlib
from PIL import Image
import torchvision.transforms as T
from tqdm import tqdm

# Load CSV (keep Drive paths - we'll cache directly)
df = pd.read_csv(CSV_STAGE2)

# Verify paths exist
missing = (~df["path"].apply(os.path.exists)).sum()
print(f"âœ“ Rows: {len(df)}, Missing: {missing}")
print("\nSplit counts:\n", df["split"].value_counts())
print("\nLabel counts:\n", df["label_stage2"].value_counts())

# Label mapping
classes = sorted(df["label_stage2"].unique())
class_to_idx = {c:i for i,c in enumerate(classes)}
idx_to_class = {i:c for c,i in class_to_idx.items()}
df["y"] = df["label_stage2"].map(class_to_idx)



âœ“ Rows: 51237, Missing: 0

Split counts:
 split
test_trans    20384
test_cis      13856
train         12885
val_cis        2448
val_trans      1664
Name: count, dtype: int64

Label counts:
 label_stage2
opossum     13688
raccoon      7841
rabbit       5549
coyote       5315
bobcat       4961
cat          4601
squirrel     3181
dog          2788
bird         1402
skunk         857
rodent        812
other         242
Name: count, dtype: int64


In [14]:
# ============================================================================
# PREPROCESS DIRECTLY FROM DRIVE TO LOCAL CACHE (one-time)
# ============================================================================

from concurrent.futures import ThreadPoolExecutor
from functools import partial

CACHE_DIR = "/content/preprocessed_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

def get_cache_path(img_path):
    """Generate unique cache filename"""
    hash_name = hashlib.md5(img_path.encode()).hexdigest()
    return os.path.join(CACHE_DIR, f"{hash_name}.pt")

# Add cache paths to dataframe
df["cache_path"] = df["path"].apply(get_cache_path)

# Check if cache exists
cached_count = len(glob.glob(CACHE_DIR + "/*.pt"))

def process_one_image(row, preprocess_tf):
    """Process and cache a single image"""
    cache_path = row["cache_path"]
    if os.path.exists(cache_path):
        return
    try:
        img = Image.open(row["path"]).convert("RGB")
        tensor = preprocess_tf(img)
        torch.save(tensor, cache_path)
    except Exception as e:
        print(f"Error processing {row['path']}: {e}")

if cached_count < len(df) * 0.95:
    print(f"ðŸ”„ Parallel caching ({cached_count}/{len(df)} exist)...")
    print("Using 8 parallel workers - should take ~3-5 min")

    preprocess_tf = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
    ])

    # Get rows that need processing
    rows_to_process = [row for _, row in df.iterrows()]

    # Process 8 images at a time (parallel)
    with ThreadPoolExecutor(max_workers=8) as executor:
        list(tqdm(
            executor.map(partial(process_one_image, preprocess_tf=preprocess_tf),
                        rows_to_process),
            total=len(rows_to_process),
            desc="Parallel caching"
        ))

    print(f"âœ“ Cache complete! {len(glob.glob(CACHE_DIR + '/*.pt'))} tensors")
else:
    print(f"âœ“ Cache exists ({cached_count} tensors) - skipping preprocessing!")

ðŸ”„ Parallel caching (640/51237 exist)...
Using 8 parallel workers - should take ~3-5 min


Parallel caching: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 51237/51237 [44:22<00:00, 19.25it/s]

âœ“ Cache complete! 51237 tensors





In [15]:
# ============================================================================
# OPTIMIZED DATASET (loads cached tensors, not raw images)
# ============================================================================
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

# Augmentation transforms (applied to cached tensors during training)
aug_tf = T.Compose([
    T.RandomHorizontalFlip(0.5),
    T.ColorJitter(0.2, 0.2, 0.1),
])

class CachedImageDS(Dataset):
    """Loads preprocessed tensors (10x faster than PIL)"""
    def __init__(self, frame, augment=False):
        self.df = frame.reset_index(drop=True)
        self.augment = augment

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]

        # Load cached tensor (very fast!)
        x = torch.load(row["cache_path"])

        # Apply augmentation if training
        if self.augment:
            x = aug_tf(x)

        y = int(row["y"])
        return x, y

# Split data
train_df = df[df["split"]=="train"].reset_index(drop=True)
val_df   = df[df["split"]=="val_cis"].reset_index(drop=True)
valT_df  = df[df["split"]=="val_trans"].reset_index(drop=True)
test_cis_df   = df[df["split"]=="test_cis"].reset_index(drop=True)
test_trans_df = df[df["split"]=="test_trans"].reset_index(drop=True)

print(f"\nâœ“ Splits - train: {len(train_df)}, val_cis: {len(val_df)}, val_trans: {len(valT_df)}")

# Create datasets
train_ds = CachedImageDS(train_df, augment=True)
val_ds   = CachedImageDS(val_df, augment=False)
valT_ds  = CachedImageDS(valT_df, augment=False)
test_cis_ds   = CachedImageDS(test_cis_df, augment=False)
test_trans_ds = CachedImageDS(test_trans_df, augment=False)

# Balanced sampling
counts = train_df["y"].value_counts().sort_index()
w_class = 1.0 / counts
w_sample = train_df["y"].map(w_class).values
sampler = WeightedRandomSampler(torch.tensor(w_sample, dtype=torch.double),
                                num_samples=len(w_sample),
                                replacement=True)

# Class weights for loss
cw = (counts.sum() / (len(counts) * counts)).values
class_weight = torch.tensor(cw, dtype=torch.float32)


âœ“ Splits - train: 12885, val_cis: 2448, val_trans: 1664


In [16]:
# ============================================================================
# OPTIMIZED DATALOADERS (no RAM crash on Colab free)
# ============================================================================
BATCH_TRAIN = 32  # Smaller batch, use gradient accumulation
BATCH_EVAL  = 64
NUM_WORKERS = 2   # CRITICAL: 2 workers max on Colab free to avoid RAM crash

train_loader = DataLoader(train_ds, batch_size=BATCH_TRAIN, sampler=sampler,
                          num_workers=NUM_WORKERS, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_EVAL, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)
valT_loader  = DataLoader(valT_ds, batch_size=BATCH_EVAL, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)
test_cis_loader = DataLoader(test_cis_ds, batch_size=BATCH_EVAL, shuffle=False,
                             num_workers=NUM_WORKERS, pin_memory=True)
test_trans_loader = DataLoader(test_trans_ds, batch_size=BATCH_EVAL, shuffle=False,
                               num_workers=NUM_WORKERS, pin_memory=True)

print(f"âœ“ Classes: {len(classes)}")

âœ“ Classes: 12


In [17]:
# ============================================================================
# TRAINING WITH GRADIENT ACCUMULATION
# ============================================================================
import timm
import torch.nn as nn
from sklearn.metrics import f1_score, accuracy_score

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"âœ“ Device: {device}")

model = timm.create_model("efficientnet_b0", pretrained=True, num_classes=len(classes)).to(device)
crit  = nn.CrossEntropyLoss(weight=class_weight.to(device))
opt   = torch.optim.AdamW(model.parameters(), lr=3e-4)

scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))

# Gradient accumulation: effective batch = BATCH_TRAIN * ACCUM_STEPS
ACCUM_STEPS = 2  # Effective batch = 32 * 2 = 64

def eval_loader(loader, name="eval"):
    model.eval()
    ys, ps = [], []
    with torch.no_grad():
        for x, y in tqdm(loader, desc=name, leave=False):
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            with torch.cuda.amp.autocast(enabled=(device=="cuda")):
                logits = model(x)
            p = logits.argmax(1)
            ys.extend(y.cpu().tolist())
            ps.extend(p.cpu().tolist())
    return accuracy_score(ys, ps), f1_score(ys, ps, average="macro"), ys, ps

SAVE_PATH = f"{PROC_DIR}/stage2_best_species_efficientnet_b0_optimized.pt"
best = -1.0

EPOCHS = 5
for ep in range(1, EPOCHS+1):
    model.train()
    running_loss = 0.0

    pbar = tqdm(train_loader, desc=f"Epoch {ep}/{EPOCHS}")
    for batch_idx, (x, y) in enumerate(pbar):
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=(device=="cuda")):
            logits = model(x)
            loss = crit(logits, y) / ACCUM_STEPS  # Scale loss

        scaler.scale(loss).backward()

        # Update weights every ACCUM_STEPS
        if (batch_idx + 1) % ACCUM_STEPS == 0:
            scaler.step(opt)
            scaler.update()
            opt.zero_grad(set_to_none=True)

        running_loss += loss.item() * x.size(0) * ACCUM_STEPS
        pbar.set_postfix({"loss": f"{loss.item() * ACCUM_STEPS:.4f}"})

    train_loss = running_loss / len(train_df)

    val_acc, val_mf1, _, _ = eval_loader(val_loader, "val_cis")
    print(f"\nEpoch {ep}: train_loss={train_loss:.4f} | val_cis acc={val_acc:.3f} macroF1={val_mf1:.3f}")

    if len(valT_df) > 0:
        vt_acc, vt_mf1, _, _ = eval_loader(valT_loader, "val_trans")
        print(f"          val_trans acc={vt_acc:.3f} macroF1={vt_mf1:.3f}")

    if val_mf1 > best:
        best = val_mf1
        torch.save(model.state_dict(), SAVE_PATH)
        print(f"ðŸ’¾ SAVED BEST (macroF1={best:.3f})")


âœ“ Device: cuda


  scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 1/5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 403/403 [01:31<00:00,  4.41it/s, loss=0.1468]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 1: train_loss=0.4717 | val_cis acc=0.667 macroF1=0.643


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.202 macroF1=0.136
ðŸ’¾ SAVED BEST (macroF1=0.643)


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 2/5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 403/403 [01:04<00:00,  6.29it/s, loss=0.0421]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 2: train_loss=0.1140 | val_cis acc=0.754 macroF1=0.731


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.245 macroF1=0.142
ðŸ’¾ SAVED BEST (macroF1=0.731)


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 3/5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 403/403 [01:01<00:00,  6.55it/s, loss=0.0267]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 3: train_loss=0.0595 | val_cis acc=0.800 macroF1=0.794


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.236 macroF1=0.162
ðŸ’¾ SAVED BEST (macroF1=0.794)


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 4/5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 403/403 [01:00<00:00,  6.70it/s, loss=0.0148]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 4: train_loss=0.0409 | val_cis acc=0.827 macroF1=0.795


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.227 macroF1=0.148
ðŸ’¾ SAVED BEST (macroF1=0.795)


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
Epoch 5/5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 403/403 [01:00<00:00,  6.63it/s, loss=0.0452]
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):



Epoch 5: train_loss=0.0301 | val_cis acc=0.834 macroF1=0.808


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


          val_trans acc=0.268 macroF1=0.168
ðŸ’¾ SAVED BEST (macroF1=0.808)


In [18]:
# ============================================================================
# TEST EVALUATION
# ============================================================================
from sklearn.metrics import classification_report

print("\n" + "="*60)
print("FINAL TEST EVALUATION")
print("="*60)

model.load_state_dict(torch.load(SAVE_PATH, map_location=device))

cis_acc, cis_mf1, cis_y, cis_p = eval_loader(test_cis_loader, "test_cis")
tr_acc,  tr_mf1,  tr_y,  tr_p  = eval_loader(test_trans_loader, "test_trans")

print(f"\nðŸŽ¯ TEST CIS   â†’ acc={cis_acc:.3f}, macroF1={cis_mf1:.3f}")
print(f"ðŸŽ¯ TEST TRANS â†’ acc={tr_acc:.3f}, macroF1={tr_mf1:.3f}")

print("\n--- CIS REPORT ---")
print(classification_report(cis_y, cis_p, target_names=[idx_to_class[i] for i in range(len(classes))]))

print("\n--- TRANS REPORT ---")
print(classification_report(tr_y, tr_p, target_names=[idx_to_class[i] for i in range(len(classes))]))


FINAL TEST EVALUATION


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
                                                             


ðŸŽ¯ TEST CIS   â†’ acc=0.612, macroF1=0.579
ðŸŽ¯ TEST TRANS â†’ acc=0.380, macroF1=0.260

--- CIS REPORT ---
              precision    recall  f1-score   support

        bird       0.47      0.51      0.49       573
      bobcat       0.42      0.78      0.55       897
         cat       0.63      0.69      0.66      1632
      coyote       0.69      0.56      0.62      1326
         dog       0.68      0.53      0.60       845
     opossum       0.89      0.64      0.74      4524
       other       0.71      0.55      0.62       168
      rabbit       0.55      0.43      0.48      1758
     raccoon       0.40      0.81      0.54      1047
      rodent       0.55      0.52      0.53       233
       skunk       0.54      0.80      0.64       194
    squirrel       0.44      0.51      0.48       659

    accuracy                           0.61     13856
   macro avg       0.58      0.61      0.58     13856
weighted avg       0.67      0.61      0.62     13856


--- TRANS REPORT ---




In [19]:
# ============================================================================
# SAVE METADATA
# ============================================================================
import json

mapping = {
    "classes": classes,
    "class_to_idx": class_to_idx
}
out_json = f"{PROC_DIR}/stage2_label_mapping.json"
with open(out_json, "w") as f:
    json.dump(mapping, f, indent=2)

print(f"\nâœ“ Saved: {out_json}")
print("âœ“ Training complete!")



âœ“ Saved: /content/drive/MyDrive/datasets/cct20/processed/stage2_label_mapping.json
âœ“ Training complete!
