In [1]:
import pandas as pd
from pathlib import Path
import numpy as np

ROOT = Path.cwd()
SPL = ROOT / "labels" / "splits_hier"

train_df = pd.read_csv(SPL / "train.csv")
val_df   = pd.read_csv(SPL / "val.csv")
test_df  = pd.read_csv(SPL / "test.csv")

NUM_MAIN = train_df["main_id"].max() + 1
NUM_SUB  = train_df["sub_id"].max() + 1

# Frequencies (train-only to avoid leakage)
freq_main = train_df["main_id"].value_counts().reindex(range(NUM_MAIN), fill_value=0)
freq_sub  = train_df["sub_id"].value_counts().reindex(range(NUM_SUB),  fill_value=0)

print("Train main freq:\n", freq_main.to_dict())
print("Train sub  freq:\n", freq_sub.to_dict())


Train main freq:
 {0: 666, 1: 614}
Train sub  freq:
 {0: 514, 1: 18, 2: 82, 3: 0, 4: 666}


In [2]:
import torch
from torch.utils.data import WeightedRandomSampler

eps = 1e-6
# Simple inverse-frequency weights (stable baseline)
w_main_ce = torch.tensor(1.0 / (freq_main.values + eps), dtype=torch.float32)
w_sub_ce  = torch.tensor(1.0 / (freq_sub.values  + eps), dtype=torch.float32)

# Normalize to mean=1 (optional; CE doesn’t require it, but it stabilizes LR)
w_main_ce = w_main_ce * (len(w_main_ce) / w_main_ce.sum())
w_sub_ce  = w_sub_ce  * (len(w_sub_ce)  / w_sub_ce.sum())

print("CE weights (main):", w_main_ce.tolist())
print("CE weights (sub): ", w_sub_ce.tolist())

# Per-sample weights for the sampler — balance by **sub** label
sample_w = train_df["sub_id"].map(lambda j: float(w_sub_ce[int(j)])).to_numpy()
sampler  = WeightedRandomSampler(
    weights=torch.as_tensor(sample_w, dtype=torch.double),
    num_samples=len(sample_w),     # roughly epoch size; can scale up if you want more repeats
    replacement=True
)


CE weights (main): [0.9593750834465027, 1.040624976158142]
CE weights (sub):  [9.727624927791112e-09, 2.777777297069406e-07, 6.097560145690295e-08, 4.999999523162842, 7.507506616377668e-09]


In [3]:
import cv2, albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
import torch

IMAGE_SIZE = 224
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# Train: random crop+resize to target size (v2 uses 'size'), light flips/colors
train_tfms = A.Compose([
    A.RandomResizedCrop(size=(IMAGE_SIZE, IMAGE_SIZE), scale=(0.7, 1.0), ratio=(0.75, 1.33)),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(0.2, 0.2, 0.2, 0.1, p=0.3),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

# Val/Test: resize/pad to square, normalize, tensor
val_tfms = A.Compose([
    A.LongestMaxSize(max_size=IMAGE_SIZE),
    A.PadIfNeeded(IMAGE_SIZE, IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0)),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

class HierDataset(Dataset):
    def __init__(self, df, tfms):
        self.df = df.reset_index(drop=True)
        self.tfms = tfms

    def __len__(self): return len(self.df)

    def __getitem__(self, i):
        p = self.df.loc[i, "path"]
        im = cv2.imread(p, cv2.IMREAD_UNCHANGED)
        if im is None:
            raise FileNotFoundError(p)
        if im.ndim == 2: im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
        else:            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        x = self.tfms(image=im)["image"]           # already CHW float tensor
        y_main = int(self.df.loc[i, "main_id"])
        y_sub  = int(self.df.loc[i, "sub_id"])
        return x, torch.tensor(y_main), torch.tensor(y_sub)

train_ds = HierDataset(train_df, train_tfms)
val_ds   = HierDataset(val_df,   val_tfms)
test_ds  = HierDataset(test_df,  val_tfms)

BATCH_SIZE = 32
# NOTE: when using a sampler, don't pass shuffle=True
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0)


  A.PadIfNeeded(IMAGE_SIZE, IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0)),


In [4]:
import torch
import torch.nn as nn
from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights

# ---- Device (reuse if already defined) ----
device = torch.device("mps" if torch.backends.mps.is_available()
                      else ("cuda" if torch.cuda.is_available() else "cpu"))
print("Device:", device)

crit_main = nn.CrossEntropyLoss(weight=w_main_ce.to(device))  # main head
crit_sub  = nn.CrossEntropyLoss(weight=w_sub_ce.to(device))   # sub head

# ---- Losses with class weights (expects w_main_ce / w_sub_ce from Cell 2) ----
try:
    crit_main, crit_sub
except NameError:
    # If you restarted the kernel and haven't run Cell 2 yet, fall back to unweighted CE
    print("Note: falling back to unweighted losses (run Cell 2 for class-weighted CE).")
    crit_main = nn.CrossEntropyLoss()
    crit_sub  = nn.CrossEntropyLoss()

# ---- Multi-task model: MobileNetV3-Small backbone + 2 classifier heads ----
class MultiTaskMobileNetV3(nn.Module):
    def __init__(self, num_main: int, num_sub: int):
        super().__init__()
        weights = MobileNet_V3_Small_Weights.IMAGENET1K_V1  # pretrained ImageNet
        self.backbone = mobilenet_v3_small(weights=weights)
        in_feats = self.backbone.classifier[-1].in_features
        # remove the original single-class head
        self.backbone.classifier[-1] = nn.Identity()
        # two heads for main + sub
        self.head_main = nn.Linear(in_feats, num_main)
        self.head_sub  = nn.Linear(in_feats, num_sub)

    def forward(self, x):
        feats = self.backbone(x)              # pooled embedding
        logits_main = self.head_main(feats)   # softmax via CrossEntropyLoss
        logits_sub  = self.head_sub(feats)
        return logits_main, logits_sub

NUM_MAIN = int(train_df["main_id"].max()) + 1
NUM_SUB  = int(train_df["sub_id"].max()) + 1
model = MultiTaskMobileNetV3(NUM_MAIN, NUM_SUB).to(device)

# ---- Optimizer & scheduler ----
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=2)

# small utility: number of trainable params
def count_params(m): return sum(p.numel() for p in m.parameters() if p.requires_grad)
print(f"Params: {count_params(model):,}  | heads: main={NUM_MAIN}, sub={NUM_SUB}")


Device: mps
Params: 1,525,031  | heads: main=2, sub=5


In [5]:
import math, time
import torch
from pathlib import Path

def top1_from_logits(logits, targets):
    preds = logits.argmax(dim=1)
    return (preds == targets).float().mean().item()

@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    n_batches = 0
    tot_loss = 0.0
    acc_main = 0.0
    acc_sub  = 0.0
    for xb, y_main, y_sub in loader:
        xb      = xb.to(device)
        y_main  = y_main.to(device)
        y_sub   = y_sub.to(device)

        lm, ls  = model(xb)
        loss    = crit_main(lm, y_main) + crit_sub(ls, y_sub)

        tot_loss += float(loss.detach().cpu().item())
        acc_main += top1_from_logits(lm, y_main)
        acc_sub  += top1_from_logits(ls, y_sub)
        n_batches += 1

    return {
        "loss":    tot_loss / max(1, n_batches),
        "acc_main": acc_main / max(1, n_batches),
        "acc_sub":  acc_sub  / max(1, n_batches),
    }

def train_one_epoch(model, loader, optimizer):
    model.train()
    tot_loss = 0.0
    n_batches = 0
    for xb, y_main, y_sub in loader:
        xb     = xb.to(device)
        y_main = y_main.to(device)
        y_sub  = y_sub.to(device)

        optimizer.zero_grad(set_to_none=True)
        lm, ls = model(xb)
        loss   = crit_main(lm, y_main) + crit_sub(ls, y_sub)
        loss.backward()
        optimizer.step()

        tot_loss += float(loss.detach().cpu().item())
        n_batches += 1
    return tot_loss / max(1, n_batches)

# ==== train ====
EPOCHS   = 12
PATIENCE = 3
best_val = math.inf
wait     = 0

MODELS_DIR = Path("models"); MODELS_DIR.mkdir(parents=True, exist_ok=True)
CKPT = MODELS_DIR / "hier_mnv3_small.pt"

history = []
for ep in range(1, EPOCHS+1):
    t0 = time.time()
    tr_loss = train_one_epoch(model, train_loader, optimizer)
    val_stats = evaluate(model, val_loader)

    # step ReduceLROnPlateau on *validation loss*
    scheduler.step(val_stats["loss"])

    dt = time.time() - t0
    print(f"Epoch {ep:02d} | train_loss {tr_loss:.4f} | "
          f"val_loss {val_stats['loss']:.4f} | "
          f"val_acc_main {val_stats['acc_main']:.3f} | "
          f"val_acc_sub {val_stats['acc_sub']:.3f} | {dt:.1f}s")

    history.append({"epoch": ep, **val_stats, "train_loss": tr_loss})

    # early stop on best val loss
    if val_stats["loss"] < best_val - 1e-4:
        best_val = val_stats["loss"]
        wait = 0
        torch.save(model.state_dict(), CKPT)
    else:
        wait += 1
        if wait >= PATIENCE:
            print("Early stopping.")
            break

print("Best checkpoint saved to:", CKPT)


Epoch 01 | train_loss 0.4897 | val_loss 1.2293 | val_acc_main 0.881 | val_acc_sub 0.306 | 20.1s
Epoch 02 | train_loss 0.1010 | val_loss 0.7977 | val_acc_main 0.912 | val_acc_sub 0.644 | 17.1s
Epoch 03 | train_loss 0.0652 | val_loss 1.1480 | val_acc_main 0.856 | val_acc_sub 0.613 | 17.1s
Epoch 04 | train_loss 0.0477 | val_loss 0.7429 | val_acc_main 0.950 | val_acc_sub 0.856 | 17.4s
Epoch 05 | train_loss 0.0354 | val_loss 0.9703 | val_acc_main 0.950 | val_acc_sub 0.900 | 17.5s
Epoch 06 | train_loss 0.0252 | val_loss 1.1137 | val_acc_main 0.969 | val_acc_sub 0.938 | 16.4s
Epoch 07 | train_loss 0.0275 | val_loss 0.9621 | val_acc_main 0.969 | val_acc_sub 0.894 | 16.7s
Early stopping.
Best checkpoint saved to: models/hier_mnv3_small.pt


In [6]:
import torch, numpy as np
from pathlib import Path
from sklearn.metrics import classification_report

# 1) Load best checkpoint
CKPT = Path("models/hier_mnv3_small.pt")
model.load_state_dict(torch.load(CKPT, map_location=device))
model.eval()

# 2) Build id→name lists from your train dataframe
id2main = (
    train_df[["main_id", "main_name"]]
    .drop_duplicates()
    .sort_values("main_id")["main_name"]
    .tolist()
)
id2sub = (
    train_df[["sub_id", "sub_name"]]
    .drop_duplicates()
    .sort_values("sub_id")["sub_name"]
    .tolist()
)

# 3) Quick loss/acc on test (reuses evaluate() from Cell 5)
test_stats = evaluate(model, test_loader)
print(f"TEST -> loss: {test_stats['loss']:.4f} | "
      f"acc_main: {test_stats['acc_main']:.3f} | "
      f"acc_sub: {test_stats['acc_sub']:.3f}")

# 4) Full classification reports (named)
y_true_main, y_pred_main = [], []
y_true_sub,  y_pred_sub  = [], []

with torch.no_grad():
    for xb, y_main, y_sub in test_loader:
        xb = xb.to(device)
        lm, ls = model(xb)
        y_true_main.extend(y_main.numpy())
        y_true_sub.extend(y_sub.numpy())
        y_pred_main.extend(lm.argmax(1).cpu().numpy())
        y_pred_sub.extend(ls.argmax(1).cpu().numpy())

y_true_main = np.array(y_true_main); y_pred_main = np.array(y_pred_main)
y_true_sub  = np.array(y_true_sub);  y_pred_sub  = np.array(y_pred_sub)

print("\n=== MAIN report ===")
print(classification_report(y_true_main, y_pred_main, target_names=id2main, digits=3))

print("=== SUB report ===")
print(classification_report(y_true_sub, y_pred_sub, target_names=id2sub, digits=3))


TEST -> loss: 0.4302 | acc_main: 0.974 | acc_sub: 0.953

=== MAIN report ===
                      precision    recall  f1-score   support

Environmental issues      0.976     0.964     0.970        84
         Road issues      0.962     0.974     0.968        77

            accuracy                          0.969       161
           macro avg      0.969     0.969     0.969       161
        weighted avg      0.969     0.969     0.969       161

=== SUB report ===
                      precision    recall  f1-score   support

             Pothole      0.967     0.908     0.937        65
Damaged Road Surface      0.000     0.000     0.000         2
     Illegal Parking      0.909     1.000     0.952        10
 Littering / Garbage      0.988     0.988     0.988        84

            accuracy                          0.944       161
           macro avg      0.716     0.724     0.719       161
        weighted avg      0.962     0.944     0.953       161



In [15]:
# Cell 7 — Single-image prediction helper (MAIN + SUB)

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2, torch, numpy as np
import torch.nn.functional as F

# Build id→name lists aligned to the model head sizes (fills any gaps with placeholders)
NUM_MAIN = model.head_main.out_features
NUM_SUB  = model.head_sub.out_features

id2main = [f"Unknown-{i}" for i in range(NUM_MAIN)]
for mid, name in (train_df[["main_id","main_name"]].drop_duplicates().itertuples(index=False)):
    mid = int(mid)
    if 0 <= mid < NUM_MAIN:
        id2main[mid] = name

id2sub = [f"Unknown-{i}" for i in range(NUM_SUB)]
for sid, name in (train_df[["sub_id","sub_name"]].drop_duplicates().itertuples(index=False)):
    sid = int(sid)
    if 0 <= sid < NUM_SUB:
        id2sub[sid] = name

def preprocess_for_infer(path: str, size=224):
    im = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if im is None:
        raise FileNotFoundError(path)
    if im.ndim == 2:
        im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
    else:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    tfm = A.Compose([
        A.LongestMaxSize(max_size=size),
        A.PadIfNeeded(size, size, border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0)),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    x = tfm(image=im)["image"].unsqueeze(0)  # 1x3xHxW
    return x

@torch.no_grad()
def predict(image_path: str):
    model.eval()
    xb = preprocess_for_infer(image_path).to(device)
    lm, ls = model(xb)  # logits for main and sub heads
    pm = F.softmax(lm, dim=1)[0].cpu().numpy()  # probabilities (sum to 1)
    ps = F.softmax(ls, dim=1)[0].cpu().numpy()

    main_id = int(pm.argmax())
    sub_id  = int(ps.argmax())

    return {
        "main_name": id2main[main_id],
        "main_prob": float(pm[main_id]),
        "sub_name":  id2sub[sub_id],
        "sub_prob":  float(ps[sub_id]),
    }

# Example:
print(predict("/Users/nithilathawalampitiya/Downloads/pexels-simon-robben-55958-614810.jpg"))


{'main_name': 'Environmental issues', 'main_prob': 0.8481562733650208, 'sub_name': 'Littering / Garbage', 'sub_prob': 0.4348079562187195}


  A.PadIfNeeded(size, size, border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0)),
