# Baseline

## VGG

### 1) Imports & Configuration

In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import Subset, random_split
from torchvision import models, transforms

from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Compute device: CUDA when available, CPU otherwise.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Passed to the DataLoaders as pin_memory; only switched on for CUDA runs.
PIN_MEMORY = DEVICE.type == "cuda"

# Input locations (relative paths)
TRAIN_CSV, EXTRA_CSV = "train.csv", "train-extra.csv"
IMAGE_DIRS = ["train_images", "extra_images"]

# Per-channel ImageNet statistics fed to transforms.Normalize
MEAN, STD = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]


### 2) Load, Index and Filter Dataframe

In [82]:
def load_labels():
    """Read both label CSVs and stack them into a single frame.

    The tag columns are every column of TRAIN_CSV other than "gridimage_id"
    and "description". Each CSV is reduced to the ID column plus those tag
    columns, and the IDs are converted to strings left-padded with zeros to
    at least five characters.

    Returns:
        tuple: (frame, tags) where frame holds the rows of TRAIN_CSV followed
        by the rows of EXTRA_CSV under a fresh integer index, and tags is the
        list of tag column names.
    """
    non_tag_cols = ("gridimage_id", "description")
    header = pd.read_csv(TRAIN_CSV, nrows=0)  # header only, no data rows
    tags = [col for col in header.columns if col not in non_tag_cols]

    def reindex(df):
        # Keep the ID and tag columns only, then normalise the ID to text.
        subset = df[["gridimage_id"] + tags].copy()
        subset["gridimage_id"] = subset["gridimage_id"].astype(str).str.zfill(5)
        return subset

    frames = [reindex(pd.read_csv(csv_path)) for csv_path in (TRAIN_CSV, EXTRA_CSV)]
    return pd.concat(frames, ignore_index=True), tags

# Load the combined label table and the list of tag column names.
df, tag_cols = load_labels()
print(f"Initial samples: {len(df)}")

# Build ID → filepath map
# Keys are file stems (filename without extension); they are matched against
# the zero-padded string IDs in df["gridimage_id"] below.
VALID_EXTS = {".jpg", ".jpeg", ".png"}
id2path = {}
for root in IMAGE_DIRS:
    for fp in Path(root).rglob("*"):
        if fp.suffix.lower() in VALID_EXTS:
            stem = fp.stem
            # First file found for a stem wins; later duplicates are ignored.
            if stem not in id2path:
                id2path[stem] = str(fp)
print(f"Images found on disk: {len(id2path)}")

# Keep only rows with existing images
df = df[df["gridimage_id"].isin(id2path)].reset_index(drop=True)
print(f"After image filter: {len(df)}")

# Filter by tag-count between 1 and 5 (inclusive)
# NOTE(review): the row sum is a tag count only if tag columns are 0/1 — confirm.
df["tag_count"] = df[tag_cols].sum(axis=1)
df = df[(df["tag_count"] >= 1) & (df["tag_count"] <= 5)].reset_index(drop=True)
print(f"After tag-count filter: {len(df)}")


Initial samples: 657235
Images found on disk: 648540
After image filter: 648534
After tag-count filter: 647094


### 3) Split and Transforms

In [83]:
# Shuffled 80/20 split with a fixed seed so the partition is repeatable.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)
print(f"Train / Val: {len(train_df)} / {len(val_df)}")


def _build_transform(augment):
    """Compose the image pipeline; `augment` adds a random horizontal flip."""
    steps = [transforms.Resize(256), transforms.CenterCrop(256)]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
    steps.extend([transforms.ToTensor(), transforms.Normalize(MEAN, STD)])
    return transforms.Compose(steps)


# Training and validation differ only by the flip augmentation.
train_transform = _build_transform(augment=True)
val_transform = _build_transform(augment=False)


Train / Val: 517675 / 129419


### 4) Dataset and DataLoaders

In [90]:
class TagDataset(Dataset):
    """Dataset yielding (transformed image, float32 label vector) pairs.

    Args:
        df: label frame with a "gridimage_id" column plus one column per tag;
            a copy with its index reset to 0..n-1 is stored.
        tags: names of the label columns, in output order.
        id2path: mapping from image ID to image file path.
        transform: callable applied to the PIL image before it is returned.
    """

    def __init__(self, df, tags, id2path, transform):
        self.df, self.tags = df.reset_index(drop=True), tags
        self.id2path, self.transform = id2path, transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        image_path = self.id2path.get(record.gridimage_id)
        if image_path:
            picture = Image.open(image_path).convert("RGB")
        else:
            # No usable path for this ID: substitute a blank 256x256 RGB image.
            picture = Image.new("RGB", (256, 256))
        picture = self.transform(picture)
        targets = record[self.tags].values.astype(np.float32)
        return picture, torch.tensor(targets)

# One dataset per split, each paired with its own transform pipeline.
train_ds, val_ds = (
    TagDataset(split_df, tag_cols, id2path, split_transform)
    for split_df, split_transform in (
        (train_df, train_transform),
        (val_df, val_transform),
    )
)

# Identical loader settings for both splits; only the training loader shuffles.
train_loader, val_loader = (
    DataLoader(
        split_ds,
        batch_size=64,
        shuffle=do_shuffle,
        num_workers=0,
        pin_memory=PIN_MEMORY,
    )
    for split_ds, do_shuffle in ((train_ds, True), (val_ds, False))
)


### 5) Model, Loss, Optimiser, Scheduler

In [96]:
# ImageNet-pretrained VGG16 with its convolutional stack kept fixed.
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
model.features.requires_grad_(False)

# Replace the last classifier layer with one producing a logit per tag.
in_feats = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features=in_feats, out_features=len(tag_cols))
model = model.to(DEVICE)

criterion = nn.BCEWithLogitsLoss()
# Only the classifier's parameters are handed to the optimizer.
optimizer = optim.Adam(params=model.classifier.parameters(), lr=1e-4)
# Multiply the LR by 0.5 when the value given to scheduler.step() plateaus.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    factor=0.5,
    patience=2,
)


### 6) Train and Validate

In [None]:
# Checkpoint path written by the training loop below when validation loss improves.
OUT_MODEL  = "vgg16_multi.pth"

def train_epoch():
    """Run one optimisation pass over train_loader.

    Uses the notebook-level model, criterion, optimizer and DEVICE.

    Returns:
        float: summed batch losses (each weighted by its batch size) divided
        by the number of samples in train_loader.dataset.
    """
    model.train()
    running_loss = 0.0
    progress = tqdm(train_loader, desc="Train", leave=False)
    for batch_imgs, batch_labels in progress:
        batch_imgs = batch_imgs.to(DEVICE)
        batch_labels = batch_labels.to(DEVICE)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_imgs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size (assumes criterion returns a per-batch mean).
        running_loss += batch_loss.item() * batch_imgs.size(0)
    return running_loss / len(train_loader.dataset)

def eval_epoch():
    """Compute the loss over val_loader without tracking gradients.

    Returns:
        float: summed batch losses (each weighted by its batch size) divided
        by the number of samples in val_loader.dataset.
    """
    model.eval()
    loss_sum = 0.0
    with torch.no_grad():
        for batch_imgs, batch_labels in tqdm(val_loader, desc="Validate", leave=False):
            batch_imgs = batch_imgs.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            batch_loss = criterion(model(batch_imgs), batch_labels)
            loss_sum += batch_loss.item() * batch_imgs.size(0)
    return loss_sum / len(val_loader.dataset)

# Ten epochs; a checkpoint is written each time the validation loss improves.
best_val = float("inf")
for epoch in range(1, 11):
    train_loss = train_epoch()
    val_loss = eval_epoch()
    scheduler.step(val_loss)  # the plateau scheduler monitors validation loss
    print(f"Epoch {epoch:02d} — train: {train_loss:.4f}  val: {val_loss:.4f}")

    if not (val_loss < best_val):
        continue
    best_val = val_loss
    checkpoint = {"model_state": model.state_dict(), "tags": tag_cols}
    torch.save(checkpoint, OUT_MODEL)
    print("  ✨ Saved best model")


                                                           

KeyboardInterrupt: 

## Spare

### 2) Data Transforms & Dataset


In [81]:
# a) Define transforms (unchanged)
# Both pipelines resize, centre-crop to 256, convert to tensor and normalise;
# only the first (training) pipeline inserts a random horizontal flip.
train_transform, val_transform = (
    transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(256)]
        + ([transforms.RandomHorizontalFlip()] if with_flip else [])
        + [transforms.ToTensor(), transforms.Normalize(MEAN, STD)]
    )
    for with_flip in (True, False)
)

# b) Updated Dataset using id2path
class TagDataset(Dataset):
    """Dataset of (transformed image, float32 label vector) pairs.

    Args:
        df: label frame with a "gridimage_id" column and one column per tag;
            stored with its index reset to 0..n-1.
        tag_cols: names of the label columns, in output order.
        id2path: dict mapping image ID to the full file path.
        transform: callable applied to the PIL image.
    """

    def __init__(self, df, tag_cols, id2path, transform):
        self.transform = transform
        self.id2path = id2path
        self.tags = tag_cols
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        location = self.id2path.get(record.gridimage_id)

        # A missing ID is not expected after filtering; fall back to a blank image.
        picture = (
            Image.new("RGB", (256, 256))
            if location is None
            else Image.open(location).convert("RGB")
        )

        tensor_img = self.transform(picture)
        targets = record[self.tags].values.astype(np.float32)
        return tensor_img, torch.tensor(targets)


### 3) Load & Prepare Labels


In [69]:
import os
import pandas as pd
from pathlib import Path

def load_labels():
    """Load TRAIN_CSV and EXTRA_CSV and return them stacked with the tag list.

    Tag columns are all TRAIN_CSV columns except "gridimage_id" and
    "description". IDs become strings zero-padded to at least five characters.

    Returns:
        tuple: (all_df, tags), where all_df has TRAIN_CSV rows followed by
        EXTRA_CSV rows under a new integer index.
    """
    header = pd.read_csv(TRAIN_CSV, nrows=0)
    tags = [name for name in header.columns if name not in ("gridimage_id", "description")]
    wanted = ["gridimage_id"] + tags

    def reindex(df):
        # pad or cast as strings if needed
        padded_ids = df["gridimage_id"].astype(str).str.zfill(5)
        return df[wanted].assign(gridimage_id=padded_ids)

    parts = [reindex(pd.read_csv(TRAIN_CSV)), reindex(pd.read_csv(EXTRA_CSV))]
    all_df = pd.concat(parts, ignore_index=True)
    return all_df, tags

# Load the combined label table and the list of tag column names.
df, tag_cols = load_labels()
print(f"Before filtering: {len(df)} samples")

# ── Build a map from ID → full filepath ──────────────────────────────────
IMAGE_DIRS = ["train_images", "extra_images"]
VALID_EXTS = {".jpg", ".jpeg", ".png"}

id2path = {}
for root in IMAGE_DIRS:
    for filepath in Path(root).rglob("*"):
        if filepath.suffix.lower() in VALID_EXTS:
            stem = filepath.stem  # filename without extension
            # only keep the first occurrence if duplicates exist
            if stem not in id2path:
                id2path[stem] = str(filepath)

print(f"Found {len(id2path)} images on disk")

# ── Filter DataFrame to only IDs we have files for ──────────────────────
df = df[df["gridimage_id"].isin(id2path)].reset_index(drop=True)
print(f"After filtering:  {len(df)} samples remain")

# ── Keep only images with at most 5 tags ─────────────────────────────────
# Rows with more than 5 tags are dropped; rows with 0 tags are kept here.
# NOTE(review): the row sum is a tag count only if tag columns are 0/1 — confirm.
df["tag_count"] = df[tag_cols].sum(axis=1)
df = df[df["tag_count"] <= 5].reset_index(drop=True)
print(f"After filtering:  {len(df)} samples remain")


Before filtering: 657235 samples
Found 648540 images on disk
After filtering:  648534 samples remain
After filtering:  647094 samples remain


### 4) Split & Create DataLoaders


In [None]:
from sklearn.model_selection import train_test_split

# Seeded 80/20 split of the filtered label frame.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=0)

# TagDataset resolves files through id2path.get(...), so it needs the
# ID -> filepath dict, not the IMAGE_DIRS list of directories (a list has no
# .get and would raise AttributeError on the first item).
train_ds = TagDataset(train_df, tag_cols, id2path, train_transform)
val_ds   = TagDataset(val_df,   tag_cols, id2path, val_transform)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=4, pin_memory=True)


In [70]:
# 1) Build one unified Dataset over your already-filtered df
full_ds = TagDataset(df, tag_cols, id2path, train_transform)

# 2) Compute train/val sizes
n = len(full_ds)
n_val   = int(n * 0.2)      # 20% for validation
n_train = n - n_val

# 3) Randomly split (with a fixed seed for reproducibility)
train_ds, val_split = random_split(
    full_ds,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(0)
)

# The validation subset must not inherit train_transform (it contains a random
# flip), so re-wrap the same row indices over a dataset using val_transform.
val_ds = Subset(
    TagDataset(df, tag_cols, id2path, val_transform),
    val_split.indices,
)

# 4) DataLoaders exactly as before
train_loader = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)
val_loader = DataLoader(
    val_ds,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)


In [75]:
# Sample counts of the training and validation datasets, one per line.
print(len(train_loader.dataset), len(val_loader.dataset), sep="\n")

517676
129418


### 5) Build & Freeze VGG16

In [76]:
# Pretrained VGG16 (ImageNet weights)
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Feature extractor stays fixed: no gradients for any of its parameters
for weight in model.features.parameters():
    weight.requires_grad_(False)

# New final classifier layer with one output per tag
in_feats = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features=in_feats, out_features=len(tag_cols))

model = model.to(DEVICE)


### 6) Loss, Optimizer & (Optional) Scheduler

In [77]:
criterion = nn.BCEWithLogitsLoss()
# Only the classifier's parameters are handed to the optimizer.
optimizer = optim.Adam(params=model.classifier.parameters(), lr=1e-4)
# optional: multiply the LR by 0.5 when the monitored value plateaus
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    factor=0.5,
    patience=2,
)


### 7) Train & Validate Loops

In [78]:
from tqdm import tqdm  # plain progress bar

# DataLoaders: batches of 128, loaded in the main process without pinned memory;
# only the training loader shuffles.
train_loader, val_loader = (
    DataLoader(
        split_ds,
        batch_size=128,
        shuffle=do_shuffle,
        num_workers=0,
        pin_memory=False,
    )
    for split_ds, do_shuffle in ((train_ds, True), (val_ds, False))
)

# Training / Validation loops
def train_epoch():
    """Train for one pass over train_loader and return the mean sample loss.

    Relies on the notebook-level model, criterion, optimizer and DEVICE.
    """
    model.train()
    accumulated = 0.0
    for batch in tqdm(train_loader, desc="Train", leave=False):
        inputs, targets = (tensor.to(DEVICE) for tensor in batch)
        optimizer.zero_grad()
        step_loss = criterion(model(inputs), targets)
        step_loss.backward()
        optimizer.step()
        # Weight by batch size (assumes criterion returns a per-batch mean).
        accumulated += step_loss.item() * inputs.size(0)
    sample_count = len(train_loader.dataset)
    return accumulated / sample_count

def eval_epoch():
    """Return the mean sample loss over val_loader, computed without gradients."""
    model.eval()
    accumulated = 0.0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validate", leave=False):
            inputs, targets = (tensor.to(DEVICE) for tensor in batch)
            step_loss = criterion(model(inputs), targets)
            accumulated += step_loss.item() * inputs.size(0)
    sample_count = len(val_loader.dataset)
    return accumulated / sample_count

# Main loop: ten epochs, checkpointing on every validation-loss improvement
best_val = float("inf")
for epoch in range(1, 11):
    tr_loss, va_loss = train_epoch(), eval_epoch()
    scheduler.step(va_loss)
    print(f"Epoch {epoch:02d} — train: {tr_loss:.4f}  val: {va_loss:.4f}")
    improved = va_loss < best_val
    if improved:
        best_val = va_loss
        snapshot = {"model_state": model.state_dict(), "tags": tag_cols}
        torch.save(snapshot, OUT_MODEL)
        print("  ✨ saved best model")


                                                          

KeyboardInterrupt: 

### 8) Inference: Top-3 + Threshold + At Least-1



In [None]:
def predict_tags(model, imgs, tags, thresh=0.5, topk=3):
    """Predict tag names for a batch of images.

    For every image the `topk` most probable tags are considered and those
    whose sigmoid probability is at least `thresh` are kept. If none
    qualifies, the single most probable tag is returned, so every image gets
    at least one tag.

    Args:
        model: torch module mapping an image batch to logits with one column
            per tag. It is switched to eval mode and left in that mode.
        imgs: batch tensor accepted by `model`.
        tags: sequence of tag names, indexed like the model's output columns.
        thresh: minimum probability for a tag to be reported.
        topk: maximum number of tags per image. Values above the number of
            model outputs are clamped to it; values below 1 are treated as 1.

    Returns:
        list[list[str]]: one list of tag names per image, most probable first.
    """
    model.eval()
    # Run on the device the model actually lives on instead of a notebook
    # global, so a model that was moved or reloaded elsewhere still works.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        imgs = imgs.to(first_param.device)
    with torch.no_grad():
        probs = torch.sigmoid(model(imgs))

    # torch.topk raises when k exceeds the dimension size, and k == 0 would
    # leave nothing for the at-least-one fallback, so clamp k to [1, num_tags].
    k = max(1, min(topk, probs.size(1)))
    top_vals, top_idx = probs.topk(k, dim=1)

    all_preds = []
    for vals, idxs in zip(top_vals.tolist(), top_idx.tolist()):
        # pick those above threshold
        picked = [tags[i] for i, v in zip(idxs, vals) if v >= thresh]
        # fallback if none pass: topk results are sorted, so idxs[0] is the best
        if not picked:
            picked = [tags[idxs[0]]]
        all_preds.append(picked)
    return all_preds

# Example: tag the first validation batch (its labels are not needed here).
first_batch = next(iter(val_loader))
imgs = first_batch[0]
predicted = predict_tags(model, imgs, tag_cols)
print(predicted)


## EfficientNet

### 5) EfficientNet-B0 Setup

In [97]:
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

# 1) Pretrained EfficientNet-B0 (ImageNet weights)
model = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)

# 2) No gradients for the convolutional feature extractor
model.features.requires_grad_(False)

# 3) New classification head: dropout followed by one linear output per tag
in_feats = model.classifier[1].in_features
head_dropout = nn.Dropout(p=0.2)
head_linear = nn.Linear(in_features=in_feats, out_features=len(tag_cols))
model.classifier = nn.Sequential(head_dropout, head_linear)

# 4) Move to device
model = model.to(DEVICE)

# 5) Loss, optimizer, scheduler (same as before); only the head is optimised
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.classifier.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    factor=0.5,
    patience=2,
)


### 6) Train and Validate

In [98]:
# Checkpoint path written by the training loop below when validation loss improves.
OUT_MODEL = "efficientnet_multi.pth"
def train_epoch():
    """Run one training pass over train_loader.

    Note: model.train() switches every submodule to training mode, including
    the layers under model.features.

    Returns:
        float: summed batch losses (each weighted by its batch size) divided
        by the number of samples in train_loader.dataset.
    """
    model.train()
    epoch_loss = 0.0
    n_samples = len(train_loader.dataset)
    for inputs, targets in tqdm(train_loader, desc="Train", leave=False):
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        step_loss = criterion(outputs, targets)
        step_loss.backward()
        optimizer.step()
        epoch_loss += step_loss.item() * inputs.size(0)
    return epoch_loss / n_samples

def eval_epoch():
    """Evaluate on val_loader in eval mode with gradient tracking disabled.

    Returns:
        float: summed batch losses (each weighted by its batch size) divided
        by the number of samples in val_loader.dataset.
    """
    model.eval()
    epoch_loss = 0.0
    n_samples = len(val_loader.dataset)
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc="Validate", leave=False):
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)
            outputs = model(inputs)
            epoch_loss += criterion(outputs, targets).item() * inputs.size(0)
    return epoch_loss / n_samples

# Ten epochs; keep the checkpoint with the lowest validation loss seen so far.
best_val = float("inf")
for epoch in range(1, 11):
    train_loss = train_epoch()
    val_loss = eval_epoch()
    scheduler.step(val_loss)
    print(f"Epoch {epoch:02d} — train: {train_loss:.4f}  val: {val_loss:.4f}")

    if not (val_loss < best_val):
        continue
    best_val = val_loss
    checkpoint = {"model_state": model.state_dict(), "tags": tag_cols}
    torch.save(checkpoint, OUT_MODEL)
    print("  ✨ Saved best model")


                                                           

KeyboardInterrupt: 