In [None]:
# Imports, config, utils

import os, json, time, random, shutil, gc
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import timm

def set_seed(seed=42, deterministic=False):
    """Seed every RNG used by the notebook and configure cuDNN.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy, and PyTorch
            (CPU and all CUDA devices).
        deterministic: When False (default, the historical behaviour) cuDNN
            autotuning is enabled (``benchmark = True``), which is faster but
            may pick different kernels from run to run. When True, cuDNN is
            put in deterministic mode and autotuning is disabled so that
            repeated runs are as reproducible as the backend allows.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        # Trade speed for run-to-run reproducibility.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    else:
        # Original behaviour: let cuDNN autotune the fastest kernels.
        torch.backends.cudnn.benchmark = True

def rmse_score(y_true, y_pred):
    """Return the root-mean-squared error between two array-likes.

    Both inputs are converted to float32 NumPy arrays before the error is
    computed; the result is returned as a plain Python float.
    """
    err = np.asarray(y_true, dtype=np.float32) - np.asarray(y_pred, dtype=np.float32)
    mean_sq = np.mean(err * err)
    return float(np.sqrt(mean_sq))

def save_json(obj, path):
    """Serialize ``obj`` as 2-space-indented JSON into the file at ``path``.

    The file is opened in text write mode, so any existing content is
    replaced.
    """
    with open(path, "w") as handle:
        json.dump(obj, handle, indent=2)

# Record the PyTorch version and whether a CUDA device is visible to this kernel.
print("Torch", torch.__version__, "| CUDA:", torch.cuda.is_available())

class CFG:
    """Central configuration: input/output locations and training hyperparameters.

    Attributes are read as class attributes (``CFG.name``); the class is used
    as a namespace by the other cells.
    """
    # --- Input data (paths are relative to the notebook's working directory) ---
    train_csv = "src/inputs/train.csv"
    train_imgdir = "src/inputs/train"
    test_csv = "src/inputs/test.csv"
    test_imgdir = "src/inputs/test"
    sample_submission = "src/inputs/sample_submission.csv"
    # --- Output root; per-fold artifacts, OOF predictions and submission go here ---
    outdir = "outputs_mtl_piq_brisque"
    # --- Backbone: timm model identifier passed to timm.create_model ---
    model_name = "swin_large_patch4_window12_384.ms_in22k"
    # Side length (pixels) of the square crops fed to the model and to BRISQUE.
    # NOTE(review): presumably must match the 384 resolution in model_name — confirm.
    img_size = 384
    # --- Cross-validation / optimisation ---
    folds = 5            # number of StratifiedKFold splits
    epochs = 10          # maximum epochs per fold
    patience = 5         # early stopping: epochs without validation improvement
    batch_size = 8       # used for train, validation and test loaders
    lr = 2e-5            # AdamW learning rate
    weight_decay = 1e-4  # AdamW weight decay
    seed = 42            # passed to set_seed and StratifiedKFold
    tta = 1              # NOTE(review): not referenced in the visible cells — confirm usage
    # --- Multi-task loss weights: loss = w_paw * BCE(paw) + w_bq * SmoothL1(brisque) ---
    w_paw = 1.0   # Pawpularity loss weight
    w_bq  = 0.5   # BRISQUE loss weight

# Seed all RNGs once up front and make sure the output root exists
# (exist_ok=True keeps re-running this cell harmless).
set_seed(CFG.seed)
os.makedirs(CFG.outdir, exist_ok=True)


Torch 2.6.0+cu124 | CUDA: True


In [3]:
# B) PIQ BRISQUE utilities

from piq import brisque as piq_brisque
from PIL import Image

def load_tensor_rgb01(path, img_size=None):
    """Load an image file as an RGB float tensor with values in [0, 1].

    Args:
        path: Location of the image file.
        img_size: If given, the image is resized (shorter side to
            ``int(img_size * 1.15)``) and centre-cropped to
            ``img_size x img_size``. If None, the image keeps its native size.

    Returns:
        The tensor produced by ``T.ToTensor`` (channels first).
    """
    rgb = Image.open(path).convert("RGB")
    if img_size is None:
        # No geometry change requested: convert straight to a tensor.
        return T.ToTensor()(rgb)
    pipeline = T.Compose([
        T.Resize(int(img_size*1.15)),
        T.CenterCrop(img_size),
        T.ToTensor(),           # 0..1 RGB
    ])
    return pipeline(rgb)

def compute_brisque_piq_for_df(df, imgdir, id_col="Id", img_ext=".jpg", img_size=384, batch_size=16, device="cuda"):
    """Compute a PIQ BRISQUE score for every image referenced by ``df``.

    Images are loaded, scored and released one batch at a time, so memory use
    is bounded by ``batch_size`` rather than by the size of the dataset
    (stacking every image up front needs tens of GB for ~10k 384x384 images).

    Args:
        df: DataFrame with one row per image.
        imgdir: Directory containing the image files.
        id_col: Column holding the image id (file stem).
        img_ext: File extension appended to each id.
        img_size: Passed to ``load_tensor_rgb01``; square crop size, or None
            to keep native sizes (images in one batch must then share a size).
        batch_size: Number of images scored per ``piq`` call.
        device: ``"cuda"`` to score on GPU when one is available; any other
            value keeps the tensors on CPU.

    Returns:
        1-D float32 NumPy array of scores in the row order of ``df``
        (empty when ``df`` has no rows).
    """
    ids = df[id_col].tolist()
    if not ids:
        # Nothing to score; torch.cat on an empty list would raise.
        return np.empty((0,), dtype=np.float32)

    use_cuda = torch.cuda.is_available() and device == "cuda"
    img_root = Path(imgdir)
    scores = []
    # BRISQUE is only used as a fixed target here, so no autograd graph is needed.
    with torch.no_grad():
        for start in range(0, len(ids), batch_size):
            chunk = ids[start:start + batch_size]
            X = torch.stack(
                [load_tensor_rgb01(img_root / f"{img_id}{img_ext}", img_size=img_size) for img_id in chunk],
                0,
            )  # 0..1 RGB
            if use_cuda:
                X = X.cuda()
            s = piq_brisque(X, data_range=1.0, reduction='none')  # (B,)
            scores.append(s.detach().cpu())
    scores = torch.cat(scores, 0).float().numpy()
    return scores.astype(np.float32)


In [None]:
#  Load CSVs, compute PIQ BRISQUE, create robust bins

# Read the train/test metadata tables.
df = pd.read_csv(CFG.train_csv)
test_df = pd.read_csv(CFG.test_csv)

# The test table must not carry a target column; drop it if present.
if "Pawpularity" in test_df.columns:
    test_df = test_df.drop(columns=["Pawpularity"])

# Add a per-image BRISQUE score column to both tables (auxiliary regression target).
# NOTE(review): this runs before rows with a non-numeric Pawpularity are dropped
# below, so any such rows are scored and then discarded.
print("Computing PIQ BRISQUE for train...")
df["brisque"] = compute_brisque_piq_for_df(df, CFG.train_imgdir, img_size=CFG.img_size)
print("Computing PIQ BRISQUE for test...")
test_df["brisque"] = compute_brisque_piq_for_df(test_df, CFG.test_imgdir, img_size=CFG.img_size)


# Coerce the target to numeric (unparseable values become NaN) and drop those rows.
df["Pawpularity"] = pd.to_numeric(df["Pawpularity"], errors="coerce")
df = df.dropna(subset=["Pawpularity"]).reset_index(drop=True)


# Bin the continuous target so StratifiedKFold can stratify on it:
# between 3 and 10 bins, depending on how many distinct target values exist.
n_unique = df["Pawpularity"].nunique()
n_bins = int(min(10, max(3, n_unique)))
try:
    # Quantile bins; duplicate bin edges are merged rather than raising.
    df["_bin"] = pd.qcut(df["Pawpularity"], q=n_bins, labels=False, duplicates="drop")
except Exception:
    # Fallback: equal-width bins if quantile binning fails for any reason.
    df["_bin"] = pd.cut(df["Pawpularity"], bins=n_bins, labels=False, include_lowest=True)

# If any row ended up without a bin, re-bin on average ranks instead of raw values.
if df["_bin"].isna().any():
    ranks = df["Pawpularity"].rank(method="average")
    df["_bin"] = pd.qcut(ranks, q=n_bins, labels=False, duplicates="drop")

# Integer labels are required for stratification.
df["_bin"] = df["_bin"].astype(int)

print("Bin value counts:\n", df["_bin"].value_counts(dropna=False).sort_index())
print("Train/Test shapes:", df.shape, test_df.shape)


Computing PIQ BRISQUE for train...
Computing PIQ BRISQUE for test...
Bin value counts:
 _bin
0    1100
1     998
2    1158
3     940
4     842
5     948
6    1074
7     921
8     953
9     978
Name: count, dtype: int64
Train/Test shapes: (9912, 16) (8, 14)


In [None]:
# Dataset

class PawDataset(Dataset):
    """Image dataset yielding normalised tensors plus Pawpularity/BRISQUE targets.

    Each item is a dict with keys ``"image"``, ``"id"``, ``"target_norm"``
    (Pawpularity / 100 as float32, or NaN when unavailable) and ``"brisque"``
    (float32).
    """

    def __init__(self, df, img_dir, img_size=384, is_train=True):
        """Store the frame and build the train or eval transform pipeline.

        Args:
            df: Frame with an ``Id`` column, a ``brisque`` column and
                optionally a ``Pawpularity`` column.
            img_dir: Directory containing ``<Id>.jpg`` files.
            img_size: Side length of the square model input.
            is_train: True for random augmentation, False for deterministic
                resize + centre crop.
        """
        self.df = df.reset_index(drop=True)
        self.img_dir = Path(img_dir)
        self.is_train = is_train

        # Geometry/colour steps differ by mode; tensor conversion and
        # ImageNet normalisation are shared.
        if is_train:
            front = [
                T.RandomResizedCrop(img_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)),
                T.RandomHorizontalFlip(p=0.5),
                T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
            ]
        else:
            front = [
                T.Resize(int(img_size*1.15)),
                T.CenterCrop(img_size),
            ]
        tail = [
            T.ToTensor(),
            T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ]
        self.transform = T.Compose(front + tail)

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, transform and package the sample at positional index ``idx``."""
        record = self.df.iloc[idx]
        image_id = record["Id"]
        pixels = self.transform(Image.open(self.img_dir / f"{image_id}.jpg").convert("RGB"))

        # Missing column or NaN value both map to a NaN target.
        raw_target = record["Pawpularity"] if "Pawpularity" in record.index else np.nan
        if np.isnan(raw_target):
            target = np.nan
        else:
            target = np.float32(raw_target / 100.0)

        return {
            "image": pixels,
            "id": image_id,
            "target_norm": target,
            "brisque": np.float32(record["brisque"]),
        }


In [None]:
#  Multi-task model: Swin backbone + Pawpularity & BRISQUE heads

class SwinBackboneMulti(nn.Module):
    """timm backbone followed by a shared embedding and two scalar heads.

    One head produces a Pawpularity logit, the other a BRISQUE prediction;
    both read the same ReLU-activated embedding.
    """

    def __init__(self, model_name="swin_large_patch4_window12_384.ms_in22k", emb_dim=128, pretrained=True):
        """Create the backbone (classifier removed) and the projection/head layers."""
        super().__init__()
        self.backbone = timm.create_model(model_name, pretrained=pretrained, num_classes=0)
        self.emb = nn.Linear(self.backbone.num_features, emb_dim)
        self.head_paw = nn.Linear(emb_dim, 1)   # Pawpularity
        self.head_bq  = nn.Linear(emb_dim, 1)   # BRISQUE

    def forward(self, x, return_embedding=False):
        """Run the network.

        Returns a dict with ``"paw_logit"`` and ``"brisque_pred"`` (each with
        the trailing size-1 dimension squeezed away). When
        ``return_embedding`` is True, returns ``(dict, embedding)`` instead.
        """
        emb = F.relu(self.emb(self.backbone(x)), inplace=True)
        out = {
            "paw_logit": self.head_paw(emb).squeeze(1),
            "brisque_pred": self.head_bq(emb).squeeze(1),
        }
        return (out, emb) if return_embedding else out


In [None]:
#  Train/validate and extract embeddings

def train_one_epoch(model, loader, optimizer, scaler, device, w_paw=1.0, w_bq=0.5):
    """Train ``model`` for one pass over ``loader`` and return the mean batch loss.

    The loss is ``w_paw * BCEWithLogits(paw_logit, target_norm)
    + w_bq * SmoothL1(brisque_pred, brisque)``.

    Args:
        model: Module returning a dict with ``"paw_logit"`` and ``"brisque_pred"``.
        loader: Iterable of batches (dicts with ``"image"``, ``"target_norm"``
            and ``"brisque"`` tensors).
        optimizer: Optimizer over the model parameters.
        scaler: Gradient scaler exposing ``scale``/``step``/``update``.
        device: Target device (``torch.device`` or string).
        w_paw: Weight of the Pawpularity loss term.
        w_bq: Weight of the BRISQUE loss term.

    Returns:
        Mean of the per-batch loss values as a float (NaN if ``loader``
        yields no batches).
    """
    model.train()
    bce = nn.BCEWithLogitsLoss()
    l1  = nn.SmoothL1Loss(beta=1.0)
    # Follow the device actually in use instead of hard-coding "cuda";
    # mixed precision is only enabled when running on a CUDA device.
    device_type = torch.device(device).type
    use_amp = device_type == "cuda"
    losses = []
    for batch in loader:
        x = batch["image"].to(device, non_blocking=True)
        y_paw = batch["target_norm"].to(device)  # 0..1
        y_bq  = batch["brisque"].to(device)      # ~0..100
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast(device_type=device_type, enabled=use_amp):
            out = model(x)
            loss_paw = bce(out["paw_logit"], y_paw)
            loss_bq  = l1(out["brisque_pred"], y_bq)
            loss = w_paw*loss_paw + w_bq*loss_bq
        scaler.scale(loss).backward()
        scaler.step(optimizer); scaler.update()
        losses.append(loss.item())
        # No per-batch torch.cuda.empty_cache(): it returns cached blocks to the
        # driver only for them to be re-allocated on the next step, which slows
        # training without lowering the peak memory PyTorch itself needs.
    if not losses:
        return float("nan")
    return float(np.mean(losses))

def validate_epoch(model, loader, device):
    """Evaluate ``model`` on ``loader`` and return RMSE on the 0-100 Pawpularity scale.

    Predictions are ``sigmoid(paw_logit) * 100``; targets are
    ``target_norm * 100``. The forward passes run under ``torch.no_grad()``
    so no autograd graph is built during evaluation.

    Args:
        model: Module returning a dict containing ``"paw_logit"``.
        loader: Iterable of batches with ``"image"`` and ``"target_norm"`` tensors.
        device: Device the images are moved to.

    Returns:
        RMSE as computed by ``rmse_score``.
    """
    model.eval()
    preds, gts = [], []
    with torch.no_grad():
        for batch in loader:
            x = batch["image"].to(device, non_blocking=True)
            y = batch["target_norm"].numpy()
            out = model(x)
            prob = torch.sigmoid(out["paw_logit"]).detach().cpu().numpy() * 100.0
            preds.append(prob); gts.append(y * 100.0)
    preds = np.concatenate(preds); gts = np.concatenate(gts)
    return rmse_score(gts, preds)

def extract_embeddings(model, loader, device):
    """Run ``model`` over ``loader`` in eval mode and collect its outputs.

    The forward passes run under ``torch.no_grad()`` so no autograd graph is
    kept alive while the whole loader is traversed.

    Args:
        model: Module whose ``forward(x, return_embedding=True)`` returns
            ``(dict, embedding)`` with ``"paw_logit"`` and ``"brisque_pred"``.
        loader: Iterable of batches with ``"image"`` and ``"id"`` entries.
        device: Device the images are moved to.

    Returns:
        Tuple ``(ids, embeddings, paw_preds, brisque_preds)`` in loader order:
        a list of ids and three concatenated NumPy arrays. ``paw_preds`` is
        ``sigmoid(paw_logit) * 100``.
    """
    model.eval()
    ids, embs, preds_paw, preds_bq = [], [], [], []
    with torch.no_grad():
        for batch in loader:
            x = batch["image"].to(device, non_blocking=True)
            out, emb = model(x, return_embedding=True)
            paw = torch.sigmoid(out["paw_logit"]).detach().cpu().numpy() * 100.0
            bq  = out["brisque_pred"].detach().cpu().numpy()
            embs.append(emb.detach().cpu().numpy())
            preds_paw.append(paw)
            preds_bq.append(bq)
            ids.extend(batch["id"])
    return ids, np.concatenate(embs), np.concatenate(preds_paw), np.concatenate(preds_bq)


In [None]:
#  Cross-validation loop

# Stratified K-fold training. For every fold: fine-tune the multi-task network,
# reload its best checkpoint, extract embeddings, fit an SVR on the training-fold
# embeddings, and store out-of-fold (OOF) and test predictions.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
skf = StratifiedKFold(n_splits=CFG.folds, shuffle=True, random_state=CFG.seed)

oof_rows = []          # one dict per validation sample, across all folds
test_preds_folds = []  # one DataFrame (Id, fold<k>) per fold
fold_summaries = {}    # per-fold metrics, later written to JSON

for fold, (tr_idx, va_idx) in enumerate(skf.split(df, df["_bin"])):
    print(f"\n========== Fold {fold} ==========")
    # Start each fold from a clean directory so stale artifacts never mix with new ones.
    fold_dir = Path(CFG.outdir) / f"fold{fold}"
    if fold_dir.exists():
        shutil.rmtree(fold_dir)
    fold_dir.mkdir(parents=True, exist_ok=True)

    tr_df = df.iloc[tr_idx].copy().reset_index(drop=True)
    va_df = df.iloc[va_idx].copy().reset_index(drop=True)

    train_ds = PawDataset(tr_df, CFG.train_imgdir, img_size=CFG.img_size, is_train=True)
    valid_ds = PawDataset(va_df, CFG.train_imgdir, img_size=CFG.img_size, is_train=False)
    test_ds  = PawDataset(test_df.assign(Pawpularity=np.nan), CFG.test_imgdir, img_size=CFG.img_size, is_train=False)
    # Deterministic view of the training fold, used only for embedding extraction.
    # The shuffled/augmented train_loader must not be used for that: its sample
    # order does not match tr_df, and its crops differ from the ones seen at
    # validation/test time.
    train_eval_ds = PawDataset(tr_df, CFG.train_imgdir, img_size=CFG.img_size, is_train=False)

    train_loader = DataLoader(train_ds, batch_size=CFG.batch_size, shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = DataLoader(valid_ds, batch_size=CFG.batch_size, shuffle=False, num_workers=4, pin_memory=True)
    test_loader  = DataLoader(test_ds,  batch_size=CFG.batch_size, shuffle=False, num_workers=4, pin_memory=True)
    train_eval_loader = DataLoader(train_eval_ds, batch_size=CFG.batch_size, shuffle=False, num_workers=4, pin_memory=True)

    model = SwinBackboneMulti(model_name=CFG.model_name, emb_dim=128, pretrained=True).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    # Loss scaling is only meaningful for CUDA mixed precision.
    scaler = torch.amp.GradScaler(device=device.type, enabled=(device.type == "cuda"))

    best_rmse = float("inf")
    best_path = fold_dir / "best.pt"
    patience_count = 0

    for epoch in range(1, CFG.epochs + 1):
        t0 = time.time()
        tr_loss = train_one_epoch(model, train_loader, optimizer, scaler, device, CFG.w_paw, CFG.w_bq)
        va_rmse = validate_epoch(model, valid_loader, device)
        print(f"Fold {fold} | Epoch {epoch} | Train loss {tr_loss:.4f} | Val RMSE {va_rmse:.4f} | {time.time()-t0:.1f}s")
        # Checkpoint only on a meaningful (>1e-4) improvement; otherwise count towards early stopping.
        if va_rmse < best_rmse - 1e-4:
            best_rmse = va_rmse
            patience_count = 0
            torch.save({"state_dict": model.state_dict(), "rmse": best_rmse}, best_path)
        else:
            patience_count += 1
            if patience_count >= CFG.patience:
                print(f"Early stopping at epoch {epoch}, best RMSE {best_rmse:.4f}")
                break
        torch.cuda.empty_cache()

    # Restore the best-epoch weights before producing any predictions.
    ckpt = torch.load(best_path, map_location="cpu")
    model.load_state_dict(ckpt["state_dict"])
    del optimizer, scaler, ckpt
    gc.collect(); torch.cuda.empty_cache()

    va_ids, va_embs, va_paw, va_bq = extract_embeddings(model, valid_loader, device)
    te_ids, te_embs, te_paw, te_bq = extract_embeddings(model, test_loader, device)
    tr_ids, tr_embs, tr_paw, tr_bq = extract_embeddings(model, train_eval_loader, device)

    # Pair every training embedding with its own label via the returned ids,
    # so the SVR fit stays correct regardless of loader ordering.
    tr_target_by_id = tr_df.drop_duplicates("Id").set_index("Id")["Pawpularity"].to_dict()
    y_tr = np.array([tr_target_by_id[i] for i in tr_ids], dtype=np.float32)

    svr = Pipeline([("scaler", StandardScaler()), ("svr", SVR(C=10.0, epsilon=0.2, kernel="rbf"))])
    svr.fit(tr_embs, y_tr)

    va_svr = svr.predict(va_embs).astype(np.float32)
    te_svr = svr.predict(te_embs).astype(np.float32)

    # Equal-weight blend of the network head and the SVR.
    va_ens = (va_paw + va_svr) / 2.0
    te_ens = (te_paw + te_svr) / 2.0

    # One dictionary lookup per validation sample instead of a full-column scan.
    va_target_by_id = va_df.drop_duplicates("Id").set_index("Id")["Pawpularity"].to_dict()
    for i, vid in enumerate(va_ids):
        oof_rows.append({
            "Id": vid,
            "fold": fold,
            "y_true": float(va_target_by_id[vid]),
            "pred_mlp": float(va_paw[i]),
            "pred_svr": float(va_svr[i]),
            "pred_ens": float(va_ens[i]),
            "pred_brisque": float(va_bq[i]),
        })

    np.save(fold_dir / "val_embeddings.npy", va_embs)
    np.save(fold_dir / "train_embeddings.npy", tr_embs)
    np.save(fold_dir / "test_embeddings.npy", te_embs)
    np.save(fold_dir / "val_pred_mlp.npy", va_paw)
    np.save(fold_dir / "val_pred_svr.npy", va_svr)
    np.save(fold_dir / "val_pred_ens.npy", va_ens)
    np.save(fold_dir / "test_pred_mlp.npy", te_paw)
    np.save(fold_dir / "test_pred_svr.npy", te_svr)
    np.save(fold_dir / "test_pred_ens.npy", te_ens)
    np.save(fold_dir / "val_pred_brisque.npy", va_bq)
    np.save(fold_dir / "test_pred_brisque.npy", te_bq)
    # NOTE: this pickles a scikit-learn object through torch.save; reading it back
    # with torch.load needs weights_only=False and must only be done on trusted files.
    torch.save(svr, fold_dir / "svr.pkl")

    fold_summaries[f"fold_{fold}"] = {"best_val_rmse": best_rmse, "val_count": len(va_ids), "test_count": len(te_ids)}
    test_preds_folds.append(pd.DataFrame({"Id": te_ids, f"fold{fold}": te_ens}))

    # Release everything fold-specific before the next model is created.
    del train_loader, valid_loader, test_loader, train_eval_loader
    del train_ds, valid_ds, test_ds, train_eval_ds
    del tr_embs, va_embs, te_embs, tr_paw, va_paw, te_paw, va_svr, te_svr, va_ens, te_ens
    del tr_bq, va_bq, te_bq
    del model, svr
    gc.collect(); torch.cuda.empty_cache()



Fold 0 | Epoch 1 | Train loss 3.9356 | Val RMSE 20.1494 | 1499.1s
Fold 0 | Epoch 2 | Train loss 2.8710 | Val RMSE 19.5414 | 4331.5s
Fold 0 | Epoch 3 | Train loss 2.5800 | Val RMSE 19.2912 | 3956.6s
Fold 0 | Epoch 4 | Train loss 2.3841 | Val RMSE 19.3116 | 3969.8s
Fold 0 | Epoch 5 | Train loss 2.1783 | Val RMSE 18.5916 | 5053.9s
Fold 0 | Epoch 6 | Train loss 2.0478 | Val RMSE 18.7837 | 4670.8s
Fold 0 | Epoch 7 | Train loss 1.8707 | Val RMSE 18.7320 | 4542.5s
Fold 0 | Epoch 8 | Train loss 1.7737 | Val RMSE 18.9551 | 4896.4s
Fold 0 | Epoch 9 | Train loss 1.6296 | Val RMSE 18.6201 | 4312.3s
Fold 0 | Epoch 10 | Train loss 1.5435 | Val RMSE 18.8069 | 4716.4s
Early stopping at epoch 10, best RMSE 18.5916

Fold 1 | Epoch 1 | Train loss 3.9496 | Val RMSE 20.7110 | 5614.0s
Fold 1 | Epoch 2 | Train loss 2.8897 | Val RMSE 20.2042 | 5678.6s
Fold 1 | Epoch 3 | Train loss 2.5904 | Val RMSE 19.6665 | 5671.3s
Fold 1 | Epoch 4 | Train loss 2.3869 | Val RMSE 19.4245 | 5409.0s
Fold 1 | Epoch 5 | Train lo

In [None]:
#  Save OOF and submission

# Persist the out-of-fold predictions gathered during cross-validation and
# report the RMSE of the blended ("pred_ens") prediction over all folds.
oof_df = pd.DataFrame(oof_rows)
oof_df.to_csv(Path(CFG.outdir) / "oof_predictions.csv", index=False)
oof_rmse = rmse_score(oof_df["y_true"].values, oof_df["pred_ens"].values)
print(f"OOF RMSE (ensemble): {oof_rmse:.4f}")
# Put the overall score first, followed by the per-fold entries.
fold_summaries = {"oof_rmse": oof_rmse, **fold_summaries}
save_json(fold_summaries, Path(CFG.outdir) / "fold_summaries.json")

# Build the submission in the row order of the sample file: attach one
# prediction column per fold, then average the fold columns.
sub_base = pd.read_csv(CFG.sample_submission)
sub = sub_base[["Id"]].copy()
for df_fold in test_preds_folds:
    # Left merge keeps every sample-submission Id; an Id absent from a fold's
    # predictions gets NaN in that fold's column.
    sub = sub.merge(df_fold, on="Id", how="left")
fold_cols = [c for c in sub.columns if c.startswith("fold")]
# Row-wise mean over folds, clipped to the range [1, 100].
# NOTE(review): with pandas' default skipna the mean ignores NaN fold values, and a
# row with no fold prediction at all stays NaN in the written file — confirm this
# cannot happen for the real test set.
sub["Pawpularity"] = sub[fold_cols].mean(axis=1).clip(1, 100)
sub[["Id", "Pawpularity"]].to_csv(Path(CFG.outdir) / "submission.csv", index=False)
print("Saved:", Path(CFG.outdir) / "submission.csv")


In [None]:
#  Ensembling

# Reload the OOF predictions from disk so this cell can run without the
# in-memory results of the training cells.
oof_path = Path(CFG.outdir) / "oof_predictions.csv"
oof = pd.read_csv(oof_path)

# Ground truth and the two per-model predictions as float32 arrays.
y_true   = oof["y_true"].values.astype(np.float32)
pred_mlp = oof["pred_mlp"].values.astype(np.float32)
pred_svr = oof["pred_svr"].values.astype(np.float32)

def rmse(y_true, y_pred):
    """Root-mean-squared error between two NumPy arrays of equal shape."""
    residual = y_true - y_pred
    return np.sqrt(np.mean(residual * residual))

# Reference scores: each model on its own, and their equal-weight average.
rmse_mlp = rmse(y_true, pred_mlp)
rmse_svr = rmse(y_true, pred_svr)
rmse_avg = rmse(y_true, 0.5 * pred_mlp + 0.5 * pred_svr)

def find_best_weight(y_true, mlp, svr):
    """Grid-search the blend weight that minimises RMSE.

    Evaluates ``w * mlp + (1 - w) * svr`` for 1001 evenly spaced values of
    ``w`` in [0, 1] and keeps the first weight achieving the lowest RMSE.

    Args:
        y_true: Ground-truth values.
        mlp: Predictions weighted by ``w``.
        svr: Predictions weighted by ``1 - w``.

    Returns:
        Tuple ``(best_w, best_rmse)``. If no candidate produces a comparable
        RMSE (e.g. NaN inputs), ``(0.0, inf)`` is returned.
    """
    best_w = 0.0
    # Start from +inf rather than an arbitrary finite sentinel so that the true
    # minimum is always found, however large the errors are.
    best_rmse = float("inf")
    for w in np.linspace(0, 1, 1001):
        r = rmse(y_true, w * mlp + (1 - w) * svr)
        if r < best_rmse:
            best_rmse, best_w = r, w
    return best_w, best_rmse

# Search for the MLP/SVR blend weight with the lowest OOF RMSE.
# NOTE(review): the weight is selected on the same OOF predictions it is scored
# on, so the reported "optimal" RMSE is an optimistic estimate.
best_w, best_rmse = find_best_weight(y_true, pred_mlp, pred_svr)

# Summary table; the reported weight applies to the MLP prediction and the
# SVR prediction receives (1 - weight).
print("=======================================")
print("           OOF RMSE RESULTS")
print("=======================================")
print(f" MLP only RMSE              : {rmse_mlp:.6f}")
print(f" SVR only RMSE              : {rmse_svr:.6f}")
print(f" Simple Average (0.5/0.5)   : {rmse_avg:.6f}")
print("---------------------------------------")
print(f" Optimal Blend Weight (MLP) : {best_w:.4f}")
print(f" Optimal Blend RMSE         : {best_rmse:.6f}")
print("=======================================")
