### Train a ViT regressor (5-fold) and emit OOF/TEST preds

In [1]:
# === Finetune ViT-B/16 @384 on Pawpularity (BCEWithLogits on y/100) ===
from pathlib import Path
import numpy as np, pandas as pd, torch, timm, math, time
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
from tqdm import tqdm
from timm.data import create_transform
try:
    from timm.data import resolve_data_config
except:
    from timm.data import resolve_model_data_config as resolve_data_config
from sklearn.model_selection import KFold

ROOT = Path("/workspace/pet-finder")
DATA = ROOT/"data"/"raw"
PROC = ROOT/"data"/"processed"; PROC.mkdir(parents=True, exist_ok=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
torch.backends.cudnn.benchmark = (device == "cuda")
scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))

# --- IDs (same order as your SVR step) ---
train_df = pd.read_csv(DATA/"train.csv")
ids_train = train_df["Id"].astype(str).tolist()
ids_test  = sorted(p.stem for p in (DATA/"test").glob("*.jpg"))
y_all = train_df["Pawpularity"].values.astype("float32") / 100.0  # scale to 0..1

# --- Transforms (use model’s cfg; eval tfm for val/test, train tfm for train) ---
model_name = "vit_base_patch16_384"; img_size = 384
_dummy = timm.create_model(model_name, pretrained=True, num_classes=1)
cfg = resolve_data_config({}, model=_dummy)
def make_tfm(is_train: bool):
    C = dict(cfg)  # copy
    C.update({"input_size": (3, img_size, img_size), "is_training": is_train})
    return create_transform(**C)

tfm_train = make_tfm(True)
tfm_eval  = make_tfm(False)

# --- Dataset ---
class PetDataset(Dataset):
    def __init__(self, split, ids, targets=None, tfm=None):
        self.split, self.ids, self.targets, self.tfm = split, ids, targets, tfm
        self.root = DATA/split
    def __len__(self): return len(self.ids)
    def __getitem__(self, i):
        sid = self.ids[i]
        img = Image.open(self.root/f"{sid}.jpg").convert("RGB")
        x = self.tfm(img)
        if self.targets is None: return x, sid
        return x, self.targets[i]

# --- Training helpers ---
def rmse_np(a, b):  # a,b are numpy arrays
    return float(np.sqrt(np.mean((a - b)**2)))

def train_one_epoch(model, loader, opt, loss_fn):
    model.train()
    total = 0.0
    for xb, yb in tqdm(loader, leave=False):
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(device=="cuda")):
            logits = model(xb).squeeze(-1)
            loss = loss_fn(logits, yb)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()
        total += loss.item() * xb.size(0)
    return total / len(loader.dataset)

@torch.no_grad()
def predict(model, loader, tta_hflip=True):
    model.eval()
    preds = []
    for xb in tqdm(loader, leave=False):
        if isinstance(xb, (list, tuple)): xb = xb[0]
        xb = xb.to(device)
        with torch.cuda.amp.autocast(enabled=(device=="cuda")):
            logits = model(xb).squeeze(-1)
            if tta_hflip:
                logits = 0.5*logits + 0.5*model(torch.flip(xb, dims=[-1])).squeeze(-1)
        preds.append(torch.sigmoid(logits).float().cpu().numpy() * 100.0)  # back to 0..100
    return np.concatenate(preds)

# --- 5-fold CV (same KFold seed/logic as SVR) ---
EPOCHS = 6
BATCH  = 32
LR     = 2e-4
WD     = 1e-4

oof = np.zeros(len(ids_train), dtype="float32")
test_pred_accum = np.zeros(len(ids_test), dtype="float32")

kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (tr_idx, va_idx) in enumerate(kf.split(ids_train), 1):
    print(f"\n===== Fold {fold} =====")
    # datasets
    ds_tr = PetDataset("train", [ids_train[i] for i in tr_idx], y_all[tr_idx], tfm_train)
    ds_va = PetDataset("train", [ids_train[i] for i in va_idx], y_all[va_idx], tfm_eval)
    ds_te = PetDataset("test",  ids_test, None, tfm_eval)

    dl_tr = DataLoader(ds_tr, batch_size=BATCH, shuffle=True,  num_workers=4, pin_memory=True)
    dl_va = DataLoader(ds_va, batch_size=BATCH, shuffle=False, num_workers=4, pin_memory=True)
    dl_te = DataLoader(ds_te, batch_size=BATCH, shuffle=False, num_workers=4, pin_memory=True)

    # model, opt, loss
    model = timm.create_model(model_name, pretrained=True, num_classes=1).to(device)
    opt   = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
    loss_fn = nn.BCEWithLogitsLoss()

    best_rmse, best_state = float("inf"), None
    for epoch in range(1, EPOCHS+1):
        tr_loss = train_one_epoch(model, dl_tr, opt, loss_fn)
        # val
        va_pred = predict(model, dl_va)              # 0..100
        va_true = (y_all[va_idx] * 100.0).astype("float32")
        rmse = rmse_np(va_true, va_pred)
        print(f"Epoch {epoch}: train_loss={tr_loss:.4f} | val_RMSE={rmse:.4f}")
        if rmse < best_rmse:
            best_rmse = rmse
            best_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}
    print(f"Best fold RMSE: {best_rmse:.4f}")

    # load best and produce final val/test preds
    if best_state is not None:
        model.load_state_dict(best_state, strict=False)
    oof[va_idx] = predict(model, dl_va)
    test_pred_accum += predict(model, dl_te) / kf.get_n_splits()

# Save finetuned predictions for blending
np.save(PROC/"FINETUNED_vitb16_384_train.npy", oof.astype("float32"))
np.save(PROC/"FINETUNED_vitb16_384_test.npy",  test_pred_accum.astype("float32"))
print("Saved FINETUNED_vitb16_384_{train,test}.npy")

  scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))



===== Fold 1 =====


  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):
                                               

Epoch 1: train_loss=0.7665 | val_RMSE=22.4000


                                                 

Epoch 2: train_loss=0.6713 | val_RMSE=21.0283


                                                 

Epoch 3: train_loss=0.6702 | val_RMSE=21.3528


                                                 

Epoch 4: train_loss=0.6684 | val_RMSE=22.5960


                                                 

Epoch 5: train_loss=0.6672 | val_RMSE=21.2462


                                                 

Epoch 6: train_loss=0.6678 | val_RMSE=21.0315
Best fold RMSE: 21.0283


                                               


===== Fold 2 =====


                                                 

Epoch 1: train_loss=0.7458 | val_RMSE=21.4845


                                                 

Epoch 2: train_loss=0.6720 | val_RMSE=20.5382


                                                 

Epoch 3: train_loss=0.6725 | val_RMSE=20.5966


                                                 

Epoch 4: train_loss=0.6677 | val_RMSE=20.5624


                                                 

Epoch 5: train_loss=0.6706 | val_RMSE=25.7686


                                                 

Epoch 6: train_loss=0.6703 | val_RMSE=20.6824
Best fold RMSE: 20.5382


                                               


===== Fold 3 =====


                                                 

Epoch 1: train_loss=0.7398 | val_RMSE=20.6498


                                                 

Epoch 2: train_loss=0.6713 | val_RMSE=20.2451


                                                 

Epoch 3: train_loss=0.6697 | val_RMSE=20.3899


                                                 

Epoch 4: train_loss=0.6703 | val_RMSE=20.0585


                                                 

Epoch 5: train_loss=0.6694 | val_RMSE=20.2984


                                                 

Epoch 6: train_loss=0.6680 | val_RMSE=20.0670
Best fold RMSE: 20.0585


                                               


===== Fold 4 =====


                                                 

Epoch 1: train_loss=0.7271 | val_RMSE=20.5512


                                                 

Epoch 2: train_loss=0.6718 | val_RMSE=20.5529


                                                 

Epoch 3: train_loss=0.6732 | val_RMSE=20.7039


                                                 

Epoch 4: train_loss=0.6675 | val_RMSE=20.7708


                                                 

Epoch 5: train_loss=0.6698 | val_RMSE=21.6838


                                                 

Epoch 6: train_loss=0.6667 | val_RMSE=20.5132
Best fold RMSE: 20.5132


                                               


===== Fold 5 =====


                                                 

Epoch 1: train_loss=0.7338 | val_RMSE=20.8532


                                                 

Epoch 2: train_loss=0.6709 | val_RMSE=20.6634


                                                 

Epoch 3: train_loss=0.6658 | val_RMSE=21.3546


                                                 

Epoch 4: train_loss=0.6684 | val_RMSE=20.8871


                                                 

Epoch 5: train_loss=0.6672 | val_RMSE=20.7788


                                                 

Epoch 6: train_loss=0.6684 | val_RMSE=20.6385
Best fold RMSE: 20.6385


                                               

Saved FINETUNED_vitb16_384_{train,test}.npy




### Blend the finetuned model with your SVR stack (OOF-based weights)

In [2]:
from pathlib import Path
import numpy as np, pandas as pd
from sklearn.linear_model import Ridge

ROOT = Path("/workspace/pet-finder")
DATA = ROOT/"data"/"raw"
PROC = ROOT/"data"/"processed"

y = pd.read_csv(DATA/"train.csv")["Pawpularity"].values.astype("float32")
ids_test  = sorted(p.stem for p in (DATA/"test").glob("*.jpg"))

# Load OOF/TEST columns
oof_svr  = np.load(PROC/"SVR_oof.npy")
test_svr = np.load(PROC/"SVR_test.npy")
oof_ft   = np.load(PROC/"FINETUNED_vitb16_384_train.npy")
test_ft  = np.load(PROC/"FINETUNED_vitb16_384_test.npy")

X_oof  = np.vstack([oof_svr, oof_ft]).T
X_test = np.vstack([test_svr, test_ft]).T

# Small ridge to stabilize weights; intercept allowed
blender = Ridge(alpha=1e-3, fit_intercept=True)
blender.fit(X_oof, y)
print("Blend weights:", blender.coef_, "intercept:", blender.intercept_)

final = blender.predict(X_test)
# GM-style calibration + bounds
final = np.clip(1.032 * final, 0, 100)

sub = pd.DataFrame({"Id": ids_test, "Pawpularity": final})
sub.to_csv("submission_blend.csv", index=False)
sub.head()

Blend weights: [0.95943797 0.1476404 ] intercept: -2.800251


Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,37.312897
1,43a2262d7738e3d420d453815151079e,37.429646
2,4e429cead1848a298432a0acad014c9d,37.291924
3,80bc3ccafcc51b66303c2c263aa38486,37.23027
4,8f49844c382931444e68dffbe20228f4,37.337021
