### Extract ImageNet “1k-logit” features (feature factory)

In [2]:
from pathlib import Path
import numpy as np, pandas as pd, torch, timm
from PIL import Image
from tqdm import tqdm
from timm.data import create_transform
try:
    # most timm versions
    from timm.data import resolve_data_config
except Exception:
    # older alias fallback
    from timm.data import resolve_model_data_config as resolve_data_config

# Project layout: raw competition files are read from DATA, cached arrays are written to PROC.
ROOT = Path("/workspace/pet-finder")
DATA = ROOT.joinpath("data", "raw")
PROC = ROOT.joinpath("data", "processed")
PROC.mkdir(parents=True, exist_ok=True)

# Pick the accelerator in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# This notebook only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)
print("Device:", device)

def ids_for(split):
    """Return the image ids for ``split`` as a list of strings in a stable order.

    For ``"train"`` the ids come from the ``Id`` column of ``train.csv`` in
    file order, so feature rows line up with the labels read from the same
    CSV. For any other value the ids are the sorted stems of the ``*.jpg``
    files under ``DATA / "test"``.
    """
    if split != "train":
        # Sorting makes the test order repeatable across runs.
        stems = [jpg.stem for jpg in (DATA / "test").glob("*.jpg")]
        stems.sort()
        return stems

    train_table = pd.read_csv(DATA / "train.csv")
    return train_table["Id"].astype(str).tolist()

# Each entry is (timm model name, square input resolution in pixels); the pair is
# unpacked as (model_name, img_size) when the feature stack is built.
BACKBONES = list(
    {
        "vit_base_patch16_384": 384,
        "deit_base_distilled_patch16_384": 384,
        "tf_efficientnet_l2_ns_475": 475,
    }.items()
)

# When True, each image's logits are averaged with those of its horizontal mirror.
TTA_HFLIP = True

def make_transform(model, img_size: int):
    """Build the eval-time preprocessing transform for ``model`` at ``img_size``.

    The model's resolved data config is used as the base, with ``input_size``
    overridden to ``(3, img_size, img_size)`` and ``is_training`` forced to
    False. ``input_size`` is set inside the config dict so that it reaches
    ``create_transform`` exactly once.
    """
    cfg = resolve_data_config({}, model=model)
    cfg["input_size"] = (3, img_size, img_size)
    cfg["is_training"] = False  # eval-time transforms
    return create_transform(**cfg)

def extract_logits(model_name, img_size, split):
    """Return the per-image output logits of a pretrained timm model, cached on disk.

    Parameters
    ----------
    model_name : str
        Name passed to ``timm.create_model(..., pretrained=True)``.
    img_size : int
        Square input resolution handed to ``make_transform``.
    split : str
        Passed to ``ids_for`` and used as the image sub-folder under ``DATA``.

    Returns
    -------
    numpy.ndarray
        One row per id from ``ids_for(split)``, in that order, stored as
        float16. The array is saved to ``PROC / f"{model_name}_{split}_logits.npy"``
        and reloaded from there on later calls.

    Notes
    -----
    A cached file is reused only when its row count matches the current id
    list. If the id list cannot be read or is empty (raw data not available),
    the cache is trusted as-is.
    """
    from contextlib import nullcontext

    out = PROC / f"{model_name}_{split}_logits.npy"

    if out.exists():
        cached = np.load(out)
        try:
            expected_rows = len(ids_for(split))
        except OSError:
            # Raw data is not reachable, so the cache cannot be validated; trust it.
            expected_rows = 0
        if expected_rows in (0, len(cached)):
            return cached
        # A different image set would silently misalign features with ids/labels.
        print(
            f"Stale cache {out.name}: {len(cached)} rows vs "
            f"{expected_rows} ids; recomputing"
        )

    model = timm.create_model(model_name, pretrained=True).eval().to(device)
    tfm = make_transform(model, img_size)

    def amp_context():
        # torch.cuda.amp.autocast is deprecated; torch.autocast is its replacement.
        # Mixed precision is only used on CUDA, every other device runs as-is.
        if device == "cuda":
            return torch.autocast(device_type="cuda")
        return nullcontext()

    feats, id_list = [], ids_for(split)
    for sid in tqdm(id_list, desc=f"{model_name}-{split}"):
        # Close each file promptly instead of leaving thousands of handles to the GC.
        with Image.open(DATA / split / f"{sid}.jpg") as raw:
            x = tfm(raw.convert("RGB")).unsqueeze(0).to(device)
        # Local no_grad keeps the function correct even if the global grad switch is on.
        with torch.no_grad(), amp_context():
            logits = model(x)
            if TTA_HFLIP:
                # Average with the prediction for the horizontally mirrored image.
                logits = 0.5 * (logits + model(torch.flip(x, dims=[-1])))
        feats.append(logits.squeeze(0).float().cpu().numpy())

    feats = np.stack(feats, 0).astype("float16")  # compact cache
    np.save(out, feats)
    return feats

def build_stack(split):
    """Concatenate all backbone logits for ``split`` column-wise and save the result.

    One feature block is produced per entry in ``BACKBONES`` via
    ``extract_logits``. If ``PROC / f"clip_vitb32_{split}.npy"`` exists it is
    appended as an extra block. The combined float32 matrix is written to
    ``PROC / f"STACK_{split}.npy"`` and returned.
    """
    blocks = []
    for backbone, resolution in BACKBONES:
        blocks.append(extract_logits(backbone, resolution, split))

    # Optional extra block: CLIP embeddings cached by another notebook.
    clip_cache = PROC / f"clip_vitb32_{split}.npy"
    if clip_cache.exists():
        print(f"Appending CLIP cache: {clip_cache.name}")
        blocks.append(np.load(clip_cache))

    # Upcast the float16 caches to float32 for SVR stability.
    stacked = np.concatenate(blocks, axis=1).astype("float32")
    np.save(PROC / f"STACK_{split}.npy", stacked)
    return stacked

# Build (or reload from cache) the stacked feature matrices for both splits.
X_tr = build_stack("train")
X_te = build_stack("test")

# The optional CLIP cache is looked up per split; if it exists for only one of
# them the two stacks end up with different widths. Fail here with a clear
# message instead of at predict time.
assert X_tr.shape[1] == X_te.shape[1], (
    f"train/test feature widths differ: {X_tr.shape[1]} vs {X_te.shape[1]}"
)
print("Stack shapes:", X_tr.shape, X_te.shape)  # ~ (9912, 3000–4000+), (~8k, 3000–4000+)


Device: cuda


  with torch.cuda.amp.autocast(enabled=(device == "cuda")):
vit_base_patch16_384-train: 100%|██████████| 9912/9912 [01:36<00:00, 102.78it/s]


model.safetensors:   0%|          | 0.00/351M [00:00<?, ?B/s]

  with torch.cuda.amp.autocast(enabled=(device == "cuda")):
deit_base_distilled_patch16_384-train: 100%|██████████| 9912/9912 [01:35<00:00, 103.34it/s]
  model = create_fn(


model.safetensors:   0%|          | 0.00/1.93G [00:00<?, ?B/s]

  with torch.cuda.amp.autocast(enabled=(device == "cuda")):
tf_efficientnet_l2_ns_475-train: 100%|██████████| 9912/9912 [06:35<00:00, 25.04it/s]
vit_base_patch16_384-test: 100%|██████████| 8/8 [00:00<00:00, 166.67it/s]
deit_base_distilled_patch16_384-test: 100%|██████████| 8/8 [00:00<00:00, 168.51it/s]
  model = create_fn(
tf_efficientnet_l2_ns_475-test: 100%|██████████| 8/8 [00:00<00:00, 27.80it/s]

Stack shapes: (9912, 3000) (8, 3000)





### Train SVR (RBF) with 5-fold CV and save OOF/TEST preds

In [5]:
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error  # still fine to import
import numpy as np, pandas as pd

# Labels come from train.csv in file order; the stacks are the matrices saved by build_stack.
y = pd.read_csv(DATA/"train.csv")["Pawpularity"].values.astype("float32")
X_tr = np.load(PROC/"STACK_train.npy"); X_te = np.load(PROC/"STACK_test.npy")

# A stale or partial stack would otherwise be indexed without error and leave
# part of the out-of-fold vector at zero, so check alignment up front.
assert len(X_tr) == len(y), (
    f"STACK_train.npy has {len(X_tr)} rows but train.csv has {len(y)} labels"
)
assert X_tr.shape[1] == X_te.shape[1], (
    f"train/test feature widths differ: {X_tr.shape[1]} vs {X_te.shape[1]}"
)

# Try RAPIDS cuML; fall back to scikit-learn
USE_CUML = False
try:
    from cuml.svm import SVR as cuSVR
    USE_CUML = True
    print("Using RAPIDS cuML SVR")
except Exception:
    # Any failure to load cuML (not installed, broken GPU stack) selects the CPU path.
    from sklearn.svm import SVR as skSVR
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import make_pipeline
    print("Using scikit-learn SVR")

def asnumpy(x):
    """Return ``x`` as a host NumPy array.

    Handles, in order:
    - NumPy arrays, returned unchanged;
    - CuPy arrays (when CuPy can be imported), copied to host with ``cp.asnumpy``;
    - objects exposing ``to_numpy()`` (pandas, and GPU dataframe/series types
      that reject implicit ``__array__`` conversion);
    - anything else, via ``np.asarray``.

    Only the CuPy import is treated as optional. An error raised while copying
    a CuPy array to host propagates instead of being silently swallowed.
    """
    if isinstance(x, np.ndarray):
        return x

    try:
        import cupy as cp
    except Exception:
        # CuPy missing or unusable: x cannot be a CuPy array in that case.
        cp = None

    if cp is not None and isinstance(x, cp.ndarray):
        return cp.asnumpy(x)

    to_numpy = getattr(x, "to_numpy", None)
    if callable(to_numpy):
        return np.asarray(to_numpy())

    return np.asarray(x)

# 5-fold CV: `oof` collects the held-out prediction for every training row,
# `test_pred` accumulates the mean of the per-fold test predictions.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
oof = np.zeros(len(y), dtype="float32")
test_pred = np.zeros(len(X_te), dtype="float32")
n_folds = cv.get_n_splits()

for fold, (tr, va) in enumerate(cv.split(X_tr), start=1):
    X_fit, X_val, y_fit = X_tr[tr], X_tr[va], y[tr]

    if not USE_CUML:
        # scikit-learn path: the pipeline standardizes with training-fold statistics.
        model = make_pipeline(
            StandardScaler(with_mean=True, with_std=True),
            skSVR(kernel="rbf", C=10.0, epsilon=0.1, gamma="scale", max_iter=4000),
        )
        model.fit(X_fit, y_fit)
        pred_va = model.predict(X_val).astype("float32", copy=False)
        pred_te = model.predict(X_te).astype("float32", copy=False)
    else:
        # cuML path: per-fold standardization (no leakage); the small offset
        # keeps the division finite for constant columns.
        mu = X_fit.mean(0)
        sd = X_fit.std(0) + 1e-6
        model = cuSVR(kernel="rbf", C=16.0, epsilon=0.1, max_iter=4000)
        model.fit((X_fit - mu) / sd, y_fit)
        pred_va = asnumpy(model.predict((X_val - mu) / sd)).astype("float32", copy=False)
        pred_te = asnumpy(model.predict((X_te - mu) / sd)).astype("float32", copy=False)

    oof[va] = pred_va
    test_pred += pred_te / n_folds

    # Manual RMSE (avoid squared= kwarg)
    residual = y[va] - pred_va
    fold_rmse = float(np.sqrt(np.mean(residual**2)))
    print(f"Fold {fold} RMSE: {fold_rmse:.5f}")

# Overall CV RMSE over all out-of-fold predictions
rmse = float(np.sqrt(np.mean((y - oof)**2)))
print(f"CV RMSE (OOF): {rmse:.5f}")

np.save(PROC/"SVR_oof.npy", oof)
np.save(PROC/"SVR_test.npy", test_pred)


Using RAPIDS cuML SVR
Fold 1 RMSE: 17.27574
Fold 2 RMSE: 17.69399
Fold 3 RMSE: 17.15335
Fold 4 RMSE: 17.07812
Fold 5 RMSE: 17.63782
CV RMSE (OOF): 17.36966


### Light calibration + submission

In [6]:
# Scale the fold-averaged SVR test predictions by a fixed factor, then clamp to [0, 100].
svr_test = np.load(PROC/"SVR_test.npy")
final = (1.032 * svr_test).clip(0, 100)

# ids_for("test") returns ids in sorted order, the same order used when the test features were built.
sub = pd.DataFrame({"Id": ids_for("test")}).assign(Pawpularity=final)
sub.to_csv("submission.csv", index=False)
sub.head()

Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,36.215462
1,43a2262d7738e3d420d453815151079e,36.337147
2,4e429cead1848a298432a0acad014c9d,36.200581
3,80bc3ccafcc51b66303c2c263aa38486,36.137867
4,8f49844c382931444e68dffbe20228f4,36.242157
