In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install -q optuna transformers

In [None]:
df_train = pd.read_csv("/kaggle/input/aml-dataset/augmented_catalog_final_2.csv")

In [None]:
df_train

In [None]:
df_train = df_train.drop('image_link', axis=1)

In [None]:
df_train

In [None]:
import argparse

In [None]:
df_test = pd.read_csv("/kaggle/input/aml-dataset/augmented_test_final.csv")

In [None]:
df_test

In [None]:
# ==== Cell 0: Pin compatible versions (run once, then restart & run from Cell 1) ====
!pip -q install "httpx>=0.27.0" "huggingface_hub>=0.24.6" "transformers>=4.43.4,<4.46"

import sys, os, pkgutil
print("httpx  :", __import__("httpx").__version__)
print("hf_hub:", __import__("huggingface_hub").__version__)
print("trfs  :", __import__("transformers").__version__)

print("\n✅ Please now restart the Python kernel (Kaggle: Runtime > Restart) and run from Cell 1.")


In [1]:
# ===== Cell 1: Imports & Config =====
import os
import json
import math
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import (
    AutoTokenizer,
    AutoModel,
    DataCollatorWithPadding,
    get_cosine_schedule_with_warmup
)
from torch.optim import AdamW
from tqdm.auto import tqdm

# Optional (your original had this, but Kaggle has 1 GPU; keep commented)
# os.environ["CUDA_VISIBLE_DEVICES"] = "3"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

CONFIG = {
    # Paths
    "train_path": "/kaggle/input/aml-dataset/augmented_catalog_final_2.csv",   # change if yours differs
    "test_path":  "/kaggle/input/aml-dataset/augmented_test_final.csv",
    "final_model_path": "/kaggle/working/price_predictor_deberta_v3_large.pt",
    "out_dir": "/kaggle/working",

    # Repro
    "random_seed": 42,

    # Model
    "model_name": "microsoft/deberta-v3-small",
    "max_length": 160,   # use for both search & final to stay as close to original as possible
    "dropout": 0.2,      # will be overridden by best search value for final run

    # Training (final)
    "batch_size": 16,
    "epochs": 10,        # your original default for full-data train
    "lr_encoder": 2e-5,  # overridden by best search value
    "lr_head": 1e-3,     # overridden by best search value
    "weight_decay": 0.01,
    "warmup_ratio": 0.1,
    "gradient_clip": 1.0,

    # System
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "num_workers": max(0, min(8, (os.cpu_count() or 2) // 2)),
    "use_amp": True,
}

# Search settings (simple grid, no extra dependency)
SEARCH = {
    "folds": 3,
    "epochs": 3,   # keep short to fit 1–2h; adjust if you want more accuracy
    "batch_size": 16,
    "grid_lr_encoder": [2e-5, 3e-5, 5e-5],
    "grid_lr_head":    [1e-3, 2e-3, 3e-3],
    "grid_dropout":    [0.2, 0.3],
}

def seed_everything(seed: int = 42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed); torch.cuda.manual_seed_all(seed)

seed_everything(CONFIG["random_seed"])
device = torch.device(CONFIG["device"])
print("Device:", device)


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

2025-10-13 12:23:22.841966: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760358202.864708     129 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760358202.871822     129 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Device: cuda


In [2]:
# ===== Cell 2: Helpers & original-style components =====

def create_combined_text(row):
    category = str(row['product_category']).strip()
    description = str(row['description']).strip() if 'description' in row and pd.notna(row['description']) else ""
    value = row['value']
    unit = str(row['unit']).strip().capitalize()
    text_parts = [f"Category: {category}"]
    if description:
        text_parts.append(f"Description: {description}")
    if unit.lower() != 'count' or value != 1:
        text_parts.append(f"Amount: {value} {unit}")
    return " [SEP] ".join(text_parts)

class PriceDataset(Dataset):
    def __init__(self, encodings, prices):
        self.encodings = encodings
        self.prices = prices
    def __len__(self):
        return len(self.encodings["input_ids"])
    def __getitem__(self, idx):
        item = {k: self.encodings[k][idx] for k in ('input_ids', 'attention_mask')}
        if self.prices is not None:
            item['price'] = torch.tensor(self.prices[idx], dtype=torch.float32)
        return item

class PriceRegressor(nn.Module):
    def __init__(self, model_name: str, dropout: float = 0.2):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        hidden_size = self.encoder.config.hidden_size
        self.regressor = nn.Sequential(
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, 512),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(256, 1)
        )
    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden = outputs.last_hidden_state
        mask = attention_mask.unsqueeze(-1).type_as(last_hidden)
        pooled = (last_hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        return self.regressor(pooled).squeeze(-1)

def build_optimizer(model, lr_enc, lr_head, weight_decay, fused=True):
    encoder_params = [p for p in model.encoder.parameters() if p.requires_grad]
    head_params    = list(model.regressor.parameters())
    opt_kwargs = {"weight_decay": weight_decay}
    if device.type == "cuda" and fused:
        opt_kwargs["fused"] = True
    optimizer = AdamW([
        {'params': encoder_params, 'lr': lr_enc},
        {'params': head_params,    'lr': lr_head}
    ], **opt_kwargs)
    return optimizer

def build_collate_fn(tokenizer):
    collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest", return_tensors="pt")
    def collate_fn(batch):
        if 'price' in batch[0]:
            prices = torch.tensor([ex['price'] for ex in batch], dtype=torch.float32)
        else:
            prices = None
        features = [{k: ex[k] for k in ('input_ids', 'attention_mask')} for ex in batch]
        batch_out = collator(features)
        if prices is not None:
            batch_out['price'] = prices
        return batch_out
    return collate_fn

def train_epoch(model, loader, optimizer, scheduler, scaler, amp_enabled):
    model.train()
    total_loss = 0.0
    for batch in tqdm(loader, desc="Training", leave=False):
        input_ids = batch['input_ids'].to(device, non_blocking=True)
        attention_mask = batch['attention_mask'].to(device, non_blocking=True)
        targets = batch['price'].to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=amp_enabled):
            preds = model(input_ids, attention_mask)           # preds = log_price
            loss = nn.functional.huber_loss(preds, targets, delta=0.5)
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        nn.utils.clip_grad_norm_(model.parameters(), CONFIG["gradient_clip"])
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        total_loss += float(loss)
    return total_loss / max(1, len(loader))

@torch.no_grad()
def evaluate_smape(model, loader):
    model.eval()
    preds, trues = [], []
    for batch in loader:
        input_ids = batch['input_ids'].to(device, non_blocking=True)
        attention_mask = batch['attention_mask'].to(device, non_blocking=True)
        targets = batch['price'].to(device, non_blocking=True)
        out = model(input_ids, attention_mask)    # log_price
        preds.append(torch.expm1(out).cpu().numpy())
        trues.append(torch.expm1(targets).cpu().numpy())
    preds = np.concatenate(preds)
    trues = np.concatenate(trues)
    # SMAPE
    num = np.abs(preds - trues)
    den = (np.abs(preds) + np.abs(trues) + 0.1) / 2.0
    return float(np.mean(num / den))


In [3]:
# ===== Cell 3: Load + preprocess =====
def load_train_df(path):
    df = pd.read_csv(path)
    if "description" not in df.columns and "misc_info" in df.columns:
        df = df.rename(columns={"misc_info": "description"})
    # keep your original handling
    df.dropna(subset=['price', 'product_category'], inplace=True)
    missing_value_mask = df['value'].isna()
    df.loc[missing_value_mask, 'value'] = 1
    df.loc[missing_value_mask, 'unit'] = 'Count'
    df['description'] = df['description'].fillna("")

    df['combined_text'] = df.apply(create_combined_text, axis=1)
    df['log_price'] = np.log1p(df['price'])
    return df.reset_index(drop=True)

def load_test_df(path):
    df = pd.read_csv(path)
    if "description" not in df.columns and "misc_info" in df.columns:
        df = df.rename(columns={"misc_info": "description"})
    if "value" not in df.columns:
        df["value"] = 1
    if "unit" not in df.columns:
        df["unit"] = "Count"
    df['description'] = df['description'].fillna("")
    df['combined_text'] = df.apply(create_combined_text, axis=1)
    return df.reset_index(drop=True)

df_train = load_train_df(CONFIG["train_path"])
print("Train shape:", df_train.shape)
display(df_train.head(3))


Train shape: (74999, 10)


Unnamed: 0,sample_id,product_category,price,value,unit,price_per_unit,description,image_link,combined_text,log_price
0,33127,"La Victoria Green Taco Sauce Mild, 12 Ounce (P...",4.89,72.0,Fl Oz,0.067917,,https://m.media-amazon.com/images/I/51mo8htwTH...,"Category: La Victoria Green Taco Sauce Mild, 1...",1.773256
1,198967,"Salerno Cookies, The Original Butter Cookies, ...",13.12,32.0,Ounce,0.41,Original Butter Cookies: Classic butter cookie...,https://m.media-amazon.com/images/I/71YtriIHAA...,"Category: Salerno Cookies, The Original Butter...",2.647592
2,261251,"Bear Creek Hearty Soup Bowl, Creamy Chicken wi...",1.97,11.4,Ounce,0.172807,Loaded with hearty long grain wild rice and ve...,https://m.media-amazon.com/images/I/51+PFEe-w-...,"Category: Bear Creek Hearty Soup Bowl, Creamy ...",1.088562


In [4]:
# ===== Cell 4: Tokenizer & fold split (robust) =====
from transformers import AutoTokenizer
from sklearn.model_selection import StratifiedKFold

def get_tokenizer_robust(model_name: str):
    # 1) normal path (fast tokenizer)
    try:
        print(f"Loading tokenizer for '{model_name}' (fast)...")
        tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        return tok
    except Exception as e:
        print("Tokenizer fast load failed. Will try local snapshot. Error:")
        print(e)

    # 2) snapshot to a local dir, then load with local_files_only=True
    try:
        print("Downloading local snapshot and loading tokenizer locally...")
        from huggingface_hub import snapshot_download
        local_dir = snapshot_download(repo_id=model_name)  # downloads tokenizer files
        tok = AutoTokenizer.from_pretrained(local_dir, use_fast=True, local_files_only=True)
        return tok
    except Exception as e2:
        print("Local fast load failed as well. Falling back to slow tokenizer. Error:")
        print(e2)

    # 3) last fallback: slow tokenizer
    print("Loading slow tokenizer as last resort...")
    tok = AutoTokenizer.from_pretrained(model_name, use_fast=False)
    return tok

tokenizer = get_tokenizer_robust(CONFIG["model_name"])

def make_folds(df, n_splits=3, seed=42):
    df = df.copy()
    bins = min(50, max(10, int(len(df) ** 0.5)))
    df["price_bin"] = pd.qcut(df["price"].rank(method="first"), q=bins, labels=False, duplicates="drop")
    df["kfold"] = -1
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for fold, (_, val_idx) in enumerate(skf.split(df, df["price_bin"])):
        df.loc[df.index[val_idx], "kfold"] = fold
    return df


Loading tokenizer for 'microsoft/deberta-v3-small' (fast)...




In [None]:
# ===== Cell 5: Grid search over (lr_encoder, lr_head, dropout) =====
def run_grid_search(df):
    results = []
    df_folds = make_folds(df, n_splits=SEARCH["folds"], seed=CONFIG["random_seed"])
    texts = df_folds["combined_text"].tolist()
    log_prices = df_folds["log_price"].values
    # Pre-tokenize once for speed
    enc = tokenizer(texts, truncation=True, max_length=CONFIG["max_length"], padding=False)

    for dropout in SEARCH["grid_dropout"]:
        for lr_enc in SEARCH["grid_lr_encoder"]:
            for lr_head in SEARCH["grid_lr_head"]:
                fold_scores = []
                print(f"\n>>> Trying: dropout={dropout}, lr_encoder={lr_enc}, lr_head={lr_head}")
                for f in range(SEARCH["folds"]):
                    tr_idx = df_folds.index[df_folds["kfold"] != f].to_list()
                    va_idx = df_folds.index[df_folds["kfold"] == f].to_list()

                    tr_enc = {k: [enc[k][i] for i in tr_idx] for k in ("input_ids","attention_mask")}
                    va_enc = {k: [enc[k][i] for i in va_idx] for k in ("input_ids","attention_mask")}
                    tr_ds = PriceDataset(tr_enc, log_prices[tr_idx])
                    va_ds = PriceDataset(va_enc, log_prices[va_idx])

                    collate_fn = build_collate_fn(tokenizer)
                    common = dict(batch_size=SEARCH["batch_size"], pin_memory=(device.type=="cuda"), collate_fn=collate_fn)
                    if CONFIG["num_workers"] > 0:
                        common.update(num_workers=CONFIG["num_workers"], persistent_workers=True, prefetch_factor=2)
                    tr_loader = DataLoader(tr_ds, shuffle=True, **common)
                    va_loader = DataLoader(va_ds, shuffle=False, **common)

                    # Model & optim
                    model = PriceRegressor(CONFIG["model_name"], dropout=dropout).to(device)
                    optimizer = build_optimizer(model, lr_enc, lr_head, CONFIG["weight_decay"], fused=True)
                    total_steps = len(tr_loader) * SEARCH["epochs"]
                    warmup_steps = int(total_steps * CONFIG["warmup_ratio"])
                    scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
                    scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda") and CONFIG["use_amp"])

                    # Train few epochs
                    for ep in range(1, SEARCH["epochs"]+1):
                        _ = train_epoch(model, tr_loader, optimizer, scheduler, scaler, amp_enabled=(device.type=="cuda") and CONFIG["use_amp"])

                    # Validate SMAPE (on price scale)
                    sm = evaluate_smape(model, va_loader)
                    fold_scores.append(sm)

                    # cleanup
                    del model, optimizer, scheduler, scaler, tr_loader, va_loader
                    torch.cuda.empty_cache(); 
                    import gc; gc.collect()

                mean_smape = float(np.mean(fold_scores))
                results.append({
                    "dropout": dropout, "lr_encoder": lr_enc, "lr_head": lr_head,
                    "fold_smapes": fold_scores, "mean_smape": mean_smape
                })
                print(f" -> 3-fold SMAPE: {mean_smape:.4f}")

    res_df = pd.DataFrame(results).sort_values("mean_smape").reset_index(drop=True)
    res_df.to_csv(os.path.join(CONFIG["out_dir"], "grid_search_results.csv"), index=False)
    return res_df

grid_results = run_grid_search(df_train)
display(grid_results.head(10))
best_row = grid_results.iloc[0].to_dict()
best_params = {
    "dropout": float(best_row["dropout"]),
    "lr_encoder": float(best_row["lr_encoder"]),
    "lr_head": float(best_row["lr_head"])
}
print("\nBest params from grid:", best_params)
with open(os.path.join(CONFIG["out_dir"], "best_params.json"), "w") as f:
    json.dump(best_params, f, indent=2)



>>> Trying: dropout=0.2, lr_encoder=2e-05, lr_head=0.001


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda") and CONFIG["use_amp"])


Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

 -> 3-fold SMAPE: 0.4787

>>> Trying: dropout=0.2, lr_encoder=2e-05, lr_head=0.002


Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

 -> 3-fold SMAPE: 0.4776

>>> Trying: dropout=0.2, lr_encoder=2e-05, lr_head=0.003


Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

 -> 3-fold SMAPE: 0.4773

>>> Trying: dropout=0.2, lr_encoder=3e-05, lr_head=0.001


Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

 -> 3-fold SMAPE: 0.4715

>>> Trying: dropout=0.2, lr_encoder=3e-05, lr_head=0.002


Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

Training:   0%|          | 0/3125 [00:00<?, ?it/s]

In [None]:
print(5)

In [None]:
# ===== Cell 6: Final full-data training with best params =====
CONFIG["dropout"]   = best_params["dropout"]
CONFIG["lr_encoder"] = best_params["lr_encoder"]
CONFIG["lr_head"]    = best_params["lr_head"]

print("="*70)
print("Final Model Training on Full Dataset (your original style)")
print("="*70)

# 1) Data
train_texts = df_train['combined_text'].tolist()
train_prices = df_train['log_price'].values

# 2) Tokenize & dataset
train_enc = tokenizer(train_texts, truncation=True, max_length=CONFIG["max_length"], padding=False)
train_dataset = PriceDataset(train_enc, train_prices)

collate_fn = build_collate_fn(tokenizer)
loader_common = dict(batch_size=CONFIG["batch_size"], pin_memory=(device.type=="cuda"), collate_fn=collate_fn)
if CONFIG["num_workers"] > 0:
    loader_common.update(num_workers=CONFIG["num_workers"], persistent_workers=True, prefetch_factor=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_common)

# 3) Build model & optim
model = PriceRegressor(CONFIG["model_name"], dropout=CONFIG["dropout"]).to(device)
optimizer = build_optimizer(model, CONFIG["lr_encoder"], CONFIG["lr_head"], CONFIG["weight_decay"], fused=True)
total_steps = len(train_loader) * CONFIG["epochs"]
warmup_steps = int(total_steps * CONFIG["warmup_ratio"])
scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda") and CONFIG["use_amp"])

# 4) Train
for epoch in range(1, CONFIG["epochs"] + 1):
    print(f"\nEpoch {epoch}/{CONFIG['epochs']}")
    train_loss = train_epoch(model, train_loader, optimizer, scheduler, scaler, amp_enabled=(device.type=="cuda") and CONFIG["use_amp"])
    print(f"  Train Loss (Huber on log_price): {train_loss:.4f}")

# Save
torch.save(model.state_dict(), CONFIG["final_model_path"])
print(f"\n✅ Final model saved to: {CONFIG['final_model_path']}")


In [None]:
# ===== Cell 7: Predict test & build submission =====
# Reload model to ensure clean state (optional but safe)
infer_model = PriceRegressor(CONFIG["model_name"], dropout=CONFIG["dropout"]).to(device)
infer_model.load_state_dict(torch.load(CONFIG["final_model_path"], map_location=device))
infer_model.eval()

# Load test
df_test = load_test_df(CONFIG["test_path"])
test_texts = df_test["combined_text"].tolist()
test_enc = tokenizer(test_texts, truncation=True, max_length=CONFIG["max_length"], padding=False)

test_ds = PriceDataset(test_enc, prices=None)
def collate_test(batch):
    features = [{k: ex[k] for k in ('input_ids', 'attention_mask')} for ex in batch]
    return DataCollatorWithPadding(tokenizer=tokenizer, padding="longest", return_tensors="pt")(features)

common = dict(batch_size=CONFIG["batch_size"], pin_memory=(device.type=="cuda"), collate_fn=collate_test)
if CONFIG["num_workers"] > 0:
    common.update(num_workers=CONFIG["num_workers"], persistent_workers=True, prefetch_factor=2)
test_loader = DataLoader(test_ds, shuffle=False, **common)

# Predict
all_preds = []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Inference", leave=False):
        input_ids = batch['input_ids'].to(device, non_blocking=True)
        attention_mask = batch['attention_mask'].to(device, non_blocking=True)
        out = infer_model(input_ids, attention_mask)  # log_price
        all_preds.append(torch.expm1(out).detach().cpu().numpy())
test_price = np.concatenate(all_preds)

# Build submission with columns exactly: id, price
id_col = "id" if "id" in df_test.columns else ("sample_id" if "sample_id" in df_test.columns else None)
assert id_col is not None, "Test must contain 'id' or 'sample_id' column."

submission = pd.DataFrame({"id": df_test[id_col].values, "price": test_price})
sub_path = "/kaggle/working/submission.csv"
submission.to_csv(sub_path, index=False)
print("Submission saved at:", sub_path)
display(submission.head())
