# DeBERTa-v3 Inference Pipeline
RAM-optimized inference for the Kaggle CPU environment.

In [None]:
import os
import gc
import math
import numpy as np
import pandas as pd
import joblib
import torch
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm

## Configuration

In [None]:
# Directory holding the encoder/tokenizer files and the pickled classifier,
# the CSV to score, and the file the predictions are written to.
MODEL_PATH = "/kaggle/input/deberta-v3-response-classifier/transformers/default/2"
TEST_CSV = "/kaggle/input/llm-classification-finetuning/test.csv"
OUT_SUBMISSION = "submission.csv"

# Thread count handed to torch.set_num_threads(). Capped at the number of CPUs
# the interpreter reports so the thread pool is never larger than the machine;
# the upper bound of 8 is the value this notebook used previously.
NUM_THREADS = min(8, os.cpu_count() or 1)
# Number of test rows encoded per forward pass.
BATCH_SIZE = 1
# Token limit passed to the tokenizer; longer inputs are truncated.
MAX_LENGTH = 256
# When True the extracted embeddings are cast to float16, otherwise float32.
USE_FLOAT16 = True
# Controls how often buffered predictions are flushed to OUT_SUBMISSION.
FLUSH_EVERY_N_BATCHES = 50

## Setup Device

In [None]:
# Inference runs on the CPU only; size PyTorch's CPU thread pool from the
# configured NUM_THREADS, then report what was actually applied.
torch.set_num_threads(NUM_THREADS)
device = torch.device("cpu")
print(f"[INFO] device={device}, threads={torch.get_num_threads()}")

## Load Model & Tokenizer

In [None]:
# Tokenizer and encoder are both read from the MODEL_PATH directory.
print("[INFO] Loading tokenizer and model (low_cpu_mem_usage=True)...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    use_fast=True,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
# Move the weights to the target device and switch to evaluation mode;
# both calls return the module itself, so they can be chained.
model = model.to(device).eval()
print("[INFO] Model loaded.")

## Load Classifier

In [None]:
# Try to load the pickled classifier that sits next to the encoder weights.
# `has_clf` records whether it is available; when it is not, the notebook
# falls back to saving raw embeddings instead of predictions.
# NOTE: joblib.load unpickles the file, so MODEL_PATH must be a trusted artifact.
clf = None
has_clf = False
try:
    clf = joblib.load(os.path.join(MODEL_PATH, "deberta_classifier.pkl"))
except Exception as e:
    # Deliberately best-effort: any load failure selects the embeddings-only path.
    print("[WARN] Classifier not found or failed to load. Will save embeddings per-batch instead.")
    print("       Exception:", e)
else:
    has_clf = True
    print("[INFO] Classifier loaded.")

## Define Embedding Function

In [None]:
def encode_texts_get_cls_embeddings(texts):
    """Return the first-token embedding of every text as a NumPy array.

    The texts are tokenized together (padded, truncated to MAX_LENGTH tokens),
    run through the module-level ``model`` without gradient tracking, and the
    hidden state at sequence position 0 is extracted for each input.

    Args:
        texts: Batch of strings to encode; passed to the tokenizer as-is.

    Returns:
        Array with one row per input text, in float16 when USE_FLOAT16 is
        true and float32 otherwise.
    """
    batch = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    )
    model_inputs = {name: tensor.to(device) for name, tensor in batch.items()}
    target_dtype = torch.float16 if USE_FLOAT16 else torch.float32

    with torch.no_grad():
        hidden = model(**model_inputs).last_hidden_state
        # Position 0 of each sequence is the embedding used downstream.
        arr = hidden[:, 0, :].cpu().to(target_dtype).numpy()

    # Drop the large intermediates right away to keep peak RAM low.
    del batch, model_inputs, hidden
    gc.collect()
    return arr

## Load Test Data

In [None]:
# Read the full test set into memory; `n` is the row count used for batching.
print("[INFO] Loading test CSV...")
test = pd.read_csv(TEST_CSV)
n = test.shape[0]
print(f"[INFO] Test rows: {n}")

## Process Batches & Generate Predictions

In [None]:
def _append_rows_to_submission(rows):
    """Write *rows* to OUT_SUBMISSION, creating it with a header on first use."""
    is_first_write = not os.path.exists(OUT_SUBMISSION)
    pd.DataFrame(rows).to_csv(
        OUT_SUBMISSION,
        index=False,
        mode="w" if is_first_write else "a",
        header=is_first_write,
    )


# Writes below pick "create" vs "append" from whether the file exists, so a
# submission left behind by an earlier run must be removed first; otherwise
# this run's rows would be appended to the old ones.
if has_clf and os.path.exists(OUT_SUBMISSION):
    os.remove(OUT_SUBMISSION)

out_rows = []  # predictions buffered since the last flush
flush_count = 0  # number of flushes performed so far

batch_starts = tqdm(range(0, n, BATCH_SIZE), desc="Processing batches")
for batch_no, row_start in enumerate(batch_starts, start=1):
    # iloc slicing clips at the end of the frame, so the last batch may be short.
    batch_df = test.iloc[row_start:row_start + BATCH_SIZE]

    # Each response is encoded together with its prompt, separated by a space.
    prompts = batch_df["prompt"].astype(str)
    batch_texts_a = (prompts + " " + batch_df["response_a"].astype(str)).tolist()
    batch_texts_b = (prompts + " " + batch_df["response_b"].astype(str)).tolist()

    emb_a = encode_texts_get_cls_embeddings(batch_texts_a)
    emb_b = encode_texts_get_cls_embeddings(batch_texts_b)

    # One feature row per sample: embedding of (prompt + A) followed by (prompt + B).
    combined = np.concatenate([emb_a, emb_b], axis=1)

    if has_clf:
        # Score the whole batch in one call instead of once per row.
        # NOTE(review): columns 0/1/2 are taken to mean model_a / model_b / tie;
        # confirm this matches the class order the classifier was trained with.
        probas = clf.predict_proba(combined)
        for idx, proba in zip(batch_df.index, probas):
            out_rows.append({
                "id": int(test.at[idx, "id"]),
                "winner_model_a": float(proba[0]),
                "winner_model_b": float(proba[1]),
                "winner_tie": float(proba[2]),
            })
    else:
        # No classifier available: persist the raw features, one file per sample.
        for idx, features in zip(batch_df.index, combined):
            np.save(f"emb_sample_{idx}.npy", features)

    # Flush on a batch cadence, as the constant's name promises. Rows left in
    # the buffer after the last batch are written by the final save step.
    if out_rows and batch_no % FLUSH_EVERY_N_BATCHES == 0:
        _append_rows_to_submission(out_rows)
        out_rows = []
        flush_count += 1
        gc.collect()

## Save Final Results

In [None]:
# Write whatever predictions are still buffered after the batch loop.
if has_clf:
    # flush_count == 0 means this run has not written to the file yet, so the
    # file is (re)created with a header instead of appending to whatever a
    # previous run may have left behind under the same name.
    nothing_written_yet = flush_count == 0
    if out_rows or nothing_written_yet:
        # Explicit columns keep the header correct even when there are no rows
        # at all (empty test set), so a valid submission file always exists.
        submission_columns = ["id", "winner_model_a", "winner_model_b", "winner_tie"]
        pd.DataFrame(out_rows, columns=submission_columns).to_csv(
            OUT_SUBMISSION,
            index=False,
            mode="w" if nothing_written_yet else "a",
            header=nothing_written_yet,
        )
    print(f"[INFO] Predictions saved to {OUT_SUBMISSION}")
else:
    print("[INFO] Embeddings saved per-sample as 'emb_sample_<index>.npy' in the working directory.")
    print("       If you want a single combined embeddings file, you can concat them later by loading those .npy files sequentially.")