In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel

class TIDAutoBertClassification(nn.Module):
    """Pair classifier: one shared encoder, two inputs, one classification head.

    Inputs A and B are each passed through the same ``AutoModel`` backbone,
    pooled to one vector per sequence, concatenated, and fed to a two-layer
    MLP that produces ``num_labels`` logits.

    Args:
        model_name: Identifier or local directory handed to
            ``AutoModel.from_pretrained``.
        pooling: Pooling strategy, matched case-insensitively. ``"cls"``
            takes the first token and ``"max"`` a mask-aware maximum; any
            other value (including ``"mean"``) uses a mask-aware mean.
        dropout: Dropout probability applied to the concatenated features
            and inside the classification head.
        num_labels: Number of output logits.
    """

    def __init__(
        self,
        model_name = "google/mobilebert-uncased",
        pooling: str = "cls",  # one of {"cls", "mean", "max"}
        dropout: float = 0.1,
        num_labels: int = 3,
    ):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)

        hidden = self.model.config.hidden_size
        # Constructor arguments are kept (as passed) so they can be stored next to the weights.
        self.init_param = {"pooling":pooling, "dropout":dropout, "num_labels":num_labels}
        self.pooling = pooling.lower()
        self.dropout = nn.Dropout(dropout)

        # Concatenate pooled A and B: 2 * hidden
        self.classifier = nn.Sequential(
            nn.Linear(2 * hidden, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, num_labels),
        )

    def _pool(self, last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Reduce token-level states to one vector per sequence.

        Args:
            last_hidden_state: Encoder output, indexed as (batch, tokens, hidden).
            attention_mask: (batch, tokens) mask that is non-zero on real tokens.

        Returns:
            A (batch, hidden) tensor pooled according to ``self.pooling``.
        """
        if self.pooling == "cls":
            return last_hidden_state[:, 0]  # [CLS]-like (actually first token)
        elif self.pooling == "max":
            # Padded positions get a very negative value so they never win the max.
            # -1e9 cannot be represented in float16 and masked_fill rejects it,
            # so the fill value is clamped to the smallest finite value of the dtype.
            # float32 / bfloat16 inputs still use exactly -1e9.
            mask = attention_mask.unsqueeze(-1).bool()
            fill_value = max(-1e9, torch.finfo(last_hidden_state.dtype).min)
            masked = last_hidden_state.masked_fill(~mask, fill_value)
            return masked.max(dim=1).values
        else:  # mean (mask-aware); also the fallback for unrecognised pooling names
            mask = attention_mask.unsqueeze(-1)  # (B, L, 1)
            summed = (last_hidden_state * mask).sum(dim=1)
            # clamp avoids a division by zero for a row whose mask is all zeros
            denom = mask.sum(dim=1).clamp(min=1)
            return summed / denom

    def encode(self, **inputs) -> torch.Tensor:
        """Run the backbone on ``inputs`` and return its ``last_hidden_state``."""
        outputs = self.model(**inputs)
        return outputs.last_hidden_state

    def forward(
        self,
        input_ids_a: torch.Tensor,
        attention_mask_a: torch.Tensor,
        input_ids_b: torch.Tensor,
        attention_mask_b: torch.Tensor,
    ):
        """Score a pair of tokenized inputs.

        Args:
            input_ids_a: Token ids of input A.
            attention_mask_a: Attention mask of input A.
            input_ids_b: Token ids of input B.
            attention_mask_b: Attention mask of input B.

        Returns:
            Unnormalised logits from the classification head (softmax is left
            to the caller).
        """
        # Run shared backbone on A
        h_a = self.encode(
            input_ids=input_ids_a,
            attention_mask=attention_mask_a,
        )
        # Run shared backbone on B
        h_b = self.encode(
            input_ids=input_ids_b,
            attention_mask=attention_mask_b,
        )

        # Token-level -> sequence-level pooling
        z_a = self._pool(h_a, attention_mask_a)
        z_b = self._pool(h_b, attention_mask_b)

        # Concatenate and classify
        z = torch.cat([z_a, z_b], dim=-1)
        z = self.dropout(z)
        logits = self.classifier(z)

        return logits


In [None]:
def load(path, device, model_class, tokenizer_class):
    """Rebuild a fine-tuned classifier and its tokenizer from a checkpoint directory.

    ``path`` must contain ``checkpoint.ckpt`` and is also passed to
    ``model_class`` as ``model_name`` and to ``tokenizer_class.from_pretrained``.

    The checkpoint is a dict with the keys ``"init_param"`` (holding
    ``"pooling"``, ``"dropout"``, ``"num_labels"``), ``"dropout_state_dict"``
    and ``"classifier_state_dict"``. An optional ``"pooling"`` key overrides
    the pooling mode; when absent the value from ``"init_param"`` is used.

    Args:
        path: Directory holding the checkpoint and tokenizer files.
        device: ``map_location`` used when reading the checkpoint. The model
            itself is NOT moved; callers still need ``model.to(device)``.
        model_class: Callable accepting ``model_name``, ``pooling``,
            ``dropout`` and ``num_labels`` keyword arguments.
        tokenizer_class: Class exposing ``from_pretrained(path)``.

    Returns:
        ``(model, tokenizer)`` with the model in evaluation mode.
    """
    # NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
    state = torch.load(os.path.join(path, "checkpoint.ckpt"), map_location=device)
    init_param = state["init_param"]

    model = model_class(
        model_name=path,
        pooling=init_param["pooling"],
        dropout=init_param["dropout"],
        num_labels=init_param["num_labels"],
    )

    model.init_param = init_param
    # The pooling mode is compared against lower-case names, so normalise the
    # stored value instead of overwriting the constructor's normalised one verbatim.
    model.pooling = str(state.get("pooling", init_param["pooling"])).lower()
    model.dropout.load_state_dict(state["dropout_state_dict"])
    model.classifier.load_state_dict(state["classifier_state_dict"])

    # A freshly built nn.Module starts in training mode, which would leave the
    # head's dropout active at inference time. Return a deterministic model;
    # callers that want to fine-tune can call model.train().
    model.eval()

    # Load the tokenizer stored next to the checkpoint
    tokenizer = tokenizer_class.from_pretrained(path)

    return model, tokenizer

In [None]:
from transformers import AutoTokenizer

# Use the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Kaggle dataset directory handed to load(), which reads checkpoint.ckpt from it
# and also passes it to the model and tokenizer constructors.
ckpt_path = "/kaggle/input/deberta-v3-xsmall-classification/pytorch/default/1/"

# Rebuild the classifier and tokenizer, then move the model to the chosen device.
model, tokenizer = load(
    ckpt_path,
    device,
    TIDAutoBertClassification,
    AutoTokenizer,
)
model.to(device)
print("model load!")

In [None]:
import torch.nn.functional as F

TEST_PATH = "/kaggle/input/llm-classification-finetuning/test.csv"
OUTPUT_PATH = "/kaggle/working/submission.csv"
MAX_LENGTH = 512  # token budget per (prompt + response) input
SUBMISSION_COLUMNS = ["id", "winner_model_a", "winner_model_b", "winner_tie"]

df = pd.read_csv(TEST_PATH)
ids = df["id"]
prompt = df["prompt"]
response_a = df["response_a"]
response_b = df["response_b"]
print(df.head())


def _encode(text):
    """Tokenize one text and return (input_ids, attention_mask) on `device`."""
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    )
    return encoded.input_ids.to(device), encoded.attention_mask.to(device)


# torch.no_grad() only turns off gradient tracking; dropout stays active until
# the module is switched to evaluation mode, which would make the predicted
# probabilities random from run to run.
model.eval()

results = []
with torch.no_grad():
    # zip() walks the columns by position, so this does not depend on the
    # DataFrame index being 0..n-1.
    for k, p, a, b in zip(ids, prompt, response_a, response_b):
        # NOTE(review): the literal "[SEP]" is inserted as plain text; presumably this
        # mirrors the preprocessing used for fine-tuning - confirm before changing it.
        input_ids1, attention_mask1 = _encode(p + "[SEP]" + a)
        input_ids2, attention_mask2 = _encode(p + "[SEP]" + b)

        logits = model(input_ids1, attention_mask1, input_ids2, attention_mask2)
        probs = F.softmax(logits, dim=-1).cpu().numpy().flatten()

        # assumes the label order used in training is (model_a, model_b, tie) - TODO confirm
        results.append({
            "id": k,
            "winner_model_a": probs[0],
            "winner_model_b": probs[1],
            "winner_tie": probs[2]
        })

# Convert to a DataFrame and save; pinning the columns keeps the header even
# when the test set is empty.
submission_df = pd.DataFrame(results, columns=SUBMISSION_COLUMNS)
submission_df.to_csv(OUTPUT_PATH, index=False)

print(f"✅ '{OUTPUT_PATH}' 파일이 생성되었습니다.")
print(submission_df.head())