In [4]:
import zipfile
from pathlib import Path

# Folder to download and extract competition files
download_path = Path("/kaggle/working/llm_competition")
download_path.mkdir(exist_ok=True, parents=True)

# Step 1: Download the competition data
!kaggle competitions download -c llm-classification-finetuning -p {download_path} --force

# Step 2: Unzip all zip files in that folder
for zip_file in download_path.glob("*.zip"):
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(download_path)

# Step 3: Set paths to train and test CSV
train_file = download_path / "train.csv"
test_file  = download_path / "test.csv"

# Step 4: Quick check
print("Train file exists:", train_file.exists())
print("Test file exists:", test_file.exists())


Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 4, in <module>
    from kaggle.cli import main
  File "/usr/local/lib/python3.12/dist-packages/kaggle/__init__.py", line 6, in <module>
    api.authenticate()
  File "/usr/local/lib/python3.12/dist-packages/kaggle/api/kaggle_api_extended.py", line 434, in authenticate
    raise IOError('Could not find {}. Make sure it\'s located in'
OSError: Could not find kaggle.json. Make sure it's located in /root/.config/kaggle. Or use the environment method. See setup instructions at https://github.com/Kaggle/kaggle-api/
Train file exists: False
Test file exists: False


In [2]:
!pip install -q transformers datasets accelerate peft

import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType


2026-01-10 19:50:14.238695: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768074614.450668      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768074614.506823      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768074614.955264      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768074614.955303      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768074614.955306      55 computation_placer.cc:177] computation placer alr

In [5]:
import pandas as pd

train_file = "/kaggle/input/llm-classification-finetuning/train.csv"
test_file  = "/kaggle/input/llm-classification-finetuning/test.csv"

train_df = pd.read_csv(train_file)
test_df  = pd.read_csv(test_file)

# Convert one-hot winner columns into a single integer label for classification
train_df["label"] = train_df.apply(
    lambda row: 0 if row.winner_model_a == 1 else 1 if row.winner_model_b == 1 else 2,
    axis=1
)

train_df.head()


Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,label
0,30192,gpt-4-1106-preview,gpt-4-0613,"[""Is it morally right to try to have a certain...","[""The question of whether it is morally right ...","[""As an AI, I don't have personal beliefs or o...",1,0,0,0
1,53567,koala-13b,gpt-4-0613,"[""What is the difference between marriage lice...","[""A marriage license is a legal document that ...","[""A marriage license and a marriage certificat...",0,1,0,1
2,65089,gpt-3.5-turbo-0613,mistral-medium,"[""explain function calling. how would you call...","[""Function calling is the process of invoking ...","[""Function calling is the process of invoking ...",0,0,1,2
3,96401,llama-2-13b-chat,mistral-7b-instruct,"[""How can I create a test set for a very rare ...","[""Creating a test set for a very rare category...","[""When building a classifier for a very rare c...",1,0,0,0
4,198779,koala-13b,gpt-3.5-turbo-0314,"[""What is the best way to travel from Tel-Aviv...","[""The best way to travel from Tel Aviv to Jeru...","[""The best way to travel from Tel-Aviv to Jeru...",0,1,0,1


In [7]:
print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)


Train shape: (57477, 10)
Test shape: (3, 4)


In [8]:
class PreferenceDataset(Dataset):
    def __init__(self, df, tokenizer, train=True, swap=True, max_len=512):
        self.df = df
        self.tokenizer = tokenizer
        self.train = train
        self.swap = swap
        self.max_len = max_len

    def __len__(self):
        return len(self.df) * (2 if self.swap else 1)

    def __getitem__(self, idx):
        swap_flag = False
        if self.swap and idx >= len(self.df):
            swap_flag = True
            idx -= len(self.df)
        row = self.df.iloc[idx]

        if swap_flag:
            a, b = row["response_b"], row["response_a"]
            label = 1 if row["label"]==0 else 0 if row["label"]==1 else 2
        else:
            a, b = row["response_a"], row["response_b"]
            label = row["label"]

        text = f"<PROMPT>{row['prompt']}</PROMPT><ANSWER_A>{a}</ANSWER_A><ANSWER_B>{b}</ANSWER_B>"
        enc = self.tokenizer(text, truncation=True, padding="max_length", max_length=self.max_len, return_tensors="pt")
        item = {k: v.squeeze(0) for k,v in enc.items()}
        if self.train:
            item["labels"] = torch.tensor(label)
        return item


In [9]:
MODEL_NAME = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
base_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["query_proj", "value_proj"],
    bias="none"
)
model = get_peft_model(base_model, lora_config)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model.to(DEVICE)
model.print_trainable_parameters()


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

trainable params: 297,219 || all params: 184,721,670 || trainable%: 0.1609


In [10]:
BATCH_SIZE = 8
MAX_LEN = 512

train_ds = PreferenceDataset(train_df, tokenizer, train=True, swap=True, max_len=MAX_LEN)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

test_ds = PreferenceDataset(test_df, tokenizer, train=False, swap=False, max_len=MAX_LEN)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)


In [11]:
EPOCHS = 2
LR = 2e-4

optimizer = AdamW(model.parameters(), lr=LR)
scaler = torch.cuda.amp.GradScaler()

model.train()
for epoch in range(EPOCHS):
    total_loss = 0
    for batch in train_loader:
        batch = {k:v.to(DEVICE) for k,v in batch.items()}
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            outputs = model(**batch)
            loss = outputs.loss
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} | Loss: {total_loss/len(train_loader):.4f}")


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1 | Loss: 1.0729
Epoch 2 | Loss: 1.0282


In [13]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, dataframe, is_test=False):
        self.df = dataframe
        self.is_test = is_test

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        a, b = row["response_a"], row["response_b"]
        text = f"<PROMPT>{row['prompt']}</PROMPT><ANSWER_A>{a}</ANSWER_A><ANSWER_B>{b}</ANSWER_B>"

        item = {"input_text": text}

        if not self.is_test:
            # Only include labels if not test data
            item["labels"] = row["label"]

        return item

    def __len__(self):
        return len(self.df)


In [14]:
test_dataset = MyDataset(test_df, is_test=True)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=16,  # adjust as needed
    shuffle=False
)


In [18]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader

class TestDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=512):
        self.df = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        a, b = row["response_a"], row["response_b"]
        text = f"<PROMPT>{row['prompt']}</PROMPT><ANSWER_A>{a}</ANSWER_A><ANSWER_B>{b}</ANSWER_B>"
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        item = {k: v.squeeze(0) for k, v in encoding.items()}
        return item

    def __len__(self):
        return len(self.df)

test_dataset = TestDataset(test_df, tokenizer, max_length=512)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

model.eval()
preds = []

with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        with torch.amp.autocast("cuda"):
            outputs = model(**batch)
            logits = outputs.logits
            probs = torch.softmax(logits, dim=-1)
        preds.append(probs.cpu().numpy())

preds = np.vstack(preds)
pred_labels = np.argmax(preds, axis=1)

print("Predictions shape:", preds.shape)
print("Predicted labels shape:", pred_labels.shape)


Predictions shape: (3, 3)
Predicted labels shape: (3,)


In [19]:
submission = pd.DataFrame({
    "id": test_df["id"],
    "winner_model_a": preds[:,0],
    "winner_model_b": preds[:,1],
    "winner_model_tie": preds[:,2]
})

submission.to_csv("submission.csv", index=False)
submission.head()


Unnamed: 0,id,winner_model_a,winner_model_b,winner_model_tie
0,136060,0.314745,0.521483,0.163772
1,211333,0.286959,0.452637,0.260404
2,1233961,0.369682,0.404335,0.225982
