In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llm-classification-finetuning/sample_submission.csv
/kaggle/input/llm-classification-finetuning/train.csv
/kaggle/input/llm-classification-finetuning/test.csv


In [2]:
# === 1. Imports ===
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import lightgbm as lgb

# === 2. Load data directly from Kaggle input ===
train = pd.read_csv("/kaggle/input/llm-classification-finetuning/train.csv")
test = pd.read_csv("/kaggle/input/llm-classification-finetuning/test.csv")
sample_sub = pd.read_csv("/kaggle/input/llm-classification-finetuning/sample_submission.csv")

# === 3. Label encoding ===
def label_from_row(r):
    if r["winner_model_a"] == 1:
        return 0
    elif r["winner_model_b"] == 1:
        return 1
    else:
        return 2

train["label"] = train.apply(label_from_row, axis=1)

# === 4. Clean text ===
def tidy(s):
    if pd.isna(s):
        return ""
    s = str(s)
    if s.startswith('["') and s.endswith('"]'):
        s = s[2:-2]
    return s.strip()

train["text"] = (
    "PROMPT: " + train["prompt"].apply(tidy) +
    " RESP_A: " + train["response_a"].apply(tidy) +
    " RESP_B: " + train["response_b"].apply(tidy)
)

test["text"] = (
    "PROMPT: " + test["prompt"].apply(tidy) +
    " RESP_A: " + test["response_a"].apply(tidy) +
    " RESP_B: " + test["response_b"].apply(tidy)
)

# === 5. TF-IDF features ===
tfidf = TfidfVectorizer(ngram_range=(1, 2), max_features=20000)
X_all = tfidf.fit_transform(train["text"])
X_test = tfidf.transform(test["text"])
y_all = train["label"].values

# === 6. Train/validation split ===
X_train, X_val, y_train, y_val = train_test_split(
    X_all, y_all, test_size=0.15, random_state=42, stratify=y_all
)

# === 7. LightGBM model ===
dtrain = lgb.Dataset(X_train, label=y_train)
dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

params = {
    "objective": "multiclass",
    "num_class": 3,
    "metric": "multi_logloss",
    "learning_rate": 0.2,
    "verbosity": -1,
    "num_threads": 4,
    "seed": 42
}

bst = lgb.train(
    params,
    dtrain,
    valid_sets=[dtrain, dval],
    num_boost_round=150,
    callbacks=[
        lgb.early_stopping(stopping_rounds=20),
        lgb.log_evaluation(20)
    ]
)

# === 8. Validation log loss ===
val_pred = bst.predict(X_val, num_iteration=bst.best_iteration)
val_logloss = log_loss(y_val, val_pred, labels=[0, 1, 2])
print(f"Validation multi_logloss: {val_logloss:.6f}")

# === 9. Retrain on full data ===
dfull = lgb.Dataset(X_all, label=y_all)
bst_full = lgb.train(params, dfull, num_boost_round=bst.best_iteration)

# === 10. Predict test set ===
test_probs = bst_full.predict(X_test, num_iteration=bst_full.best_iteration)

# === 11. Prepare submission ===
sub = pd.DataFrame(test_probs, columns=["winner_model_a", "winner_model_b", "winner_tie"])
sub.insert(0, "id", test["id"].values)
sub.to_csv("submission.csv", index=False)
print("Submission saved to submission.csv")


Training until validation scores don't improve for 20 rounds
[20]	training's multi_logloss: 1.00279	valid_1's multi_logloss: 1.07201
Early stopping, best iteration is:
[15]	training's multi_logloss: 1.01858	valid_1's multi_logloss: 1.07072
Validation multi_logloss: 1.070717
Submission saved to submission.csv
