In [1]:
# =========================
# LLM Classification Finetuning — baseline submission
# TF-IDF + Logistic Regression (multiclass softmax)
# =========================

import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# 1) Read the competition train/test tables from the Kaggle input directory
train_path = '/kaggle/input/llm-classification-finetuning/train.csv'
test_path = '/kaggle/input/llm-classification-finetuning/test.csv'
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

# 2) Collapse the three one-hot winner columns into a single label per row.
# For each row, y holds the NAME of the column with the largest value, i.e.
# one of 'winner_model_a' | 'winner_model_b' | 'winner_tie'.
target_cols = ['winner_model_a', 'winner_model_b', 'winner_tie']
one_hot_targets = train.loc[:, target_cols]
y = one_hot_targets.idxmax(axis='columns')

# 3) Text features: one document per row, laid out as
#    "<prompt> [SEP] <response_a> [VS] <response_b>"
def _build_text(df):
    """Join the prompt and both responses of every row into one string.

    Missing values are replaced with empty strings before joining. Without
    this, a single NaN in any of the three columns makes the whole
    concatenated document NaN, which the TF-IDF vectorizer cannot process.

    Args:
        df: DataFrame with 'prompt', 'response_a' and 'response_b' columns.

    Returns:
        A Series of strings sharing df's index.
    """
    parts = df[['prompt', 'response_a', 'response_b']].fillna('').astype(str)
    return (
        parts['prompt']
        + ' [SEP] ' + parts['response_a']
        + ' [VS] ' + parts['response_b']
    )


# Same construction for train and test so both see identical features.
train_text = _build_text(train)
test_text = _build_text(test)

# 4) Model: TF-IDF (unigrams + bigrams, capped at 10k features) feeding a
#    softmax logistic regression that outputs per-class probabilities.
# NOTE: `multi_class='multinomial'` is intentionally not passed. With the
# lbfgs solver a multiclass target is already fit with the multinomial loss,
# and the explicit argument is deprecated in recent scikit-learn releases
# (it emits a FutureWarning and is scheduled for removal).
pipe = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=10000, ngram_range=(1, 2))),
    ('clf', LogisticRegression(max_iter=2000, solver='lbfgs')),
])

# Fit vectorizer and classifier together on the training documents.
pipe.fit(train_text, y)

# 5) Class probabilities for every test row, one column per fitted class
proba = pipe.predict_proba(test_text)  # shape: (n_test, n_classes)

# Label the probability columns with the classifier's own class order,
# e.g. ['winner_model_a', 'winner_model_b', 'winner_tie'].
clf = pipe.named_steps['clf']
class_order = list(clf.classes_)
proba_df = pd.DataFrame(data=proba, index=test.index, columns=class_order)

# Put the columns in submission order (winner_model_a, winner_model_b,
# winner_tie). A class that never appeared during fitting has no column yet;
# it is created here filled with 0.0 (not expected in practice).
proba_df = proba_df.reindex(columns=target_cols, fill_value=0.0)

# 6) Assemble the submission: the id column followed by the three class
#    probabilities, matched row by row on the shared test index.
submission = test[['id']].join(proba_df)

# Write without the index so the file holds only id + probability columns.
submission_path = '/kaggle/working/submission.csv'
submission.to_csv(submission_path, index=False)

print("✅ Saved /kaggle/working/submission.csv")

# Last expression of the cell: shows the first rows in the notebook output.
submission.head()


✅ Saved /kaggle/working/submission.csv


Unnamed: 0,id,winner_model_a,winner_model_b,winner_tie
0,136060,0.159723,0.335464,0.504813
1,211333,0.441432,0.25216,0.306408
2,1233961,0.339436,0.487675,0.17289
