In [1]:
# ==============================
# Load Libraries
# ==============================
import numpy as np
import pandas as pd
import os

# ==============================
# Show input files
# ==============================
# Lazily flatten the directory walk into full file paths, then print one
# path per line (directories that contain no files produce no output).
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# ==============================
# Load dataset
# ==============================
# Read the competition's train and test CSVs into DataFrames.
train = pd.read_csv("/kaggle/input/llm-classification-finetuning/train.csv")
test = pd.read_csv("/kaggle/input/llm-classification-finetuning/test.csv")

# Report (rows, columns) for each frame as a quick sanity check.
for caption, frame in (("Train shape:", train), ("Test shape:", test)):
    print(caption, frame.shape)

# ==============================
# Combine prompt + responses
# ==============================
def combine_text(df):
    """Join the prompt and both responses of each row into one string.

    Reads the ``prompt``, ``response_a`` and ``response_b`` columns of *df*,
    replaces missing values with the empty string, and concatenates the three
    columns in that order with a single space between them.

    Returns a Series with one combined string per row of *df*.
    """
    source_columns = ("prompt", "response_a", "response_b")
    cleaned = [df[column].fillna("") for column in source_columns]

    # Fold left to right: prompt, then " " + response_a, then " " + response_b.
    combined = cleaned[0]
    for part in cleaned[1:]:
        combined = combined + " " + part
    return combined

# Add the combined prompt/response text column to both frames.
for frame in (train, test):
    frame["text"] = combine_text(frame)

# ==============================
# Create labels
# ==============================
# Collapse the three indicator columns into one integer class label: the
# position of the largest value in each row
# (0 = winner_model_a, 1 = winner_model_b, 2 = winner_tie).
winner_columns = ["winner_model_a", "winner_model_b", "winner_tie"]
train["label"] = train[winner_columns].values.argmax(axis=1)

# ==============================
# TF-IDF Features
# ==============================
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer settings: at most 50000 features, unigrams and bigrams,
# built-in English stop-word list.
tfidf_settings = {
    "max_features": 50000,
    "ngram_range": (1, 2),
    "stop_words": "english",
}
vectorizer = TfidfVectorizer(**tfidf_settings)

y_train = train["label"]

# Fit the vocabulary/IDF weights on the training text only, then reuse the
# fitted vectorizer to transform the test text.
X_train = vectorizer.fit_transform(train["text"])
X_test = vectorizer.transform(test["text"])

# ==============================
# Train Model
# ==============================
from sklearn.linear_model import LogisticRegression

# fit() returns the fitted estimator, so construction and training chain into
# a single expression; max_iter is raised to 1000 iterations.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# ==============================
# Predictions
# ==============================
# predict_proba returns one column per entry of model.classes_, in that order.
probs = model.predict_proba(X_test)

# ==============================
# Create Submission File
# ==============================
# Submission columns in label order: list position == integer class label
# used for training (0 = model A wins, 1 = model B wins, 2 = tie).
target_columns = ["winner_model_a", "winner_model_b", "winner_tie"]

# Look probability columns up by class label instead of by raw position, so a
# class that never occurred in the training labels gets probability 0 rather
# than shifting the other columns or raising IndexError.
column_of_class = {
    int(label): position for position, label in enumerate(model.classes_)
}

submission = pd.DataFrame({"id": test["id"]})
for label, column in enumerate(target_columns):
    if label in column_of_class:
        submission[column] = probs[:, column_of_class[label]]
    else:
        submission[column] = 0.0

submission.to_csv("submission.csv", index=False)

print("Submission file created!")
submission.head()


/kaggle/input/llm-classification-finetuning/sample_submission.csv
/kaggle/input/llm-classification-finetuning/train.csv
/kaggle/input/llm-classification-finetuning/test.csv
Train shape: (57477, 9)
Test shape: (3, 4)
Submission file created!


Unnamed: 0,id,winner_model_a,winner_model_b,winner_tie
0,136060,0.213132,0.411697,0.375171
1,211333,0.432757,0.255139,0.312104
2,1233961,0.350302,0.505996,0.143702
