### Model Approach

This notebook trains a multinomial logistic regression classifier to predict the outcome of a head-to-head comparison between two LLM responses: model A wins, model B wins, or a tie.  

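- **Feature Representation**: The text of the two responses is transformed into numerical features using a shared TF-IDF vectorizer (unigrams and bigrams). The model is trained on the signed and absolute differences between the two responses' TF-IDF vectors.  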
- **Model**: A multinomial logistic regression model from scikit-learn (`LogisticRegression`) is used to model the probability that a given response wins.  
- **Calibration**: To improve the quality of the probability estimates, the base classifier is wrapped in a `CalibratedClassifierCV` with isotonic regression.  
- **Cross-Validation**: The training data is split into stratified folds. For each fold, the model is trained on the training portion and predictions are generated for the validation portion and the test set. Test predictions are averaged across folds.  
- **Evaluation**: Out-of-fold predictions are used to compute the overall log-loss on the training data, which provides an unbiased estimate of performance.  

This approach provides a simple yet effective baseline for the competition and can be extended with more complex feature representations or models.

In [None]:
# ==============================================
# LLM Classification Finetuning - TFIDF Approach
# ==============================================
# This script uses a local TF-IDF + Logistic Regression pipeline.

# ==========================
# Library Imports
# ==========================
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import log_loss
from scipy.sparse import hstack

# ==========================
# Data Loading
# ==========================
# Read the competition's train and test tables from the Kaggle input folder.
train_df = pd.read_csv('/kaggle/input/llm-classification-finetuning/train.csv')
test_df = pd.read_csv('/kaggle/input/llm-classification-finetuning/test.csv')

# Collapse the three one-hot winner columns into one class index per row.
# The index follows the column order: 0 = model_a, 1 = model_b, 2 = tie.
target = train_df[['winner_model_a', 'winner_model_b', 'winner_tie']].values
labels = np.argmax(target, axis=1)

print(f"Training samples: {train_df.shape[0]}, Test samples: {test_df.shape[0]}")

# ==========================
# Text Vectorization
# ==========================
# Cast every response column to str once; the same text is then used both
# for fitting the vectorizer and for transforming.
train_a_text = train_df['response_a'].astype(str)
train_b_text = train_df['response_b'].astype(str)
test_a_text = test_df['response_a'].astype(str)
test_b_text = test_df['response_b'].astype(str)

# A single corpus (train + test, both responses) so that all four matrices
# below share one vocabulary and one set of IDF weights.
corpus = pd.concat([train_a_text, train_b_text, test_a_text, test_b_text])

# TF-IDF over unigrams and bigrams with English stop words removed; the
# vocabulary is limited via max_features=5000. `fit` returns the vectorizer.
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),
    stop_words='english',
    max_features=5000,
).fit(corpus)

# Project each response column into the shared TF-IDF space.
train_a_tfidf = vectorizer.transform(train_a_text)
train_b_tfidf = vectorizer.transform(train_b_text)
test_a_tfidf = vectorizer.transform(test_a_text)
test_b_tfidf = vectorizer.transform(test_b_text)

# ==========================
# Feature Engineering
# ==========================
# Pairwise features between the two responses:
#   * signed difference (A - B): which response uses a term more heavily
#   * absolute difference |A - B|: how much the responses differ on a term,
#     regardless of direction
train_diff = train_a_tfidf - train_b_tfidf
train_abs_diff = abs(train_diff)  # reuse the difference instead of recomputing it
test_diff = test_a_tfidf - test_b_tfidf
test_abs_diff = abs(test_diff)

# Combine features horizontally.
# CSR is requested explicitly because the cross-validation loop selects rows
# with X_train[train_idx]. Without `format`, the type hstack returns depends
# on the SciPy version, and a COO result does not support that row indexing.
X_train = hstack([train_diff, train_abs_diff], format='csr')
X_test = hstack([test_diff, test_abs_diff], format='csr')

# ==========================
# Model Training
# ==========================
# Probability buffers with one column per class index produced by argmax
# over the winner columns (three classes).
#   oof_preds  - each training row is filled once, by the fold that held it out
#   test_preds - running average of the per-fold test predictions
oof_preds = np.zeros((len(train_df), 3))
test_preds = np.zeros((len(test_df), 3))

n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, labels), 1):
    print(f"\n===== Fold {fold} / {n_splits} =====")
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = labels[train_idx], labels[val_idx]

    # Multinomial logistic regression with isotonic calibration.
    # The deprecated `multi_class` argument is intentionally not passed:
    # with the default lbfgs solver, scikit-learn already fits a multinomial
    # model for multiclass targets, and newer releases warn on (and are
    # scheduled to reject) the explicit argument.
    base_clf = LogisticRegression(max_iter=1000, n_jobs=-1)
    # cv=3: the calibrator runs its own internal 3-fold split of X_tr, so
    # the outer validation fold is never seen during fitting or calibration.
    calibrated_clf = CalibratedClassifierCV(base_clf, method='isotonic', cv=3)
    calibrated_clf.fit(X_tr, y_tr)

    # Store out-of-fold predictions and accumulate the test-set average.
    oof_preds[val_idx] = calibrated_clf.predict_proba(X_val)
    test_preds += calibrated_clf.predict_proba(X_test) / n_splits

# ==========================
# Evaluation
# ==========================
# Log loss over the out-of-fold predictions: every training row is scored by
# a model that did not see that row's label during fitting.
oof_logloss = log_loss(labels, oof_preds)
print(f"\nOverall OOF log loss: {oof_logloss:.6f}")

# ==========================
# Submission
# ==========================
# Start from the test ids, then attach one probability column per class.
# Column i of test_preds is written to the i-th name in the tuple below.
submission = test_df[['id']].copy()
for col_idx, col_name in enumerate(('winner_model_a', 'winner_model_b', 'winner_tie')):
    submission[col_name] = test_preds[:, col_idx]

print("\nSubmission preview:")
print(submission.head())

# Write without the index so the file contains only the id and the three
# probability columns.
submission.to_csv('submission.csv', index=False)
print("\nSaved submission.csv successfully.")