# In [None]:
# 1. Imports
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# 2. Load Data
# Both CSV files are read from the current working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# 3. Handle Missing Values
# Replace NaN in the three text columns with empty strings, in both frames.
text_columns = ["prompt", "response_a", "response_b"]
train[text_columns] = train[text_columns].fillna("")
test[text_columns] = test[text_columns].fillna("")

# 4. Prepare Labels
def get_winner(row):
    """Collapse a row's one-hot winner flags into a single class label.

    The flags are checked in a fixed order, so if several are set the first
    one wins: 'winner_model_a', then 'winner_model_b', then 'winner_tie'.
    A flag that is absent from the row is treated as 0.

    Args:
        row: Any mapping-like object exposing ``.get(key, default)``.

    Returns:
        0 for A, 1 for B, 2 for a tie, or -1 when no flag equals 1.
    """
    label_by_flag = (
        ('winner_model_a', 0),  # A
        ('winner_model_b', 1),  # B
        ('winner_tie', 2),      # Tie
    )
    for flag, label in label_by_flag:
        if row.get(flag, 0) == 1:
            return label
    return -1  # Unknown

# Label every training row, then keep only rows with a known winner.
train['winner'] = train.apply(get_winner, axis=1)
has_known_winner = train['winner'] != -1
train = train.loc[has_known_winner]  # Remove unknowns

# 5. Feature Engineering (TF-IDF)
# One independent vectorizer per text column, each limited to 150 features.
vectorizer_prompt, vectorizer_response_a, vectorizer_response_b = (
    TfidfVectorizer(max_features=150) for _ in range(3)
)

# Vocabularies are learned from the training rows only.
X_prompt = vectorizer_prompt.fit_transform(train["prompt"])
X_a = vectorizer_response_a.fit_transform(train["response_a"])
X_b = vectorizer_response_b.fit_transform(train["response_b"])

# Densify each block and place them side by side: prompt | response A | response B.
X = np.concatenate(
    [block.toarray() for block in (X_prompt, X_a, X_b)],
    axis=1,
)
y = train["winner"].to_numpy()

# 6. Train/Test Split (optional, for validation)
# Hold out 20% of the rows; the fixed seed keeps the partition reproducible.
# NOTE: X_val / y_val are not scored anywhere in this section.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 7. Train Model
# The 'saga' solver shuffles the data while optimising, so the estimator is
# seeded with the same value as the split; without it the fitted coefficients
# (and therefore the predictions) can differ from run to run.
model = LogisticRegression(max_iter=500, solver='saga', random_state=42)
model.fit(X_train, y_train)

# 8. Prepare Test Features
# Reuse the already-fitted vectorizers (transform only, no refit) so the test
# columns line up with the training columns.
test_prompt = vectorizer_prompt.transform(test["prompt"])
test_a = vectorizer_response_a.transform(test["response_a"])
test_b = vectorizer_response_b.transform(test["response_b"])
X_test = np.concatenate(
    [block.toarray() for block in (test_prompt, test_a, test_b)],
    axis=1,
)

# 9. Predict
# Per-class probabilities and the hard class label for every test row.
probs = model.predict_proba(X_test)
preds = model.predict(X_test)

# 10. Map predictions to user choice
# 0: A, 1: B, 2: Tie
choice_map = dict(enumerate(("A", "B", "Tie")))
# Direct dict lookup: a label outside 0-2 raises KeyError.
test['user_choice'] = list(map(choice_map.__getitem__, preds))