In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import joblib
# Suppress RuntimeWarning messages for the rest of the session.
warnings.filterwarnings(action="ignore", category=RuntimeWarning)

# Load the competition's train and test tables.
train, test = (
    pd.read_csv("/kaggle/input/llm-classification-finetuning/train.csv"),
    pd.read_csv("/kaggle/input/llm-classification-finetuning/test.csv"),
)

def encode_label(row):
    """Collapse the winner indicator fields of one row into a single class code.

    Args:
        row: Mapping-like object indexable by "winner_model_a" and
            "winner_model_b".

    Returns:
        0 if "winner_model_a" equals 1, otherwise 1 if "winner_model_b"
        equals 1, otherwise 2.
    """
    # Fields are checked in priority order; the first one set to 1 wins.
    for code, flag in enumerate(("winner_model_a", "winner_model_b")):
        if row[flag] == 1:
            return code
    return 2

def style_counts(text):
    """Count selected punctuation marks in ``text``.

    Args:
        text: String to scan.

    Returns:
        A 3-tuple ``(exclamation_marks, question_marks, commas)``.
    """
    return tuple(text.count(mark) for mark in ("!", "?", ","))

def lexical_diversity(text):
    """Return the ratio of distinct to total alphabetic tokens in ``text``.

    Tokens come from whitespace splitting; only tokens made up entirely of
    alphabetic characters are kept, and they are lower-cased before being
    compared. A small constant in the denominator avoids division by zero, so
    text with no alphabetic tokens yields 0.0.

    Args:
        text: String to analyse.

    Returns:
        A float: unique token count divided by (token count + 1e-6).
    """
    tokens = []
    for word in text.split():
        if word.isalpha():
            tokens.append(word.lower())
    distinct = set(tokens)
    return len(distinct) / (len(tokens) + 1e-6)

# Single class code per row (0, 1 or 2) derived by encode_label.
train["win_label"] = train.apply(encode_label, axis=1)

# Fraction of rows carrying each class code.
a_win_rate, b_win_rate, tie_rate = (
    train["win_label"].eq(code).mean() for code in (0, 1, 2)
)

# Tidy two-column table used by the bar chart.
rate_df = pd.DataFrame(
    {
        "Outcome": ["A_win", "B_win", "Tie"],
        "Rate": [a_win_rate, b_win_rate, tie_rate],
    }
)

# Bar chart of the overall outcome proportions held in rate_df.
sns.set(style="whitegrid", font_scale=1.1)
plt.figure(figsize=(5,4))
ax = sns.barplot(x="Outcome", y="Rate", data=rate_df,
                 palette=["#4A90E2", "#E94E77", "#F5C518"])
# The em dash is written as an escape so the title cannot be corrupted by a
# file re-encoding (the literal had previously degraded into mojibake).
ax.set_title("Position Bias \u2014 Overall Outcome Distribution", fontsize=13)
ax.set_ylabel("Proportion")
ax.set_ylim(0, 1)
# Annotate every bar with its value, rounded to two decimals.
for container in ax.containers:
    ax.bar_label(container, fmt="%.2f", fontsize=11)
plt.tight_layout()
plt.show()

# -----------------------------------------------------------------#

# Size features for each response, followed by their A-minus-B differences.
def _word_total(text):
    """Number of whitespace-separated tokens in ``text``."""
    return len(text.split())


def _terminator_total(text):
    """Combined count of '.', '!' and '?' characters in ``text``."""
    return sum(text.count(mark) for mark in (".", "!", "?"))


resp_a = train["response_a"].astype(str)
resp_b = train["response_b"].astype(str)

train["a_len"] = resp_a.map(len)
train["b_len"] = resp_b.map(len)
train["a_word_cnt"] = resp_a.map(_word_total)
train["b_word_cnt"] = resp_b.map(_word_total)
train["a_sent_cnt"] = resp_a.map(_terminator_total)
train["b_sent_cnt"] = resp_b.map(_terminator_total)

train["diff_char_len"] = train["a_len"].sub(train["b_len"])
train["diff_word_cnt"] = train["a_word_cnt"].sub(train["b_word_cnt"])
train["diff_sent_cnt"] = train["a_sent_cnt"].sub(train["b_sent_cnt"])

# -----------------------------------------------------------------#

# Punctuation-count features per response (via style_counts) and their
# A-minus-B differences.
a_counts = train["response_a"].astype(str).apply(style_counts)
b_counts = train["response_b"].astype(str).apply(style_counts)

# Expand the 3-tuples into columns in one shot; building the frame from a list
# of tuples avoids the very slow row-by-row `.apply(pd.Series)` expansion.
a_style = pd.DataFrame(a_counts.tolist(), index=train.index,
                       columns=["a_exclam", "a_qmark", "a_commas"])
b_style = pd.DataFrame(b_counts.tolist(), index=train.index,
                       columns=["b_exclam", "b_qmark", "b_commas"])

# Assign by column name instead of pd.concat so that re-running this cell
# overwrites the columns rather than appending duplicates (duplicate column
# labels would make train["a_exclam"] a DataFrame and break the diffs below).
for style_frame in (a_style, b_style):
    for name in style_frame.columns:
        train[name] = style_frame[name]

for col in ["exclam", "qmark", "commas"]:
    train[f"diff_{col}"] = train[f"a_{col}"] - train[f"b_{col}"]

# -----------------------------------------------------------------#

# Lexical-diversity score per response and the A-minus-B difference.
lexdiv_a = train["response_a"].astype(str).map(lexical_diversity)
lexdiv_b = train["response_b"].astype(str).map(lexical_diversity)

train["a_lexdiv"] = lexdiv_a
train["b_lexdiv"] = lexdiv_b
train["diff_lexdiv"] = lexdiv_a - lexdiv_b

# -----------------------------------------------------------------#

# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load this artifact from a dataset you trust.
# The loaded object is presumably a VADER sentiment analyzer (TODO confirm);
# the code below only relies on it exposing polarity_scores(text)["compound"].
sia = joblib.load("/kaggle/input/vader-model/vader_model.pkl")


def _compound_score(text):
    """Return the "compound" entry of ``sia.polarity_scores(text)``."""
    return sia.polarity_scores(text)["compound"]


# Coerce to str first, exactly like every other feature in this notebook, so a
# non-string response (e.g. NaN) is not handed to the analyzer as-is.
train["a_sentiment"] = train["response_a"].astype(str).apply(_compound_score)
train["b_sentiment"] = train["response_b"].astype(str).apply(_compound_score)
train["diff_sentiment"] = train["a_sentiment"] - train["b_sentiment"]

# -----------------------------------------------------------------#

def outcome_rates(df, feature, bins=10):
    """Compute per-bin outcome proportions for a quantile-binned feature.

    Args:
        df: DataFrame containing ``feature`` and a "win_label" column holding
            the class codes 0, 1 and 2. It is not modified.
        feature: Name of the numeric column to bin.
        bins: Number of quantile bins requested from ``pd.qcut``. Duplicate
            bin edges are dropped, so fewer bins may be returned.

    Returns:
        DataFrame with one row per observed bin and the columns "bin" (the
        interval), "A_win", "B_win", "Tie" (share of codes 0, 1, 2 within the
        bin) and "bin_mid" (midpoint of the interval).
    """
    # Group by an external key instead of copying the whole frame just to
    # attach a temporary "bin" column.
    bin_key = pd.qcut(df[feature], q=bins, duplicates="drop").rename("bin")
    grouped = (
        df.groupby(bin_key, observed=True)["win_label"]
          .value_counts(normalize=True)
          .unstack()
          # Force all three class codes to be present, in a fixed order. Without
          # this, a class that never occurs leaves fewer than three columns and
          # the positional rename below raises a length-mismatch error.
          .reindex(columns=[0, 1, 2])
          .fillna(0)
    )
    grouped.columns = ["A_win", "B_win", "Tie"]
    grouped["bin_mid"] = grouped.index.map(lambda b: (b.left + b.right)/2 if hasattr(b, "left") else np.nan)
    return grouped.reset_index()

# Feature groups whose relationship with the outcome is plotted below.
verbosity_feats = ["diff_char_len", "diff_word_cnt", "diff_sent_cnt"]
style_feats = ["diff_exclam", "diff_qmark", "diff_commas", "diff_sentiment", "diff_lexdiv"]

sns.set(style="whitegrid")
total_plots = len(verbosity_feats) + len(style_feats)
cols = (total_plots + 1) // 2  # two rows of panels

plt.figure(figsize=(15, 8))

# One panel per feature: verbosity features first, then style features.
panel_features = verbosity_feats + style_feats
panel_titles = (
    [f"Verbosity: {name}" for name in verbosity_feats]
    + [f"Style: {name}" for name in style_feats]
)
# (rate column, line colour, legend label) for the three curves in each panel.
outcome_curves = [
    ("A_win", "#4A90E2", "A win"),
    ("B_win", "#E94E77", "B win"),
    ("Tie", "#F5C518", "Tie"),
]

for position, (feature, title) in enumerate(zip(panel_features, panel_titles), start=1):
    plt.subplot(2, cols, position)
    rates = outcome_rates(train, feature)
    for column, colour, legend_text in outcome_curves:
        plt.plot(rates["bin_mid"], rates[column], color=colour, label=legend_text)
    plt.title(title)
    plt.xlabel(feature)
    plt.ylabel("Proportion")
    plt.legend()

plt.tight_layout()
plt.show()

# -----------------------------------------------------------------#

# One-vs-rest 0/1 indicator column for each outcome code.
for indicator, code in (("A_win", 0), ("B_win", 1), ("Tie", 2)):
    train[indicator] = train["win_label"].eq(code).astype(int)

# Correlation of every feature with each indicator; each column is taken from
# the correlation matrix of the features plus that one indicator.
feature_cols = verbosity_feats + style_feats
corr_df = pd.DataFrame({
    outcome: train[feature_cols + [outcome]].corr()[outcome]
    for outcome in ["A_win", "B_win", "Tie"]
})
print("\n Correlation of features with each outcome:")
print(corr_df.sort_index())