In [25]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from scipy.optimize import minimize
from sklearn.neural_network import MLPClassifier

def process_predictions(file_path):
    """Load a wide per-question prediction CSV and reshape it to long format.

    The CSV must contain a ``session_id`` column; every other column is
    treated as one question whose header is the integer question number
    (``0``, ``1``, ...).

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, handed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per (session, question) pair, in ``DataFrame.melt`` order
        (every session for the first question column, then every session for
        the next one, ...), with a fresh ``0..n-1`` index and the columns:

        - ``new_session_id``: ``"<session_id>_q<question_number>"``
        - ``pred``: the value read from the wide table
        - ``q``: the question number as an integer
        - ``session``: the session id as a string

    Raises
    ------
    ValueError
        If a question column header cannot be converted to an integer.
    """
    wide = pd.read_csv(file_path)
    long = wide.melt(id_vars='session_id', var_name='question_number', value_name='pred')

    # Column-wise string operations replace the previous row-wise ``apply``
    # calls (build the key, then parse the question and session back out of
    # it). The resulting values are the same, without a Python call per row.
    session = long['session_id'].astype(str)
    question = long['question_number'].astype(str)

    return pd.DataFrame(
        {
            'new_session_id': session + '_q' + question,
            'pred': long['pred'],
            'q': question.astype('int64'),
            'session': session,
        },
        index=long.index,
    )

def f1_score_macro_for_thresholds(y_true, y_pred_prob, thresholds):
    """Macro-averaged F1 obtained by cutting probabilities at ``thresholds``.

    A prediction is turned into the positive label (1) when its probability
    is strictly greater than the threshold it is compared with, and into 0
    otherwise; the comparison follows the usual broadcasting rules. The
    binarised predictions are then scored against ``y_true`` with
    ``f1_score(..., average="macro")``.

    Returns the score produced by ``f1_score``.
    """
    is_positive = y_pred_prob > thresholds
    return f1_score(y_true, is_positive.astype(int), average="macro")

def optimize_thresholds(y_true, y_pred_prob, method="Powell"):
    """Search for the thresholds that maximise the macro F1 score.

    One threshold is optimised per column of ``y_pred_prob`` (which must
    therefore be two-dimensional). Every threshold starts at 0.6 and is
    bounded to the interval [0, 1].

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels passed through to the scoring function.
    y_pred_prob : array with a ``shape`` attribute
        Predicted probabilities, one column per threshold.
    method : str, default "Powell"
        Solver name forwarded to ``scipy.optimize.minimize``.

    Returns
    -------
    numpy.ndarray
        The solution vector reported by the optimiser (``result.x``).
    """
    def negative_macro_f1(candidate):
        # ``minimize`` looks for a minimum, so the score is negated.
        return -f1_score_macro_for_thresholds(y_true, y_pred_prob, candidate)

    label_count = y_pred_prob.shape[1]
    search = minimize(
        negative_macro_f1,
        np.full(label_count, 0.6),
        bounds=[(0, 1)] * label_count,
        method=method,
    )
    return search.x

In [26]:
# Long-format prediction frames, one per prediction file, loaded in the
# order GBT, lightGBM, MLP, XGBoost.
gbt_preds, light_preds, mlp_preds, xg_preds = map(
    process_predictions,
    (
        'data/GBT_FE2_predictions.csv',
        'data/lightGBM_FE2_predictions.csv',
        'data/MLP_FE2_predictions.csv',
        'data/XGBoost_FE2_predictions.csv',
    ),
)

In [27]:
# The stacking below pairs rows purely by position, so every frame must list
# the same (session, question) keys in the same order. Fail loudly here
# rather than silently mixing predictions that belong to different rows.
for _name, _frame in (
    ('lightGBM', light_preds),
    ('MLP', mlp_preds),
    ('XGBoost', xg_preds),
):
    assert gbt_preds['new_session_id'].equals(_frame['new_session_id']), (
        f"{_name} predictions are not row-aligned with the GBT predictions"
    )

# One row per (session, question) pair, one column per prediction frame,
# in the order GBT, lightGBM, MLP, XGBoost.
ensemble_input = np.stack([
    gbt_preds['pred'].values,
    light_preds['pred'].values,
    mlp_preds['pred'].values,
    xg_preds['pred'].values
], axis=1)

display(ensemble_input)

array([[0.84462607, 0.80328515, 0.72708839, 0.82489628],
       [0.77079618, 0.68866658, 0.73736656, 0.74789894],
       [0.76679516, 0.80268405, 0.72708839, 0.82396626],
       ...,
       [0.94851995, 0.9450226 , 0.9539693 , 0.95010722],
       [0.97005421, 0.9671587 , 0.95127714, 0.9599641 ],
       [0.8423987 , 0.8606421 , 0.9539693 , 0.82868534]])

In [28]:
# New frame with the same rows and columns as xg_preds, but with every value
# in "pred" replaced by 0; xg_preds itself is left untouched.
oof = xg_preds.assign(pred=0)
display(oof)

Unnamed: 0,new_session_id,pred,q,session
0,20090312431273200_q0,0,0,20090312431273200
1,20090312433251036_q0,0,0,20090312433251036
2,20090312455206810_q0,0,0,20090312455206810
3,20090313091715820_q0,0,0,20090313091715820
4,20090313571836404_q0,0,0,20090313571836404
...,...,...,...,...
424111,22100215342220508_q17,0,17,22100215342220508
424112,22100215460321130_q17,0,17,22100215460321130
424113,22100217104993650_q17,0,17,22100217104993650
424114,22100219442786200_q17,0,17,22100219442786200


In [29]:
# Ground-truth table in wide form: a session_id column plus one column per
# question.
true = pd.read_csv('data/true.csv')

# Long form of the same table, one row per (session, question) pair.
true_melt = pd.melt(true, id_vars='session_id', var_name='question_number', value_name='correct')

# Label vector in melt order.
# NOTE(review): later cells index this by the row labels of the prediction
# frames, which presumes both tables list sessions and questions in the same
# order - confirm against the data files.
correct_labels = true_melt["correct"]

# Wide label matrix without the session_id key column.
y_true = true.drop('session_id', axis=1)

In [30]:
# Containers that collect the fitted per-question models.
lr_models, mlp_models = [], []

# Three independent copies of the XGBoost prediction column. They serve as
# correctly indexed buffers whose values are overwritten question by question
# in the training loop.
lr_preds, MLP_preds, final_preds = (xg_preds['pred'].copy() for _ in range(3))

In [31]:
# Fit one logistic-regression and one MLP meta-model per question on the
# stacked predictions, and store their probabilities (and the average of the
# two) for that question's rows.
# NOTE(review): each model is fitted and then asked to predict on the very
# same rows, so the stored probabilities are in-sample; scores derived from
# them are not a held-out estimate.
for q in range(0, 18):
    # Row labels of question q. They double as positions into the numpy
    # array below, which relies on oof having the default 0..n-1 index.
    idx = oof.index[oof["q"] == q]
    _ensemble_input = ensemble_input[idx]
    _target = correct_labels.loc[idx]

    lr_model = LogisticRegression(random_state=42).fit(_ensemble_input, _target)
    lr_models.append(lr_model)

    mlp = MLPClassifier(hidden_layer_sizes=(10,), max_iter=500, random_state=42).fit(
        _ensemble_input, _target
    )
    mlp_models.append(mlp)

    # Column 1 of predict_proba is the probability of the second class.
    lr_p = lr_model.predict_proba(_ensemble_input)[:, 1]
    mlp_p = mlp.predict_proba(_ensemble_input)[:, 1]
    final_p = 0.5 * (lr_p + mlp_p)

    lr_preds.loc[idx] = lr_p
    MLP_preds.loc[idx] = mlp_p
    final_preds.loc[idx] = final_p

In [33]:
# Turn the logistic-regression probabilities into a float32 column vector.
# np.asarray accepts both the pandas Series this name holds on the first run
# and the array this cell itself assigns, so re-running the cell no longer
# fails on the Series-only ``.to_numpy()`` call.
lr_preds = np.asarray(lr_preds, dtype="float32").reshape(-1, 1)

# Single global threshold (one column -> one threshold) tuned for macro F1,
# then the score reached at that threshold.
thr = optimize_thresholds(correct_labels, lr_preds, "Powell")
f1 = f1_score_macro_for_thresholds(correct_labels, lr_preds, thr)

print('thrs: ', thr)
print('Avg F1 Score:', np.mean(f1))

thrs:  [0.63385304]
Avg F1 Score: 0.6807556088021515


In [34]:
# Turn the MLP probabilities into a float32 column vector.
# np.asarray accepts both the pandas Series this name holds on the first run
# and the array this cell itself assigns, so re-running the cell no longer
# fails on the Series-only ``.to_numpy()`` call.
MLP_preds = np.asarray(MLP_preds, dtype="float32").reshape(-1, 1)

# Single global threshold (one column -> one threshold) tuned for macro F1,
# then the score reached at that threshold.
thr = optimize_thresholds(correct_labels, MLP_preds, "Powell")
f1 = f1_score_macro_for_thresholds(correct_labels, MLP_preds, thr)

print('thrs: ', thr)
print('Avg F1 Score:', np.mean(f1))

thrs:  [0.62926782]
Avg F1 Score: 0.6805403552810034


In [35]:
# Turn the averaged (LR + MLP) probabilities into a float32 column vector.
# np.asarray accepts both the pandas Series this name holds on the first run
# and the array this cell itself assigns, so re-running the cell no longer
# fails on the Series-only ``.to_numpy()`` call.
final_preds = np.asarray(final_preds, dtype="float32").reshape(-1, 1)

# Single global threshold (one column -> one threshold) tuned for macro F1,
# then the score reached at that threshold.
thr = optimize_thresholds(correct_labels, final_preds, "Powell")
f1 = f1_score_macro_for_thresholds(correct_labels, final_preds, thr)

print('thrs: ', thr)
print('Avg F1 Score:', np.mean(f1))

thrs:  [0.6266311]
Avg F1 Score: 0.6807592001381572
