In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from scipy.optimize import minimize
from sklearn.neural_network import MLPClassifier

def process_predictions(file_path):
    """
    Load a wide predictions CSV and reshape it to one row per session and question.

    The CSV must contain a 'session_id' column; every other column is treated
    as a question whose header is the integer question number ('0', '1', ...).

    Parameters:
    file_path (str): The path to the CSV file containing the predictions data.

    Returns:
    pandas.DataFrame: Long-format frame with the columns
        'new_session_id' ("<session_id>_q<question_number>"),
        'pred' (the value read from the question column),
        'q' (the question number as an integer) and
        'session' (the session id as a string).

    Raises:
    ValueError: If a question column header cannot be converted to an integer.
    """
    wide = pd.read_csv(file_path)
    long = wide.melt(id_vars='session_id', var_name='question_number', value_name='pred')

    # Take the session and the question straight from the melted columns
    # instead of formatting them into a string row by row and parsing that
    # string back: this is vectorised and stays correct when a session id
    # itself contains an underscore.
    session = long['session_id'].astype(str)
    question = long['question_number'].astype(str)

    return pd.DataFrame(
        {
            'new_session_id': session + '_q' + question,
            'pred': long['pred'],
            'q': question.astype('int64'),
            'session': session,
        },
        index=long.index,
    )

def f1_score_macro_for_thresholds(y_true, y_pred_prob, thresholds):
    """
    Binarize predicted probabilities with the given thresholds and score them with macro F1.

    A prediction counts as positive only when its probability is strictly
    greater than the threshold it is compared against.

    Parameters:
    y_true (numpy.ndarray): The true binary labels.
    y_pred_prob (numpy.ndarray): The predicted probabilities for the positive class.
    thresholds (numpy.ndarray): Probability thresholds, broadcast against y_pred_prob.

    Returns:
    float: The macro-averaged F1 score (a single value) of the thresholded predictions.
    """
    above_threshold = y_pred_prob > thresholds
    return f1_score(y_true, above_threshold.astype(int), average="macro")

def optimize_thresholds(y_true, y_pred_prob, method="Powell"):
    """
    Search for the probability thresholds that maximize the macro-averaged F1 score.

    One threshold per column of y_pred_prob is optimized; every threshold
    starts at 0.6 and is bounded to the interval [0, 1].

    Parameters:
    y_true (numpy.ndarray): The true binary labels.
    y_pred_prob (numpy.ndarray): The predicted probabilities for the positive class
        (two-dimensional, one column per label).
    method (str, optional): The optimization method to use (default is "Powell").

    Returns:
    numpy.ndarray: Optimal probability thresholds for each label.
    """
    label_count = y_pred_prob.shape[1]

    def negative_macro_f1(candidate_thresholds):
        # minimize() looks for a minimum, so the score is negated to maximize it
        return -f1_score_macro_for_thresholds(y_true, y_pred_prob, candidate_thresholds)

    search = minimize(
        negative_macro_f1,
        np.full(label_count, 0.6),
        method=method,
        bounds=[(0, 1) for _ in range(label_count)],
    )
    return search.x

In [2]:
# Read the prediction file of every base model into long format
gbt_preds, light_preds, mlp_preds, xg_preds = map(
    process_predictions,
    (
        'data/GBT_FE1_predictions.csv',
        'data/lightGBM_FE1_predictions.csv',
        'data/MLP_FE1_predictions.csv',
        'data/XGBoost_FE1_predictions.csv',
    ),
)

In [3]:
# Build the meta-feature matrix: one column per base model, in the order
# GBT, LightGBM, MLP, XGBoost
ensemble_input = np.stack(
    [
        model_frame['pred'].values
        for model_frame in (gbt_preds, light_preds, mlp_preds, xg_preds)
    ],
    axis=1,
)

display(ensemble_input)

array([[0.72025532, 0.74339731, 0.73632896, 0.72240227],
       [0.82191908, 0.7690637 , 0.73775929, 0.80756241],
       [0.79695243, 0.77578528, 0.73632896, 0.77131236],
       ...,
       [0.96105742, 0.95361682, 0.94810164, 0.94596398],
       [0.97004759, 0.97108219, 0.94344366, 0.95030183],
       [0.9036718 , 0.90467326, 0.94810164, 0.86856925]])

In [4]:
# Same rows and id columns as the XGBoost frame, with every prediction set to 0
oof = xg_preds.assign(pred=0)
display(oof)

Unnamed: 0,new_session_id,pred,q,session
0,20090312431273200_q0,0,0,20090312431273200
1,20090312433251036_q0,0,0,20090312433251036
2,20090312455206810_q0,0,0,20090312455206810
3,20090313091715820_q0,0,0,20090313091715820
4,20090313571836404_q0,0,0,20090313571836404
...,...,...,...,...
424111,22100215342220508_q17,0,17,22100215342220508
424112,22100215460321130_q17,0,17,22100215460321130
424113,22100217104993650_q17,0,17,22100217104993650
424114,22100219442786200_q17,0,17,22100219442786200


In [5]:
# Read the ground truth and flatten it to one row per session and label column
true = pd.read_csv('data/true.csv')
true_melt = pd.melt(
    true,
    id_vars='session_id',
    var_name='question_number',
    value_name='correct',
)
correct_labels = true_melt.loc[:, "correct"]
correct_labels

In [6]:
# Containers for the fitted per-question meta-models
lr_models, mlp_models = [], []

# Three independent copies of the XGBoost predictions; the training loop
# overwrites their values question by question
lr_preds, MLP_preds, final_preds = (xg_preds['pred'].copy() for _ in range(3))

In [7]:
# Train one logistic regression and one MLP meta-model per question (0-17)

# Start from empty lists so that re-running this cell does not append a second
# set of models to the ones already collected (they would all end up pickled).
lr_models = []
mlp_models = []

for q in range(18):
    # Rows that belong to the current question
    idx = oof[oof["q"] == q]
    idx = idx.index
    _ensemble_input = ensemble_input[idx]
    _target = correct_labels[idx]

    lr_model = LogisticRegression(random_state=42)
    lr_model.fit(_ensemble_input, _target)
    lr_models.append(lr_model)

    mlp = MLPClassifier(hidden_layer_sizes=(10,), max_iter=500, random_state=42)
    mlp.fit(_ensemble_input, _target)
    mlp_models.append(mlp)

    # NOTE(review): both models predict on the very rows they were fitted on,
    # so the F1 scores computed from these predictions are in-sample estimates.
    lr_p = lr_model.predict_proba(_ensemble_input)[:, 1]
    mlp_p = mlp.predict_proba(_ensemble_input)[:, 1]
    # Simple blend: unweighted mean of the two meta-model probabilities
    final_p = (lr_p + mlp_p) / 2

    lr_preds[idx] = lr_p
    MLP_preds[idx] = mlp_p
    final_preds[idx] = final_p

In [None]:
# Persist both lists of fitted meta-models, one pickle file per model family
for _path, _models in (
    ("data/LR_FE1_models.pkl", lr_models),
    ("data/MLP_for_STACK_FE1_models.pkl", mlp_models),
):
    with open(_path, "wb") as f:
        pickle.dump(_models, f)

In [8]:
# Print the results of LR
# np.asarray accepts both the pandas Series filled by the training loop and the
# (n, 1) array this cell leaves behind, so the cell can be re-run safely
# (calling .to_numpy() on the array raised AttributeError on a second run).
lr_preds = np.asarray(lr_preds, dtype="float32").reshape(-1, 1)
thr = optimize_thresholds(correct_labels, lr_preds, "Powell")
f1 = f1_score_macro_for_thresholds(correct_labels, lr_preds, thr)

print('thrs: ', thr)
# f1 is already a single macro-averaged score; no further averaging is needed
print('Avg F1 Score:', f1)

thrs:  [0.63222912]
Avg F1 Score: 0.6730503782258368


In [9]:
# Print the results of MLP
# np.asarray accepts both the pandas Series filled by the training loop and the
# (n, 1) array this cell leaves behind, so the cell can be re-run safely
# (calling .to_numpy() on the array raised AttributeError on a second run).
MLP_preds = np.asarray(MLP_preds, dtype="float32").reshape(-1, 1)
thr = optimize_thresholds(correct_labels, MLP_preds, "Powell")
f1 = f1_score_macro_for_thresholds(correct_labels, MLP_preds, thr)

print('thrs: ', thr)
# f1 is already a single macro-averaged score; no further averaging is needed
print('Avg F1 Score:', f1)

thrs:  [0.63086521]
Avg F1 Score: 0.672959947099705


In [10]:
# Print the results of LR and MLP in average
# np.asarray accepts both the pandas Series filled by the training loop and the
# (n, 1) array this cell leaves behind, so the cell can be re-run safely
# (calling .to_numpy() on the array raised AttributeError on a second run).
final_preds = np.asarray(final_preds, dtype="float32").reshape(-1, 1)
thr = optimize_thresholds(correct_labels, final_preds, "Powell")
f1 = f1_score_macro_for_thresholds(correct_labels, final_preds, thr)

print('thrs: ', thr)
# f1 is already a single macro-averaged score; no further averaging is needed
print('Avg F1 Score:', f1)

thrs:  [0.63238657]
Avg F1 Score: 0.673147537955282
