In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb

In [60]:
# Fold under evaluation. It is also used as the position in the list of
# StratifiedKFold splits further down, and the artefact paths below are
# derived from it so the prediction file and the reranker can never silently
# point at different folds.
FOLD_IDX = 3
# Number of reranked labels kept per row (passed to predict_topk as k).
TOP_K = 3

TARGET_SUB = f"./qwen3_eval_fold{FOLD_IDX}.csv"
RERANKER_PATH = f"./v2/lgbm_ranker_f{FOLD_IDX}.pkl"

# SECURITY: joblib.load unpickles arbitrary Python objects, which can execute
# code. Only load artefacts that were produced by a trusted run.
le = joblib.load("./label_encoder.joblib")

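**Security note:** `joblib.load` is pickle-based, so loading a file can execute arbitrary code. Only load the label encoder and reranker artefacts from a trusted source, ideally with the same library versions that were used to save them. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations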


In [None]:
def predict_topk(df_bgm, ranker, le, k=3):
    """Score every (row, class) pair with ``ranker`` and keep the ``k`` best classes per row.

    Parameters
    ----------
    df_bgm : pandas.DataFrame
        One row per query. Must contain ``row_id``, ``QuestionId``,
        ``is_correct`` and, for every label ``c`` in ``le.classes_``, the
        columns ``f"{c}_rank"`` and ``f"{c}_prob"``.
    ranker : object
        Model exposing ``predict(DataFrame)`` that returns one score per
        candidate row (higher is better).
    le : object
        Only ``le.classes_`` is read; it lists the candidate labels.
    k : int, default 3
        Number of labels kept per ``row_id``.

    Returns
    -------
    pandas.DataFrame
        ``row_id`` and ``is_correct`` taken from ``df_bgm`` (same row order)
        plus a ``candidate`` column holding a list of up to ``k`` labels,
        best score first. An empty ``df_bgm`` (or an encoder without classes)
        yields empty candidate lists instead of an error.
    """
    classes = list(le.classes_)
    n_rows, n_classes = len(df_bgm), len(classes)

    # Nothing to score: return the expected schema without calling the ranker.
    if n_rows == 0 or n_classes == 0:
        empty_result = df_bgm[["row_id", "is_correct"]].copy()
        empty_result["candidate"] = pd.Series(
            [[] for _ in range(n_rows)], index=empty_result.index, dtype=object
        )
        return empty_result

    rank_cols = [f"{name}_rank" for name in classes]
    prob_cols = [f"{name}_prob" for name in classes]

    # Build the candidate table column-wise instead of via iterrows():
    # iterrows() casts each row to one common dtype (ints become floats in an
    # all-numeric frame), which would change the QuestionId categories.
    # Layout is row-major (all classes of row 0, then row 1, ...), so ties in
    # the score keep the le.classes_ order after the stable multi-key sort.
    cand_df = pd.DataFrame({
        "qid": np.repeat(df_bgm["row_id"].to_numpy(), n_classes),
        "QuestionId": np.repeat(df_bgm["QuestionId"].to_numpy(), n_classes),
        "is_correct": np.repeat(df_bgm["is_correct"].to_numpy(), n_classes),
        "candidate": np.tile(np.asarray(classes, dtype=object), n_rows),
        "rank_feature": df_bgm[rank_cols].to_numpy().ravel(),
        "prob_feature": df_bgm[prob_cols].to_numpy().ravel(),
    })

    # The ranker receives these two columns as pandas categoricals.
    cand_df["QuestionId"] = cand_df["QuestionId"].astype("category")
    cand_df["candidate"] = cand_df["candidate"].astype("category")

    # Predict in one shot.
    # NOTE(review): presumably this column list/order must match what the
    # ranker was fitted on — confirm against the training notebook.
    features = ["QuestionId", "candidate", "is_correct", "rank_feature", "prob_feature"]
    cand_df["score"] = ranker.predict(cand_df[features])

    # For each query, sort by descending score and take the top-k labels.
    topk_df = (
        cand_df.sort_values(["qid", "score"], ascending=[True, False])
               .groupby("qid")["candidate"].apply(lambda s: list(s.head(k)))
    )

    # Left-merge back so the output keeps df_bgm's row order.
    result_df = (
        df_bgm[["row_id", "is_correct"]]
        .merge(topk_df, left_on="row_id", right_on="qid", how="left")
    )

    return result_df

# Load the prediction file configured in TARGET_SUB.
df_target = pd.read_csv(TARGET_SUB)

# --- Temporary evaluation scaffold (marked "comment later"): start ---
# Rebuilds a stratified 5-fold split over the loaded rows and continues with
# the *training* indices of fold FOLD_IDX only.
from sklearn.model_selection import StratifiedKFold

df_target["Misconception"] = df_target["Misconception"].fillna("NA")

# "<last '_'-separated piece of Category>:<Misconception>"
category_tail = df_target["Category"].apply(lambda category: category.rsplit("_", 1)[-1])
df_target["misconception_target"] = category_tail + ":" + df_target["Misconception"]

# Integer code per (QuestionId, misconception_target) pair, used for stratification.
stratify_labels = (
    df_target["QuestionId"].astype(str)
    + "_"
    + df_target["misconception_target"].astype(str)
)
df_target["split_key"] = stratify_labels.astype("category").cat.codes

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_indices = list(skf.split(df_target, df_target["split_key"]))
tr_idx, va_idx = fold_indices[FOLD_IDX]
df_tr = df_target.iloc[tr_idx].copy()
df_va = df_target.iloc[va_idx].copy()

# Continue with a shuffled copy of the training part; df_tr itself stays untouched.
df_target = df_tr.copy().sample(frac=1, random_state=42)
# --- Temporary evaluation scaffold: end ---

def _class_rank_prob_frames(targets_col, probs_col, class_names):
    """Expand per-row prediction lists into one rank and one prob column per class.

    ``targets_col`` holds lists of labels and ``probs_col`` the position-aligned
    lists of probabilities. For every name in ``class_names`` the rank is the
    first position of that name in the row's label list and the prob is the
    probability at that position.

    Returns ``(rank_df, prob_df)`` indexed like ``targets_col`` with columns
    ``f"{name}_rank"`` / ``f"{name}_prob"``.

    Raises ``ValueError`` when a row's label list lacks one of ``class_names``.
    """
    rank_rows, prob_rows = [], []
    for targets, probs in zip(targets_col, probs_col):
        # First occurrence wins, matching list.index() semantics.
        first_pos = {}
        for pos, label in enumerate(targets):
            first_pos.setdefault(label, pos)
        missing = [name for name in class_names if name not in first_pos]
        if missing:
            raise ValueError(
                f"Pred_Targets lacks {len(missing)} encoder class(es), e.g. {missing[:3]}"
            )
        ranks = [first_pos[name] for name in class_names]
        rank_rows.append(ranks)
        prob_rows.append([probs[r] for r in ranks])

    rank_df = pd.DataFrame(
        rank_rows, index=targets_col.index, columns=[f"{name}_rank" for name in class_names]
    )
    prob_df = pd.DataFrame(
        prob_rows, index=targets_col.index, columns=[f"{name}_prob" for name in class_names]
    )
    return rank_df, prob_df


# Keep only the columns the reranking step reads. The explicit .copy() makes
# this an independent frame, so the column assignments below cannot trigger
# SettingWithCopyWarning.
df_target = df_target[["row_id", "QuestionId", "is_correct", "Pred_Targets", "Corresponding Probs"]].copy()

# Both columns are "|"-separated strings; the i-th probability belongs to the
# i-th target.
df_target["Pred_Targets_list"] = df_target["Pred_Targets"].str.split("|")
df_target["Pred_Probs_list"] = df_target["Corresponding Probs"].str.split("|").apply(lambda x: [float(p) for p in x])

# Build all "<class>_rank" / "<class>_prob" columns in one pass and attach them
# with a single concat, instead of two row-wise apply() calls and two column
# inserts per class (slow, and fragments the frame).
_rank_df, _prob_df = _class_rank_prob_frames(
    df_target["Pred_Targets_list"], df_target["Pred_Probs_list"], list(le.classes_)
)
df_target = pd.concat([df_target, _rank_df, _prob_df], axis=1)

# SECURITY: joblib.load unpickles arbitrary Python objects; only load a
# reranker file that comes from a trusted run.
ranker = joblib.load(RERANKER_PATH)
df_target = predict_topk(df_target, ranker, le, k=TOP_K)

def add_prefix(lbl, flag):
    """Prepend the correctness prefix to a label.

    ``lbl`` is a label such as "Algebra:Mis-X". ``flag`` is converted with
    ``int()``; a value of exactly 1 yields the "True_" prefix, anything else
    yields "False_".
    """
    if int(flag) == 1:
        return f"True_{lbl}"
    return f"False_{lbl}"
    
# 0/1 view of the correctness flag. Not read by any cell visible here; kept in
# case a later cell uses it.
is_corr = df_target['is_correct'].astype(int).values  # 1 or 0

# Prefix every top-k label with "True_"/"False_" according to the row's
# is_correct flag and join them with single spaces. Iterating the two columns
# directly avoids a row-wise DataFrame.apply, which builds a Series per row and
# returns a DataFrame (breaking this assignment) when the frame is empty.
df_target["Category:Misconception"] = [
    " ".join(add_prefix(label, flag) for label in labels)
    for labels, flag in zip(df_target["candidate"], df_target["is_correct"])
]
df_target = df_target[["row_id", "Category:Misconception"]].copy()


In [None]:
# Display the prediction frame: row_id plus the space-joined, prefixed labels.
df_target

In [None]:
# Attach each row's prediction string to the fold rows held in df_tr.
final = pd.merge(df_tr, df_target, how="left", on="row_id")

# Ground truth is spelled "<Category>:<Misconception>", with "NA" for a missing misconception.
true_misconception = final["Misconception"].fillna("NA")
gtruth = final["Category"] + ":" + true_misconception

# Space-separated prediction string -> ordered list of labels.
pred = final["Category:Misconception"].map(lambda joined: joined.split(" "))
def mapk(actual, predicted, k=3):
    """Mean average precision at ``k`` when each query has exactly one true label.

    Parameters
    ----------
    actual : sequence
        The true label of each query.
    predicted : sequence of sequences
        For each query, the predicted labels ordered best first.
    k : int, default 3
        Only the first ``k`` predictions of each query are considered.

    Returns
    -------
    float
        Mean over queries of ``1 / rank`` of the true label within the first
        ``k`` predictions (0 for a query whose true label is not among them).
        Returns 0.0 when ``actual`` is empty instead of dividing by zero.
    """
    n_queries = len(actual)
    if n_queries == 0:
        return 0.0

    total = 0.0
    for truth, ranked in zip(actual, predicted):
        # list() lets tuples/arrays be used as well as lists.
        top = list(ranked)[:k]
        if truth in top:
            # index() returns the first occurrence, so duplicates count once.
            total += 1.0 / (top.index(truth) + 1)
    return total / n_queries

# MAP@3 (mapk's default k) of the reranked predictions against the ground truth.
mapk(gtruth, pred)