In [1]:
import pandas as pd

# Render floats in displayed frames with thousands separators and two decimals.
pd.options.display.float_format = "{:,.2f}".format


def rename_models(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with short model labels expanded to display names.

    Values in the ``"Model"`` column are replaced on exact match as follows;
    every other value is left unchanged:

    * ``"Original"``  -> ``"RF Original"``
    * ``"Augmented"`` -> ``"RF Augmented"``
    * ``"Sequence"``  -> ``"Sequence Original"``

    Args:
        df: Frame containing a ``"Model"`` column.

    Returns:
        A new frame with the relabelled ``"Model"`` column; ``df`` itself is
        not modified.

    Raises:
        KeyError: If ``df`` has no ``"Model"`` column.
    """
    display_names = {
        "Original": "RF Original",
        "Augmented": "RF Augmented",
        "Sequence": "Sequence Original",
    }
    renamed = df.copy()
    # Whole-value (non-regex) replacement; the mapping is built once rather
    # than once per row.
    renamed["Model"] = renamed["Model"].replace(display_names)
    return renamed

In [2]:
def split_metrics_by_model(df: pd.DataFrame) -> dict[str, pd.DataFrame]:
    """Average the metrics per chapter and return one table per model.

    For every distinct value in the ``"Model"`` column, the rows of that model
    are grouped by ``"Chapter"`` and averaged. Each resulting table is
    transposed so that the remaining (metric) columns become the rows and the
    chapters become the columns.

    Args:
        df: Frame with ``"Model"`` and ``"Chapter"`` columns plus the columns
            to average. Bookkeeping columns listed below are dropped first if
            present.

    Returns:
        Mapping from model name to its chapter-averaged, transposed table.

    Raises:
        KeyError: If ``df`` lacks a ``"Model"`` or ``"Chapter"`` column.
    """
    # Bookkeeping columns that must not be averaged. "Best Prams" is the
    # spelling the original code matched; "Best Params" is accepted as well.
    # NOTE(review): confirm which spelling the metrics CSV headers actually use.
    bookkeeping_cols = ["Trial", "Best Prams", "Best Params"]
    metrics_only = df.drop(columns=bookkeeping_cols, errors="ignore")

    tables = {}
    for model, model_rows in metrics_only.groupby("Model"):
        # groupby(...).mean() already indexes by "Chapter", so transposing
        # directly puts chapters on the columns.
        tables[model] = (
            model_rows.drop(columns="Model").groupby("Chapter").mean().T
        )
    return tables

# UCSD

In [58]:
# Load the UCSD fine-tuning metrics and expand the short model labels.
ucsd_df = rename_models(pd.read_csv("metrics/ucsd_metrics_finetune.csv"))

In [59]:
# One chapter-averaged table per model (rows: metrics, columns: chapters).
ucsd_metrics = split_metrics_by_model(ucsd_df)

In [9]:
def _format_metrics(table: pd.DataFrame, decimals: int = 2) -> pd.DataFrame:
    """Round ``table`` and render every cell as a fixed-decimal string."""
    cell_format = f"{{:.{decimals}f}}".format
    # Formatting (rather than ``astype(str)``) keeps trailing zeros, so 0.9
    # is shown as "0.90" and lines up with the other cells.
    return table.round(decimals).apply(lambda column: column.map(cell_format))


def _format_comparison(
    augmented: pd.DataFrame, original: pd.DataFrame, decimals: int = 2
) -> pd.DataFrame:
    """Build cells of the form ``"<augmented> (<original>)"``."""
    return (
        _format_metrics(augmented, decimals)
        + " ("
        + _format_metrics(original, decimals)
        + ")"
    )


def print_comparison_table(
    metrics: dict[str, pd.DataFrame], decimals: int = 2
) -> None:
    """Print and display the augmented-vs-original comparison tables.

    Shows, in order:

    1. ``"RF Augmented"`` with ``"RF Original"`` in parentheses.
    2. ``"Sequence Augmented"`` with ``"Sequence Original"`` in parentheses.
    3. ``"Chapter Ensemble"`` on its own.
    4. Only when *both* ``"Reading Related"`` and ``"Performance Related"``
       are present: the ``"Sequence Augmented"``, ``"Reading Related"`` and
       ``"Performance Related"`` tables individually.

    Tables are rendered through ``display``, which is not imported here and is
    expected to be available in the notebook namespace.

    Args:
        metrics: Mapping from model name to its metrics table.
        decimals: Number of decimal places shown in every cell.

    Raises:
        KeyError: If any of the five always-shown models is missing from
            ``metrics``.
    """
    for family in ("RF", "Sequence"):
        print(f"{family} Augmented vs. {family} Original")
        display(
            _format_comparison(
                metrics[f"{family} Augmented"],
                metrics[f"{family} Original"],
                decimals,
            )
        )

    print("Chapter Ensemble")
    display(_format_metrics(metrics["Chapter Ensemble"], decimals))

    # The feature-category section is shown only when both experiment
    # tables are available.
    if "Reading Related" in metrics and "Performance Related" in metrics:
        print("\n\n ------------ Feature Category Experiments ------------")
        for model in ("Sequence Augmented", "Reading Related", "Performance Related"):
            print(model)
            display(_format_metrics(metrics[model], decimals))

In [73]:
# Show the comparison tables for the UCSD metrics.
print_comparison_table(ucsd_metrics)

RF Augmented vs. RF Original


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
Precision,0.59 (0.0),0.66 (0.0),0.71 (0.0),0.69 (0.0),0.69 (0.0),0.69 (0.0),0.71 (0.13),0.72 (0.9),0.74 (0.96),0.74 (0.83),0.73 (0.8),0.74 (0.77)
Recall,0.28 (0.0),0.49 (0.0),0.46 (0.0),0.47 (0.0),0.48 (0.0),0.48 (0.0),0.5 (0.01),0.53 (0.22),0.53 (0.31),0.55 (0.49),0.55 (0.58),0.55 (0.61)
ROC_AUC,0.62 (0.54),0.74 (0.67),0.75 (0.67),0.76 (0.68),0.78 (0.71),0.79 (0.7),0.8 (0.72),0.82 (0.77),0.84 (0.83),0.85 (0.86),0.85 (0.86),0.85 (0.87)
PR_AUC,0.54 (0.45),0.7 (0.59),0.7 (0.6),0.71 (0.62),0.73 (0.62),0.74 (0.61),0.75 (0.66),0.78 (0.74),0.79 (0.78),0.8 (0.81),0.8 (0.82),0.8 (0.82)
Accuracy,0.66 (0.64),0.72 (0.64),0.73 (0.64),0.73 (0.64),0.73 (0.64),0.73 (0.64),0.74 (0.64),0.75 (0.72),0.76 (0.75),0.76 (0.78),0.76 (0.79),0.76 (0.79)
F1,0.37 (0.0),0.55 (0.0),0.55 (0.0),0.55 (0.0),0.56 (0.0),0.56 (0.0),0.57 (0.02),0.6 (0.34),0.61 (0.46),0.62 (0.61),0.62 (0.66),0.62 (0.67)


Sequence Augmented vs. Sequence Original


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
Precision,0.49 (0.13),0.76 (0.12),0.69 (0.14),0.66 (0.16),0.63 (0.17),0.61 (0.24),0.64 (0.28),0.73 (0.43),0.76 (0.57),0.76 (0.7),0.72 (0.74),0.74 (0.74)
Recall,0.05 (0.07),0.23 (0.07),0.29 (0.06),0.33 (0.09),0.37 (0.08),0.37 (0.15),0.39 (0.17),0.47 (0.25),0.55 (0.35),0.62 (0.47),0.64 (0.61),0.66 (0.71)
ROC_AUC,0.62 (0.51),0.75 (0.53),0.75 (0.54),0.77 (0.6),0.79 (0.58),0.79 (0.61),0.81 (0.64),0.82 (0.69),0.85 (0.76),0.87 (0.8),0.87 (0.85),0.87 (0.9)
PR_AUC,0.56 (0.43),0.71 (0.48),0.72 (0.49),0.73 (0.53),0.73 (0.52),0.74 (0.53),0.76 (0.57),0.77 (0.62),0.8 (0.69),0.81 (0.73),0.8 (0.79),0.83 (0.87)
Accuracy,0.66 (0.62),0.71 (0.61),0.72 (0.61),0.72 (0.63),0.72 (0.61),0.72 (0.64),0.72 (0.64),0.74 (0.65),0.76 (0.7),0.78 (0.73),0.77 (0.76),0.78 (0.78)
F1,0.09 (0.08),0.34 (0.07),0.39 (0.07),0.42 (0.1),0.45 (0.1),0.45 (0.16),0.46 (0.19),0.55 (0.28),0.61 (0.39),0.67 (0.52),0.67 (0.62),0.68 (0.7)


Chapter Ensemble


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
Precision,0.59,0.7,0.7,0.73,0.7,0.73,0.74,0.78,0.76,0.79,0.79,0.77
Recall,0.26,0.45,0.41,0.44,0.43,0.45,0.46,0.49,0.57,0.59,0.61,0.61
ROC_AUC,0.63,0.76,0.74,0.75,0.77,0.78,0.79,0.81,0.86,0.88,0.87,0.87
PR_AUC,0.55,0.7,0.69,0.7,0.71,0.73,0.75,0.78,0.82,0.83,0.83,0.82
Accuracy,0.66,0.73,0.72,0.74,0.72,0.74,0.74,0.76,0.78,0.79,0.79,0.79
F1,0.35,0.54,0.51,0.53,0.52,0.55,0.55,0.59,0.65,0.67,0.68,0.67


# UCLA

In [82]:
# Load the UCLA fine-tuning metrics, expand the short model labels, and
# build the per-model tables.
ucla_df = rename_models(pd.read_csv("metrics/ucla_metrics_finetune.csv"))
ucla_metrics = split_metrics_by_model(ucla_df)

In [74]:
# Show the comparison tables for the UCLA metrics.
print_comparison_table(ucla_metrics)

RF Augmented vs. RF Original


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.38 (0.43),0.43 (0.45),0.56 (0.5),0.61 (0.63),0.66 (0.66),0.65 (0.65),0.67 (0.64),0.67 (0.73),0.68 (0.75),0.69 (0.78),0.69 (0.77),0.69 (0.77)
F1,0.11 (0.0),0.05 (0.0),0.15 (0.0),0.3 (0.05),0.33 (0.1),0.33 (0.14),0.35 (0.12),0.4 (0.18),0.4 (0.27),0.39 (0.4),0.39 (0.48),0.39 (0.5)
Precision,0.16 (0.0),0.08 (0.0),0.25 (0.0),0.4 (0.08),0.47 (0.19),0.49 (0.24),0.53 (0.22),0.63 (0.44),0.63 (0.54),0.61 (0.68),0.61 (0.73),0.61 (0.74)
Accuracy,0.6 (0.68),0.61 (0.68),0.63 (0.68),0.68 (0.68),0.7 (0.69),0.7 (0.69),0.71 (0.68),0.72 (0.72),0.72 (0.72),0.72 (0.74),0.72 (0.76),0.72 (0.75)
Recall,0.1 (0.0),0.05 (0.0),0.12 (0.0),0.27 (0.03),0.28 (0.07),0.28 (0.1),0.29 (0.08),0.32 (0.12),0.32 (0.19),0.32 (0.31),0.32 (0.4),0.32 (0.43)
ROC_AUC,0.38 (0.52),0.53 (0.55),0.66 (0.62),0.73 (0.73),0.76 (0.75),0.76 (0.74),0.77 (0.74),0.77 (0.8),0.78 (0.83),0.79 (0.85),0.79 (0.84),0.79 (0.84)


Sequence Augmented vs. Sequence Original


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.49 (0.43),0.58 (0.48),0.64 (0.51),0.71 (0.46),0.75 (0.51),0.74 (0.55),0.77 (0.51),0.8 (0.6),0.82 (0.63),0.8 (0.66),0.79 (0.71),0.7 (0.85)
F1,0.05 (0.05),0.14 (0.1),0.19 (0.13),0.25 (0.07),0.31 (0.13),0.31 (0.19),0.34 (0.16),0.37 (0.24),0.43 (0.34),0.51 (0.45),0.52 (0.48),0.43 (0.5)
Precision,0.07 (0.04),0.17 (0.1),0.27 (0.15),0.29 (0.09),0.44 (0.14),0.47 (0.21),0.47 (0.18),0.53 (0.32),0.51 (0.44),0.64 (0.6),0.65 (0.63),0.47 (0.55)
Accuracy,0.67 (0.63),0.68 (0.63),0.68 (0.66),0.71 (0.62),0.72 (0.65),0.72 (0.66),0.74 (0.65),0.74 (0.67),0.74 (0.7),0.74 (0.71),0.74 (0.72),0.7 (0.73)
Recall,0.04 (0.08),0.13 (0.11),0.17 (0.14),0.24 (0.07),0.27 (0.13),0.27 (0.19),0.3 (0.16),0.32 (0.24),0.42 (0.35),0.49 (0.46),0.5 (0.48),0.44 (0.57)
ROC_AUC,0.58 (0.49),0.69 (0.54),0.74 (0.55),0.8 (0.52),0.83 (0.56),0.81 (0.61),0.83 (0.58),0.85 (0.66),0.9 (0.72),0.88 (0.71),0.85 (0.78),0.8 (0.9)


Chapter Ensemble


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.38,0.51,0.6,0.63,0.7,0.7,0.7,0.73,0.78,0.77,0.78,0.77
F1,0.05,0.04,0.12,0.32,0.32,0.36,0.34,0.38,0.39,0.51,0.5,0.5
Precision,0.1,0.06,0.21,0.47,0.51,0.57,0.56,0.66,0.64,0.75,0.72,0.74
Accuracy,0.63,0.63,0.64,0.69,0.71,0.73,0.73,0.73,0.73,0.76,0.75,0.75
Recall,0.03,0.04,0.1,0.27,0.25,0.29,0.26,0.29,0.33,0.43,0.42,0.43
ROC_AUC,0.41,0.63,0.7,0.76,0.79,0.78,0.79,0.81,0.85,0.85,0.85,0.84


# Transfer

In [3]:
# Load the transfer metrics, expand the short model labels, and build the
# per-model tables.
transfer_df = rename_models(
    pd.read_csv("metrics/transfer_metrics_finetune_with_feature_category_exp.csv")
)
transfer_metrics = split_metrics_by_model(transfer_df)

In [10]:
# Show the comparison tables for the transfer metrics.
print_comparison_table(transfer_metrics)

RF Augmented vs. RF Original


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.42 (0.39),0.53 (0.52),0.55 (0.52),0.62 (0.51),0.62 (0.47),0.62 (0.57),0.64 (0.58),0.64 (0.59),0.64 (0.64),0.65 (0.63),0.65 (0.63),0.64 (0.67)
F1,0.28 (0.0),0.32 (0.0),0.44 (0.0),0.48 (0.0),0.47 (0.0),0.44 (0.0),0.47 (0.0),0.47 (0.19),0.47 (0.19),0.48 (0.35),0.48 (0.42),0.48 (0.42)
Precision,0.4 (0.0),0.42 (0.0),0.54 (0.0),0.57 (0.0),0.53 (0.0),0.54 (0.0),0.64 (0.0),0.64 (1.0),0.64 (1.0),0.7 (1.0),0.7 (1.0),0.7 (1.0)
Accuracy,0.65 (0.68),0.65 (0.68),0.7 (0.68),0.72 (0.68),0.7 (0.68),0.7 (0.68),0.73 (0.68),0.73 (0.72),0.73 (0.72),0.75 (0.75),0.75 (0.77),0.75 (0.77)
Recall,0.21 (0.0),0.26 (0.0),0.37 (0.0),0.42 (0.0),0.42 (0.0),0.37 (0.0),0.37 (0.0),0.37 (0.11),0.37 (0.11),0.37 (0.21),0.37 (0.26),0.37 (0.26)
ROC_AUC,0.55 (0.62),0.63 (0.68),0.66 (0.67),0.74 (0.7),0.74 (0.65),0.72 (0.7),0.73 (0.69),0.73 (0.66),0.75 (0.74),0.75 (0.75),0.75 (0.75),0.75 (0.79)


Sequence Augmented vs. Sequence Original


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.55 (0.48),0.58 (0.47),0.66 (0.52),0.72 (0.45),0.71 (0.47),0.74 (0.52),0.75 (0.52),0.75 (0.59),0.75 (0.62),0.75 (0.62),0.75 (0.64),0.75 (0.72)
F1,0.1 (0.41),0.18 (0.45),0.55 (0.45),0.61 (0.41),0.59 (0.23),0.61 (0.31),0.61 (0.37),0.61 (0.44),0.56 (0.38),0.56 (0.4),0.56 (0.5),0.56 (0.52)
Precision,1.0 (0.6),0.67 (0.58),0.8 (0.58),0.71 (0.47),0.67 (0.43),0.71 (0.57),0.71 (0.62),0.71 (0.75),0.69 (0.71),0.69 (0.83),0.69 (0.78),0.69 (0.88)
Accuracy,0.7 (0.72),0.7 (0.72),0.78 (0.72),0.78 (0.67),0.77 (0.67),0.78 (0.7),0.78 (0.72),0.78 (0.75),0.77 (0.73),0.77 (0.75),0.77 (0.77),0.77 (0.78)
Recall,0.05 (0.32),0.11 (0.37),0.42 (0.37),0.53 (0.37),0.53 (0.16),0.53 (0.21),0.53 (0.26),0.53 (0.32),0.47 (0.26),0.47 (0.26),0.47 (0.37),0.47 (0.37)
ROC_AUC,0.61 (0.62),0.7 (0.65),0.77 (0.68),0.82 (0.68),0.81 (0.68),0.83 (0.66),0.83 (0.65),0.83 (0.59),0.84 (0.68),0.85 (0.67),0.85 (0.68),0.86 (0.82)


Chapter Ensemble


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.42,0.5,0.55,0.61,0.62,0.62,0.6,0.63,0.66,0.66,0.67,0.67
F1,0.36,0.36,0.39,0.57,0.48,0.44,0.43,0.48,0.52,0.38,0.42,0.42
Precision,0.43,0.56,0.5,0.62,0.7,0.75,0.67,0.7,0.88,0.71,1.0,1.0
Accuracy,0.65,0.7,0.68,0.75,0.75,0.75,0.73,0.75,0.78,0.73,0.77,0.77
Recall,0.32,0.26,0.32,0.53,0.37,0.32,0.32,0.37,0.37,0.26,0.26,0.26
ROC_AUC,0.58,0.66,0.7,0.74,0.74,0.71,0.7,0.73,0.74,0.76,0.78,0.79




 ------------ Feature Category Experiments ------------
Sequence Augmented


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.55,0.58,0.66,0.72,0.71,0.74,0.75,0.75,0.75,0.75,0.75,0.75
F1,0.1,0.18,0.55,0.61,0.59,0.61,0.61,0.61,0.56,0.56,0.56,0.56
Precision,1.0,0.67,0.8,0.71,0.67,0.71,0.71,0.71,0.69,0.69,0.69,0.69
Accuracy,0.7,0.7,0.78,0.78,0.77,0.78,0.78,0.78,0.77,0.77,0.77,0.77
Recall,0.05,0.11,0.42,0.53,0.53,0.53,0.53,0.53,0.47,0.47,0.47,0.47
ROC_AUC,0.61,0.7,0.77,0.82,0.81,0.83,0.83,0.83,0.84,0.85,0.85,0.86


Reading Related


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.34,0.39,0.45,0.58,0.6,0.6,0.56,0.55,0.57,0.58,0.63,0.67
F1,0.0,0.0,0.0,0.1,0.1,0.19,0.26,0.26,0.26,0.26,0.32,0.46
Precision,0.0,0.0,0.0,1.0,1.0,1.0,0.75,0.75,0.75,0.75,0.67,0.86
Accuracy,0.68,0.68,0.68,0.7,0.7,0.72,0.72,0.72,0.72,0.72,0.72,0.77
Recall,0.0,0.0,0.0,0.05,0.05,0.11,0.16,0.16,0.16,0.16,0.21,0.32
ROC_AUC,0.46,0.49,0.53,0.7,0.71,0.7,0.69,0.68,0.72,0.73,0.76,0.76


Performance Related


Chapter,1,2,3,4,5,6,7,8,9,10,11,12
PR_AUC,0.49,0.49,0.52,0.58,0.61,0.61,0.68,0.69,0.7,0.69,0.65,0.63
F1,0.0,0.17,0.3,0.59,0.55,0.55,0.55,0.5,0.5,0.5,0.5,0.48
Precision,0.0,0.5,0.5,0.67,0.64,0.64,0.64,0.62,0.62,0.62,0.62,0.57
Accuracy,0.68,0.68,0.68,0.77,0.75,0.75,0.75,0.73,0.73,0.73,0.73,0.72
Recall,0.0,0.11,0.21,0.53,0.47,0.47,0.47,0.42,0.42,0.42,0.42,0.42
ROC_AUC,0.62,0.66,0.72,0.75,0.79,0.79,0.81,0.8,0.83,0.83,0.75,0.72
