In [None]:
import os
import pandas as pd
from scipy.stats import wilcoxon

In [None]:
# Load all CSVs into a single DataFrame
def load_data(data_path):
    """Read every ``.csv`` file in ``data_path`` into a single DataFrame.

    Files are read in sorted filename order so the row order of the result
    is reproducible across machines and runs; ``os.listdir`` itself returns
    entries in arbitrary order. Row order matters here because
    ``perform_wilcoxon_tests`` pairs observations by position.

    Parameters
    ----------
    data_path : str or path-like
        Directory scanned (non-recursively) for names ending in ``.csv``.

    Returns
    -------
    pandas.DataFrame
        All files concatenated row-wise with a fresh ``0..n-1`` index.

    Raises
    ------
    ValueError
        If ``data_path`` contains no ``.csv`` files.
    """
    csv_names = sorted(
        name for name in os.listdir(data_path) if name.endswith('.csv')
    )
    if not csv_names:
        # pd.concat([]) would also raise ValueError, but with a message that
        # does not say which directory was empty.
        raise ValueError(f"No .csv files found in {data_path!r}")

    frames = [pd.read_csv(os.path.join(data_path, name)) for name in csv_names]
    return pd.concat(frames, ignore_index=True)

# Apply highlighting for the maximum values in each row
def highlight_max(s):
    """Build a per-element CSS list that flags the maximum of ``s``.

    Every element equal to ``s.max()`` gets ``'background-color: red'``
    (ties are all flagged); every other element gets an empty string.
    """
    peak = s.max()
    styles = []
    for matches_peak in s.eq(peak):
        styles.append('background-color: red' if matches_peak else '')
    return styles

# Perform the Wilcoxon test for paired comparisons between methods
def perform_wilcoxon_tests(merged_df, comparisons, metric):
    """Run a Wilcoxon signed-rank test for each method pair in each cell.

    For every (Classifier, Dataset) combination present in ``merged_df`` and
    every ``(method_1, method_2)`` in ``comparisons``, the ``metric`` values
    of the two oversampling methods are compared with
    ``scipy.stats.wilcoxon``. Observations are paired by their row order in
    ``merged_df``.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Must contain the columns ``"Classifier"``, ``"Dataset"``,
        ``"Oversampling"`` and ``metric``.
    comparisons : iterable of (str, str)
        Pairs of ``"Oversampling"`` values to compare.
    metric : str
        Name of the column holding the scores to compare.

    Returns
    -------
    pandas.DataFrame
        One row per tested combination with the columns ``"Classifier"``,
        ``"Dataset"``, ``"Comparison"`` (``"<method_1> vs <method_2>"``),
        ``"Statistic"`` and ``"p-value"``. The columns are present even when
        no combination could be tested. Combinations where either method has
        no rows, or where the two methods have a different number of rows,
        are skipped. When the two samples are element-wise identical the
        test is undefined and NaN is recorded for both the statistic and the
        p-value.
    """
    result_columns = ["Classifier", "Dataset", "Comparison", "Statistic", "p-value"]
    wilcoxon_results = []
    datasets = merged_df["Dataset"].unique()

    for classifier in merged_df["Classifier"].unique():
        # Filter once per classifier / dataset instead of rebuilding a
        # three-way mask over the full frame for every comparison.
        classifier_rows = merged_df[merged_df["Classifier"] == classifier]
        for dataset in datasets:
            cell_rows = classifier_rows[classifier_rows["Dataset"] == dataset]
            if cell_rows.empty:
                continue

            for method_1, method_2 in comparisons:
                data_1 = cell_rows.loc[cell_rows["Oversampling"] == method_1, metric].to_numpy()
                data_2 = cell_rows.loc[cell_rows["Oversampling"] == method_2, metric].to_numpy()

                if len(data_1) == 0 or len(data_1) != len(data_2):
                    continue

                if (data_1 == data_2).all():
                    # Every paired difference is zero. Some SciPy versions
                    # raise ValueError for this input with the default
                    # zero_method, which would abort the whole analysis;
                    # record the undefined result as NaN instead.
                    stat, p_value = float("nan"), float("nan")
                else:
                    stat, p_value = wilcoxon(data_1, data_2)

                wilcoxon_results.append({
                    "Classifier": classifier,
                    "Dataset": dataset,
                    "Comparison": f"{method_1} vs {method_2}",
                    "Statistic": stat,
                    "p-value": p_value
                })

    return pd.DataFrame(wilcoxon_results, columns=result_columns)

# Create a pivot table for the F1 Macro scores and apply statistical significance
def create_styled_pivot(merged_df, wilcoxon_results_df, methods, metric):
    f1_macro_pivot = merged_df.groupby(["Classifier", "Dataset", "Oversampling"]).mean()[metric].reset_index()
    f1_macro_pivot = f1_macro_pivot.pivot_table(index=["Classifier", "Dataset"], columns="Oversampling", values=metric)
    f1_macro_pivot = f1_macro_pivot[methods]
    
    # Create a mask to highlight the maximum values in each row
    max_highlight_mask = f1_macro_pivot.apply(lambda row: row == row.max(), axis=1)
    styled_f1_macro_pivot = f1_macro_pivot.copy()
    
    # Mark statistically significant results on the highest F1-macro score
    for _, row in wilcoxon_results_df.iterrows():
        classifier, dataset, comparison, p_value = row["Classifier"], row["Dataset"], row["Comparison"], row["p-value"]
        if p_value < 0.05:
            method_1, method_2 = comparison.split(" vs ")
            max_score = styled_f1_macro_pivot.loc[(classifier, dataset)].max()
            if f1_macro_pivot.loc[(classifier, dataset), method_1] == max_score:
                styled_f1_macro_pivot.loc[(classifier, dataset), method_1] = f"{f1_macro_pivot.loc[(classifier, dataset), method_1]:.5f}*"
            elif f1_macro_pivot.loc[(classifier, dataset), method_2] == max_score:
                styled_f1_macro_pivot.loc[(classifier, dataset), method_2] = f"{f1_macro_pivot.loc[(classifier, dataset), method_2]:.5f}*"
    
    # Apply the highlight function to the DataFrame
    return styled_f1_macro_pivot.style.apply(lambda s: ['background-color: red' if is_max else '' for is_max in max_highlight_mask.loc[s.name]], axis=1)


In [None]:
# Main execution
if __name__ == "__main__":
    # ---- Configuration ----------------------------------------------------
    data_path = 'datasets'
    metric = "F1 Macro"

    # Columns shown in each table, in display order.
    methods_smote = ["none", "ml_smote", "mmo_smote"]
    methods_ros = ["none", "ml_ros", "mmo"]

    # Method pairs compared with the Wilcoxon test in each family.
    comparisons_smote = [("ml_smote", "mmo_smote")]
    comparisons_ros = [("ml_ros", "mmo")]

    # ---- Data -------------------------------------------------------------
    merged_df = load_data(data_path)

    # ---- SMOTE family: tests, then styled table ---------------------------
    wilcoxon_results_smote = perform_wilcoxon_tests(
        merged_df, comparisons_smote, metric
    )
    styled_smote_pivot = create_styled_pivot(
        merged_df, wilcoxon_results_smote, methods_smote, metric
    )
    display(styled_smote_pivot)

    # ---- ROS family: tests, then styled table -----------------------------
    wilcoxon_results_ros = perform_wilcoxon_tests(
        merged_df, comparisons_ros, metric
    )
    styled_ros_pivot = create_styled_pivot(
        merged_df, wilcoxon_results_ros, methods_ros, metric
    )
    display(styled_ros_pivot)

In [None]:
# Mean "Train_Set_Increase" per (Classifier, Dataset), one column per
# oversampling method.
# The column is selected *before* aggregating: calling .mean() on the whole
# grouped frame raises TypeError on pandas >= 2.0 if merged_df has any
# non-numeric column besides the grouping keys, and averages columns that are
# then thrown away.
# NOTE(review): the variable keeps its original name even though it holds
# Train_Set_Increase rather than F1 scores, in case other cells refer to it.
f1_macro_pivot = (
    merged_df.groupby(["Classifier", "Dataset", "Oversampling"])["Train_Set_Increase"]
    .mean()
    .reset_index()
)
f1_macro_pivot = f1_macro_pivot.pivot_table(index=["Classifier", "Dataset"], columns="Oversampling", values="Train_Set_Increase")
f1_macro_pivot