In [None]:
import os
import pandas as pd
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp
import numpy as np

In [None]:
# %%
import os
import pandas as pd
import scikit_posthocs as sp
from scipy.stats import friedmanchisquare

# %%
# Load all CSVs into a single DataFrame
def load_data(data_path):
    """Read every ``.csv`` file in ``data_path`` and stack them into one DataFrame.

    Files are read in sorted file-name order so the row order of the result is
    reproducible across machines and runs (``os.listdir`` returns entries in
    arbitrary order). Only the top level of ``data_path`` is scanned.

    Parameters
    ----------
    data_path : str or path-like
        Directory containing the CSV files.

    Returns
    -------
    pandas.DataFrame
        All CSV contents concatenated with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``data_path`` contains no ``.csv`` files.
    """
    csv_names = sorted(
        name for name in os.listdir(data_path) if name.endswith('.csv')
    )
    if not csv_names:
        # pd.concat([]) would raise a ValueError too, but without saying which
        # directory turned out to be empty.
        raise ValueError(f"No CSV files found in {data_path!r}")

    frames = [pd.read_csv(os.path.join(data_path, name)) for name in csv_names]
    return pd.concat(frames, ignore_index=True)

# Apply highlighting for the maximum values in each row
def highlight_max(s):
    """Build a list of CSS strings that paints every maximum entry of ``s`` red.

    Entries equal to ``s.max()`` get ``'background-color: red'``; all other
    entries get an empty string. The list has one item per element of ``s``.
    """
    peak = s.max()
    css = []
    for hit in s.eq(peak):
        css.append('background-color: red' if hit else '')
    return css

# Perform the Friedman test and follow up with Nemenyi post-hoc test if significant
def _collect_score_matrix(setup_df, methods, blocks, metric):
    """Return an array of ``metric`` values for one classifier/dataset setup.

    Rows follow ``blocks`` (seeds) and columns follow ``methods``. When several
    rows of ``setup_df`` match the same (method, seed) pair, the first one is
    used; a pair with no match becomes NaN.
    """
    columns = []
    for method in methods:
        method_df = setup_df[setup_df["Oversampling"] == method]
        column = []
        for block in blocks:
            matches = method_df.loc[method_df['Seed'] == block, metric].values
            column.append(matches[0] if len(matches) > 0 else np.nan)
        columns.append(column)
    return np.array(columns).T


def perform_friedman_and_nemenyi(merged_df, methods, metric):
    """Run a Friedman test per (classifier, dataset) with a Nemenyi follow-up.

    For every classifier/dataset combination the seeds act as blocks and the
    oversampling ``methods`` as treatments. Seeds for which any method has no
    value (or a NaN value) are dropped before testing. If the Friedman p-value
    is below 0.05, the Nemenyi post-hoc test is run and the method with the
    highest median score is reported as "Best Method" only when it differs
    from *every* other method at p < 0.05.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Must contain the columns "Classifier", "Dataset", "Oversampling",
        "Seed" and ``metric``.
    methods : list
        Values of the "Oversampling" column to compare. scipy's
        ``friedmanchisquare`` needs at least three of them.
    metric : str
        Name of the score column; higher is treated as better.

    Returns
    -------
    result_df : pandas.DataFrame
        One row per tested setup with columns "Classifier", "Dataset",
        "Best Method" (``None`` when there is no significant winner) and
        "p-value". Setups without any complete seed are omitted.
    median_rank_df : pandas.DataFrame
        Columns "Method" and "Median Rank". Despite the column name, the value
        is the arithmetic mean of the per-seed ranks (rank 1 = best) pooled
        over all tested setups; the name is kept so existing consumers of the
        frame keep working.
    """
    # Local import keeps this block self-contained; rankdata assigns tied
    # scores their average rank, matching what the Friedman test itself does.
    from scipy.stats import rankdata

    results = []
    all_ranks = {method: [] for method in methods}  # ranks pooled over all setups
    blocks = merged_df['Seed'].unique()  # same for every setup, so computed once

    for classifier in merged_df["Classifier"].unique():
        classifier_df = merged_df[merged_df["Classifier"] == classifier]
        for dataset in merged_df["Dataset"].unique():
            setup_df = classifier_df[classifier_df["Dataset"] == dataset]

            data_array = _collect_score_matrix(setup_df, methods, blocks, metric)
            # Keep only seeds where every method produced a value.
            data_array = data_array[~np.isnan(data_array).any(axis=1)]
            if data_array.shape[0] == 0:
                continue

            _, p_value = friedmanchisquare(*data_array.T)

            # Rank within each seed, highest score -> rank 1. Ties share the
            # average rank instead of being broken by column position.
            ranks = rankdata(-data_array, axis=1)
            for i, method in enumerate(methods):
                all_ranks[method].extend(ranks[:, i])

            best_method = None
            if p_value < 0.05:
                data = pd.DataFrame(data_array, columns=methods)
                nemenyi_results = sp.posthoc_nemenyi_friedman(data)

                # Candidate winner: the method with the highest median score.
                candidate = data.median().idxmax()
                beats_everyone = all(
                    nemenyi_results.loc[candidate, other] < 0.05
                    for other in methods if other != candidate
                )
                if beats_everyone:
                    best_method = candidate

            results.append({
                "Classifier": classifier,
                "Dataset": dataset,
                "Best Method": best_method,
                "p-value": p_value
            })

    # NOTE(review): the column is called "Median Rank" but has always been
    # computed with np.mean; confirm which statistic is intended before
    # renaming the column or switching the aggregation.
    median_rank_df = pd.DataFrame({
        "Method": methods,
        "Median Rank": [np.mean(all_ranks[method]) for method in methods]
    })

    result_df = pd.DataFrame(results)

    return result_df, median_rank_df


# Create a pivot table of mean scores for the chosen metric and mark statistically significant best methods
def create_styled_pivot(merged_df, friedman_results_df, methods, metric):
    f1_macro_pivot = merged_df.groupby(["Classifier", "Dataset", "Oversampling"]).mean()[metric].reset_index()
    f1_macro_pivot = f1_macro_pivot.pivot_table(index=["Classifier", "Dataset"], columns="Oversampling", values=metric)
    f1_macro_pivot = f1_macro_pivot[methods]
    
    # Create a mask to highlight the maximum values in each row
    max_highlight_mask = f1_macro_pivot.apply(lambda row: row == row.max(), axis=1)
    styled_f1_macro_pivot = f1_macro_pivot.copy()
    
    # Mark statistically significant best method based on Friedman and Nemenyi results
    for _, row in friedman_results_df.iterrows():
        classifier, dataset, best_method = row["Classifier"], row["Dataset"], row["Best Method"]
        if best_method:
            styled_f1_macro_pivot.loc[(classifier, dataset), best_method] = f"{f1_macro_pivot.loc[(classifier, dataset), best_method]:.5f}*"
    
    # Apply the highlight function to the DataFrame
    return styled_f1_macro_pivot.style.apply(lambda s: ['background-color: red' if is_max else '' for is_max in max_highlight_mask.loc[s.name]], axis=1)

In [None]:
# %%
# Main execution
# Runs the full analysis: load the CSV results, test the oversampling methods
# against each other, and display the styled mean-score table. The names
# assigned here are module-level, so later cells can read them.
if __name__ == "__main__":
    # Directory scanned by load_data for *.csv result files
    data_path = 'datasets'
    # Alternative, smaller comparison set (kept for reference):
    # methods_combined = ["none", "ml_smote", "mmo_smote"]
    methods_combined = ["none", "ml_ros", "mmo", "ml_smote", "mmo_smote"]
    # Score column compared across methods
    metric = "F1 Macro"
    
    # Load and prepare data
    merged_df = load_data(data_path)

    # Perform Friedman and Nemenyi post-hoc tests for combined methods
    # (first result: per-setup test outcome, second: per-method rank summary)
    friedman_results_combined, friedman_results_rank = perform_friedman_and_nemenyi(merged_df, methods_combined, metric)

    # Create styled pivot table for combined methods
    styled_combined_pivot = create_styled_pivot(merged_df, friedman_results_combined, methods_combined, metric)
    # display is provided by IPython/Jupyter; it is not imported in this file
    display(styled_combined_pivot)

In [None]:
# Show the per-method rank summary (second return value of perform_friedman_and_nemenyi)
friedman_results_rank

In [None]:
# %%
# Mean "Train_Set_Increase" per (Classifier, Dataset), one column per oversampling method.
# The column is selected before averaging so that non-numeric columns of merged_df
# never reach .mean() (which raises on recent pandas versions).
# NOTE: despite its name, f1_macro_pivot holds train-set-increase values here; the
# name is kept because later cells may read it.
f1_macro_pivot = (
    merged_df.groupby(["Classifier", "Dataset", "Oversampling"])["Train_Set_Increase"]
    .mean()
    .reset_index()
)
f1_macro_pivot = f1_macro_pivot.pivot_table(index=["Classifier", "Dataset"], columns="Oversampling", values="Train_Set_Increase")
f1_macro_pivot
