In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
from IPython.display import display
from itertools import combinations
from collections import Counter
import numpy as np
from statsmodels.stats.multitest import multipletests
import os


In [None]:
#Functions for analysing:


def get_statistics_summary(df, columns_to_analyze, group_by_col="label_col", 
                         metrics=["mean", "std", "max"], sort_by=None, ascending=False):
    """
    Aggregate the given columns per group and return a flat-column summary table.

    Parameters:
    -----------
    df : pandas.DataFrame
        Input data.
    columns_to_analyze : iterable
        Columns to aggregate.
    group_by_col : str, default="label_col"
        Column whose values define the groups (becomes the index of the result).
    metrics : list, default=["mean", "std", "max"]
        Aggregations applied to every column in ``columns_to_analyze``.
    sort_by : str or None, default=None
        Flattened column name (e.g. "Balanced_ACC_mean") to sort by; no sorting if None.
    ascending : bool, default=False
        Sort direction used when ``sort_by`` is given.

    Returns:
    --------
    pandas.DataFrame
        One row per group, one column per "<column>_<metric>" pair.
    """
    # Every analysed column gets the same list of aggregations.
    agg_spec = dict.fromkeys(columns_to_analyze, metrics)
    grouped_stats = df.groupby(group_by_col).agg(agg_spec)

    # Collapse the (column, metric) MultiIndex into "<column>_<metric>" labels.
    grouped_stats.columns = [f"{name}_{stat}" for name, stat in grouped_stats.columns]

    if sort_by is None:
        return grouped_stats
    return grouped_stats.sort_values(sort_by, ascending=ascending)

def count_partial_combinations(df, min_features=2, max_features=None):
    """
    Count how often each (partial) combination of features occurs across rows.

    Parameters:
    -----------
    df : pandas.DataFrame
        Must contain a "features" column whose entries are iterables of strings.
    min_features : int, default=2
        Smallest subset size to count.
    max_features : int or None, default=None
        Largest subset size to count; defaults to the longest feature list in ``df``.

    Returns:
    --------
    pandas.DataFrame
        Index is the "+"-joined (alphabetically sorted) feature subset, single
        column "count", sorted by count in descending order. Empty (but with the
        "count" column) when ``df`` has no rows or no subset of the requested size.
    """
    # Sort each list once so that subsets are order-invariant ("a+b" == "b+a").
    feature_lists = [sorted(features) for features in df["features"]]

    if max_features is None:
        # default=0 keeps an empty DataFrame from raising "max() arg is an empty sequence".
        max_features = max((len(features) for features in feature_lists), default=0)

    combination_counts = Counter()
    for feature_list in feature_lists:
        for r in range(min_features, max_features + 1):
            # combinations() yields nothing when r exceeds the list length.
            combination_counts.update(combinations(feature_list, r))

    labels = ["+".join(subset) for subset in combination_counts]
    counts = np.array(list(combination_counts.values()), dtype="int64")
    results_df = pd.DataFrame({"count": counts}, index=labels)
    results_df = results_df.sort_values("count", ascending=False)

    return results_df
def pretraining_featureextraction_analysis(df):
    """
    Compare Balanced_ACC between rows with and without pretraining, and between
    rows with and without feature extraction.

    For each of the two boolean columns the per-group mean is displayed and an
    independent two-sample t-test (scipy ``ttest_ind`` defaults) is printed.
    Nothing is returned.
    """
    comparisons = (
        ("Pretraining_applied", "T-test Pretrained vs Non-pretrained:"),
        ("Feature_extraction_applied", "T-test Feature extraction vs Non-feature extraction:"),
    )

    for flag_col, headline in comparisons:
        # Mean balanced accuracy per flag value
        display(df.groupby(flag_col)["Balanced_ACC"].mean())

        flag = df[flag_col]
        applied_scores = df.loc[flag == True, "Balanced_ACC"]
        baseline_scores = df.loc[flag == False, "Balanced_ACC"]
        t_stat, p_val = ttest_ind(applied_scores, baseline_scores)
        print(headline, t_stat, p_val)

def feature_statistical_testing_easy(df, metric_column="mean"):
    """
    Welch t-test of ``metric_column`` for rows with vs. without each feature
    (no multiple-testing correction).

    Parameters:
    -----------
    df : pandas.DataFrame
        Must contain a "features" column (iterables of features) and ``metric_column``.
        The frame is not modified.
    metric_column : str, default="mean"
        Column holding the metric to compare.

    Returns:
    --------
    pandas.DataFrame
        One row per feature with columns "t-statistic" and "p-value",
        sorted by p-value. The table is also displayed.
    """
    # Sorted so the iteration order does not depend on set hashing.
    all_features = sorted({f for feature_list in df["features"] for f in feature_list}, key=str)
    metric = df[metric_column]

    rows = []
    for feature in all_features:
        # Positional boolean mask instead of writing indicator columns into df:
        # the caller's frame stays untouched and a feature whose name equals an
        # existing column (e.g. the metric itself) cannot overwrite it.
        has_feature = np.fromiter(
            (feature in feats for feats in df["features"]), dtype=bool, count=len(df)
        )
        t_stat, p_value = ttest_ind(metric[has_feature], metric[~has_feature], equal_var=False)
        rows.append({"t-statistic": t_stat, "p-value": p_value})

    # Explicit columns keep sort_values working when there are no features at all.
    results_df = pd.DataFrame(rows, index=all_features, columns=["t-statistic", "p-value"])
    results_df = results_df.sort_values("p-value")
    display(results_df)
    return results_df

def feature_statistical_testing(df, metric_column="mean", alpha=0.05, correction_method='fdr_bh'):
    """
    Perform statistical testing for feature significance using independent t-tests
    (Welch) with multiple testing correction.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing a 'features' column with lists of features and a metric column.
        The frame is not modified.
    metric_column : str, default="mean"
        Name of the column containing the metric to test
    alpha : float, default=0.05
        Significance level passed to the multiple testing correction
    correction_method : str, default='fdr_bh'
        Multiple testing correction method ('fdr_bh', 'bonferroni', etc.)

    Returns:
    --------
    pandas.DataFrame
        One row per feature with columns "t-statistic", "p-value",
        "mean_with_feature", "mean_without_feature", "sample_size_with",
        "sample_size_without" and "adjusted_p_value", sorted by adjusted
        p-value, then p-value (NaN last). Features for which one of the two
        groups is empty, or for which the t-test fails, get NaN statistics.

    Raises:
    -------
    ValueError
        If 'features' or ``metric_column`` is missing from ``df``.
    """
    # Input validation
    if "features" not in df.columns:
        raise ValueError("DataFrame must contain a 'features' column")
    if metric_column not in df.columns:
        raise ValueError(f"DataFrame must contain the metric column: {metric_column}")

    result_columns = [
        "t-statistic",
        "p-value",
        "mean_with_feature",
        "mean_without_feature",
        "sample_size_with",
        "sample_size_without",
    ]

    # Sorted so the iteration order does not depend on set hashing.
    all_features = sorted({f for feature_list in df["features"] for f in feature_list}, key=str)
    metric = df[metric_column]

    rows = []
    for feature in all_features:
        # Positional boolean mask; avoids adding indicator columns that could
        # collide with existing column names (and is safe with duplicate index labels).
        has_feature = np.fromiter(
            (feature in feats for feats in df["features"]), dtype=bool, count=len(df)
        )
        group_with_feature = metric[has_feature]
        group_without_feature = metric[~has_feature]

        row = {
            "t-statistic": np.nan,
            "p-value": np.nan,
            "mean_with_feature": np.nan,
            "mean_without_feature": np.nan,
            "sample_size_with": len(group_with_feature),
            "sample_size_without": len(group_without_feature),
        }

        # A t-test needs both groups; otherwise keep the NaN placeholders.
        if len(group_with_feature) > 0 and len(group_without_feature) > 0:
            row["mean_with_feature"] = group_with_feature.mean()
            row["mean_without_feature"] = group_without_feature.mean()
            try:
                t_stat, p_value = ttest_ind(
                    group_with_feature,
                    group_without_feature,
                    equal_var=False,  # Using Welch's t-test
                    nan_policy='omit'
                )
            except Exception as e:
                # Best effort: report the failure and leave this feature's statistics as NaN.
                print(f"Error with feature: {feature}", e)
            else:
                row["t-statistic"] = float(t_stat)
                row["p-value"] = float(p_value)

        rows.append(row)

    # Explicit columns so the frame has the expected shape even without features.
    results_df = pd.DataFrame(rows, index=all_features, columns=result_columns)

    # Always create the column: the final sort relies on it even if nothing is valid.
    results_df["adjusted_p_value"] = np.nan

    # Apply multiple testing correction to the valid p-values only
    p_values = results_df["p-value"].to_numpy(dtype=float)
    valid_p_values = ~np.isnan(p_values)
    if valid_p_values.any():
        _, adjusted_p_values, _, _ = multipletests(
            p_values[valid_p_values],
            alpha=alpha,
            method=correction_method
        )
        results_df.loc[valid_p_values, "adjusted_p_value"] = adjusted_p_values

    # Sort by adjusted p-value, then regular p-value
    results_df = results_df.sort_values(
        ['adjusted_p_value', 'p-value'],
        na_position='last'
    )

    return results_df
    

def _show_summary_report(summary, plot_title, tag):
    """Plot mean +/- std bars for a grouped summary, then display it sorted by mean and by max."""
    summary.plot(kind="bar", y="Balanced_ACC_mean", yerr="Balanced_ACC_std", title=plot_title)
    plt.show()
    print(f"Sorted by mean {tag}")
    display(summary)
    print("Sorted by max")
    display(summary.sort_values("Balanced_ACC_max", ascending=False))


def _show_top_feature_counts(summary, top_n):
    """Add a 'features' column (index split on '_') and display feature counts among the top_n groups by mean."""
    summary["features"] = summary.index.str.split('_')
    top_features = summary.nlargest(top_n, 'Balanced_ACC_mean')["features"].explode().value_counts()
    print("Feature Importance by Count:")
    display(top_features)


def full_analyses(df, show_whole_df_at_start=False, feature_extraction_analyses=True, tag="", top_n=10):
    """
    Print, display and plot the complete result overview for a results table.

    Parameters:
    -----------
    df : pandas.DataFrame
        Results table. Columns read here: "Balanced_ACC", "Permutation_Balanced_ACC",
        "label_col", "mri_table", "model_type"; with ``feature_extraction_analyses``
        also "Feature_extraction_applied" and "Pretraining_applied".
    show_whole_df_at_start : bool, default=False
        Display the sorted table without pandas' row/column truncation.
    feature_extraction_analyses : bool, default=True
        Also run the sections split by feature extraction and the
        pretraining / feature-extraction t-tests.
    tag : str, default=""
        Appended to the "Sorted by mean" headings.
    top_n : int, default=10
        Number of best mri_table groups used for the feature counts.

    Returns:
    --------
    None
    """
    metric_cols = ["Balanced_ACC", "Permutation_Balanced_ACC"]

    print("#"*10)
    print("Full analyses")
    print("#"*10)
    print("\n Sorted by best model\n")
    ranked = df.sort_values("Balanced_ACC", ascending=False)
    if show_whole_df_at_start:
        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            display(ranked)
    else:
        display(ranked)

    print("\nValue counts for label column\n")
    display(df["label_col"].value_counts())

    print("\nMean and std for Balanced_ACC\n")
    overall_mean = df["Balanced_ACC"].mean()
    overall_std = df["Balanced_ACC"].std()
    overall_permutated_mean = df["Permutation_Balanced_ACC"].mean()
    overall_permutated_std = df["Permutation_Balanced_ACC"].std()

    display(f"Overall mean: {overall_mean}, std: {overall_std}")
    display(f"Overall permuted mean: {overall_permutated_mean}, std: {overall_permutated_std}")
    display(f"Mean difference: {overall_mean-overall_permutated_mean}, std difference: {overall_std-overall_permutated_std}")
    df["Balanced_ACC"].hist()
    plt.title("Balanced Accuracy Distribution")
    plt.show()
    df["Permutation_Balanced_ACC"].hist()
    plt.title("Permutation Balanced Accuracy Distribution")
    plt.show()

    # --- per label ---
    summary = get_statistics_summary(df, metric_cols, sort_by="Balanced_ACC_mean", ascending=False)
    _show_summary_report(summary, "Balanced Accuracy by Label", tag)

    # --- per mri_table, all rows ---
    print("\nMean and std for Balanced_ACC by mri_table\n")
    print("NOT FILTERED FOR FEATURE EXTRACTION")
    summary = get_statistics_summary(df, metric_cols, group_by_col="mri_table", sort_by="Balanced_ACC_mean", ascending=False)
    _show_summary_report(summary, "Balanced Accuracy by MRI Table", tag)
    _show_top_feature_counts(summary, top_n)

    if feature_extraction_analyses:
        # --- per mri_table, rows without feature extraction ---
        print("FILTERED FOR FEATURE EXTRACTION - NO FEATURE EXTRACTION APPLIED")
        # Elementwise comparison on purpose: rows where the flag is missing match neither subset.
        df_filterd_for_no_feature_extraction = df[df["Feature_extraction_applied"] == False]
        if df_filterd_for_no_feature_extraction.empty:
            # Plotting an empty summary raises, so skip the section instead.
            print("No rows without feature extraction - section skipped")
        else:
            mri_table_summary_filtered = get_statistics_summary(df_filterd_for_no_feature_extraction, metric_cols, group_by_col="mri_table", sort_by="Balanced_ACC_mean", ascending=False)
            _show_summary_report(mri_table_summary_filtered, "Balanced Accuracy by MRI Table - No FE", tag)
            _show_top_feature_counts(mri_table_summary_filtered, top_n)
            #feature_statistical_testing(mri_table_summary_filtered)
            print("Combination counts for features")
            combination_counts = count_partial_combinations(mri_table_summary_filtered.nlargest(top_n, 'Balanced_ACC_mean')[["Balanced_ACC_mean", "features"]])
            display(combination_counts)

        # --- per mri_table, rows with feature extraction ---
        print("FILTERED FOR FEATURE EXTRACTION - FEATURE EXTRACTION APPLIED")
        df_filterd_for_feature_extraction = df[df["Feature_extraction_applied"] == True]
        if df_filterd_for_feature_extraction.empty:
            print("No rows with feature extraction - section skipped")
        else:
            mri_table_summary_filtered = get_statistics_summary(df_filterd_for_feature_extraction, metric_cols, group_by_col="mri_table", sort_by="Balanced_ACC_mean", ascending=False)
            mri_table_summary_filtered.plot(kind="bar", y="Balanced_ACC_mean", yerr="Balanced_ACC_std", title="Balanced Accuracy by MRI Table - FE")
            # Show the figure here; without this it is only rendered together
            # with the model-type plot further down.
            plt.show()
            _show_top_feature_counts(mri_table_summary_filtered, top_n)
            #feature_statistical_testing(mri_table_summary_filtered)

    # --- per model type ---
    print("\nMean and std for Balanced_ACC by model type\n")
    summary = get_statistics_summary(df, metric_cols, group_by_col="model_type", sort_by="Balanced_ACC_mean", ascending=False)
    _show_summary_report(summary, "Balanced Accuracy by Model Type", tag)

    if feature_extraction_analyses:
        print("\nPretraing and Feature Extraction analyses")
        pretraining_featureextraction_analysis(df)

        print("\nPretraining and Finetuning analyses for DL Models\n")
        # Everything that is not one of the two classical models is treated as deep learning.
        df_deeplearning = df[~df["model_type"].isin(["Logistic Regression", "Random Forest"])]
        pretraining_featureextraction_analysis(df_deeplearning)

In [None]:
#df = pd.read_csv('/home/esralenz/Dokumente/20_HITKIP/03_UKB/00_Git_Code/UK-B-CLIP/08_analyse_results/2025_01_12_14_08_43.csv')
#df_raw = pd.read_csv('training_log_raw.csv')

In [None]:
#df_raw.sort_values("Balanced_ACC", ascending=False).head(20)
#if the number of pos < 350 drop the row
#df_raw_filterd = df_raw[df_raw["number_of_pos"]>350]
#df_raw_filterd.sort_values("Balanced_ACC", ascending=False).head(50)


In [None]:
# Concatenate every file ending in .csv from the results directory into one DataFrame.
path = "00_results/tabpfn_age/"
# os.listdir returns names in arbitrary order; sorting keeps the row order (and
# therefore every later 'first' aggregation) reproducible between runs.
csvs_deconfounded = [os.path.join(path, f) for f in sorted(os.listdir(path)) if f.endswith('.csv')]
if not csvs_deconfounded:
    raise FileNotFoundError(f"No .csv files found in {path!r}")
df = pd.concat([pd.read_csv(file) for file in csvs_deconfounded])

In [None]:
#df = pd.read_csv("/home/esralenz/Dokumente/20_HITKIP/03_UKB/00_Git_Code/UK-B-CLIP/08_analyse_results/CNN_schaefer/CNN_Schäfer21_01.csv")

In [None]:
# Normalise the mri_table names: drop the ".csv" suffix and replace the underscore
# inside multi-word table names with a dot (full_analyses later splits the names on "_").
mri_table_names = df["mri_table"]
for old_text, new_text in (
    ("FC_100", "FC.100"),
    ("FC_25", "FC.25"),
    ("grey_matter", "grey.matter"),
    (".csv", ""),
):
    mri_table_names = mri_table_names.str.replace(old_text, new_text, regex=False)
df["mri_table"] = mri_table_names

In [None]:
# Distinct mri_table values in df
df["mri_table"].unique()

In [None]:
#concatenate the two dataframes
#df = pd.concat([df, df_raw_filterd])

In [None]:
# Distinct model types in df
df["model_type"].unique()

In [None]:
# Distinct label columns in df
df["label_col"].unique()

In [None]:
# Display df (the concatenated result rows) as the cell output
df

In [None]:
# Average the metrics over all rows that share a search_term; descriptive columns
# keep the first value seen in each group.
agg_by_search_term = {
    'Accuracy': 'mean',
    'AUC': 'mean',
    'Balanced_ACC': 'mean',
    'Permutation_Balanced_ACC': 'mean',
    'cross_validation_count': 'mean',
    # Keeping the first instance of other columns
    'label_col': 'first',
    'mri_table': 'first',
    'test_set_size': 'first',
    'Pretraining_applied': 'first',
}
# df is later rebound to this aggregate, and full_analyses() / the per-tag loops read
# "model_type", "Feature_extraction_applied" and "tag" from it. Carry those columns
# along when the raw data has them, otherwise those cells fail with a KeyError.
# NOTE(review): 'first' assumes these values are constant within a search_term - confirm.
for passthrough_col in ("model_type", "tag", "Feature_extraction_applied"):
    if passthrough_col in df.columns:
        agg_by_search_term.setdefault(passthrough_col, 'first')

mean_metrics_with_others_df_1 = df.groupby(
    ['search_term'], as_index=False
).agg(agg_by_search_term)

In [None]:
# Per-search_term averages, best mean Balanced_ACC first
mean_metrics_with_others_df_1.sort_values("Balanced_ACC", ascending=False)

In [None]:
# Average the metrics per (search_term, model_type) pair; descriptive columns keep
# the first value seen in each group.
mean_metrics_with_others_df = df.groupby(
    ['search_term', 'model_type'], as_index=False
).agg({
    'Accuracy': 'mean',
    'AUC': 'mean',
    'Balanced_ACC': 'mean',
    'Permutation_Balanced_ACC': 'mean',
    'cross_validation_count': 'mean',
    # Keeping the first instance of other columns
    'label_col': 'first',
    'mri_table': 'first',
    'test_set_size': 'first',
    'Pretraining_applied': 'first',
})
# Make label_col the first column
target_col = 'label_col'
columns = [target_col] + [col for col in mean_metrics_with_others_df if col != target_col]
mean_metrics_with_others_df = mean_metrics_with_others_df[columns]
# NOTE(review): df is rebound to the search_term-only aggregate built in the earlier cell,
# not to the frame built above, which is not used again in the visible cells - confirm this
# is intended. Rebinding df also makes this cell and the aggregation cells non-idempotent:
# re-running them aggregates the already aggregated frame. Later cells read df["tag"], and
# full_analyses reads "model_type" and "Feature_extraction_applied", so the frame assigned
# here must contain those columns.
df = mean_metrics_with_others_df_1

In [None]:
# Number of rows per label in df
print(df["label_col"].value_counts())

In [None]:
# Get all distinct labels whose name contains "sex_balanced"
df[df["label_col"].str.contains("sex_balanced")]["label_col"].unique()

In [None]:
# Mean and max Balanced_ACC per mri_table, highest mean first
df.groupby("mri_table")["Balanced_ACC"].agg(["mean", "max"]).sort_values("mean", ascending=False)


In [None]:
# Run the full analyses once over all rows of df (no split by tag, default options)
full_analyses(df)

In [None]:
# Run the full analyses separately for every distinct value of the "tag" column;
# the tag is passed on so it appears in the "Sorted by mean" headings.
for tag in df["tag"].unique():
    print("-"*30)
    print(f"\nAnalyses for tag: {tag}")
    # Rows belonging to the current tag only
    df_tag = df[df["tag"] == tag]
    full_analyses(df_tag, show_whole_df_at_start=False, tag=tag)
    print("-"*30)

In [None]:
#just for sex_balanced rows

#df_sexbalanced = df[df["label_col"].str.contains("sex_balanced")]

In [None]:
#drop all the row where in mri_table there is a "Fc" 
#df = df[~df["mri_table"].str.contains("FC")]
#full_analyses(df)

In [None]:
# Per-tag analyses again. NOTE(review): with the filtering cell before this one
# commented out, this repeats the earlier per-tag loop on the same df - confirm it is still needed.
for tag in df["tag"].unique():
    print("-"*30)
    print(f"\nAnalyses for tag: {tag}")
    df_tag = df[df["tag"] == tag]
    # Pass the tag along (as the earlier per-tag loop does) so the
    # "Sorted by mean" headings identify which tag they belong to.
    full_analyses(df_tag, tag=tag)
    print("-"*30)