In [16]:
import os 
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Root folder for generated figures: the 'plots/' directory under the current
# working directory. show_histoplot() writes its PDFs into sub-folders of it.
PLOT_DIR = os.path.join(os.path.abspath(os.getcwd()), 'plots/')
# Notebook-wide default seaborn style. Note that show_histoplot() switches the
# style to "whitegrid" when it runs, so this default does not apply to its plots.
sns.set_style('darkgrid')
  
# print(file_name.split('/')[-2] + '/' + file_name.split('/')[-1])
def show_histoplot(file_name, model, preprocess_type):
    """Plot permutation-test score distributions and print a LaTeX row of p-values.

    The CSV at ``file_name`` (resolved against the current working directory)
    is read with one column per metric. Its first data row is taken as the
    score of the real model; every following row is one permutation run.

    For each column a histogram (with KDE) of the permutation scores is drawn,
    the real model's score is marked with a dashed red line, and the figure is
    saved twice as PDF in
    ``PLOT_DIR/<preprocess_type>/<parent folder of the CSV>/<CSV stem>/``:
    ``<column>.pdf`` (titled "p-value") and ``<column>titled_.pdf`` (titled
    "<column> Permutation Test").

    The p-value of a column is the fraction of permutation scores that are
    greater than or equal to the real model's score. NaN scores never satisfy
    that comparison but still count in the denominator.

    Finally one LaTeX table row is printed with the p-values of the
    "Accuracy", "Recall", "Precision" and "F1" columns.

    Side effects: sets the global seaborn context ("paper", font_scale=2) and
    style ("whitegrid"), creates the output folder, writes PDF files, prints.

    Args:
        file_name: Path of the permutation-test CSV file.
        model: Model name shown in the printed row (underscores are escaped).
        preprocess_type: Name of the preprocessing variant; used as the first
            sub-folder below ``PLOT_DIR``.

    Raises:
        ValueError: If the CSV does not contain the real model's row plus at
            least one permutation row.
        KeyError: If one of the four tabulated metric columns is missing.
    """
    path = os.path.join(os.path.abspath(os.getcwd()), file_name)

    # Read the CSV file with a comma as the separator.
    scores = pd.read_csv(path, sep=',')
    if len(scores) < 2:
        raise ValueError(
            f"{path} must contain the model's scores in the first row and at "
            f"least one permutation row; found {len(scores)} data row(s)"
        )

    # First row = scores of the real model, remaining rows = permutations.
    best_model = scores.iloc[0]
    permutations = scores.iloc[1:]

    # Output folder: <PLOT_DIR>/<preprocess_type>/<csv parent folder>/<csv stem>.
    # os.path is used instead of splitting on '/' and '.', so native path
    # separators and names containing extra dots are handled correctly.
    csv_folder = os.path.basename(os.path.dirname(os.path.normpath(file_name)))
    csv_stem = os.path.splitext(os.path.basename(file_name))[0]
    path_plot = os.path.join(PLOT_DIR, preprocess_type, csv_folder, csv_stem)
    os.makedirs(path_plot, exist_ok=True)

    # Styling is identical for every figure, so configure it once up front.
    sns.set_context("paper", font_scale=2)  # larger fonts for readability
    sns.set_style("whitegrid")

    metrics_p = {}
    for column in permutations.columns:
        data = permutations[column]
        metric = best_model[column]

        fig, ax = plt.subplots(figsize=(12, 8))
        sns.histplot(data, binwidth=0.01, kde=True, linewidth=3, ax=ax)
        # Q2 can be negative, so it gets its own x-range.
        if column == 'Q2':
            ax.set_xlim(-0.25, 0.75)
        else:
            ax.set_xlim(0, 1)

        # Mark the real model's score and annotate its value.
        ax.axvline(metric, color='r', linestyle='dashed', linewidth=3)
        ax.text(metric, 0.9 * ax.get_ylim()[1], f' {metric:.2f}', color='r')

        # Labels; tick sizes are set on this axes directly so that every
        # figure (including the first one) gets them, independent of rcParams.
        ax.set_xlabel(f'{column} Score', fontsize=26)
        ax.set_ylabel("Occurrences", fontsize=26)
        ax.tick_params(axis='both', labelsize=24)

        ax.set_title("p-value", fontsize=26)
        fig.savefig(os.path.join(path_plot, column + '.pdf'))

        ax.set_title(f'{column} Permutation Test')
        fig.savefig(os.path.join(path_plot, column + 'titled_.pdf'))
        plt.close(fig)  # free the figure; many are produced per run

        # p-value: share of permutations scoring at least as well as the model.
        metrics_p[column] = float((data >= metric).sum()) / len(data)

    # One row of the LaTeX table: Accuracy, Recall, Precision, F1.
    model_latex = model.replace("_", "\\_")
    print(f'& {model_latex} & {metrics_p["Accuracy"]:.2f} & {metrics_p["Recall"]:.2f} & {metrics_p["Precision"]:.2f} & {metrics_p["F1"]:.2f} \\\\')
    

In [17]:
import glob
def run_all_permutation_tests(base_dir, preprocess_type, model_names):
    r"""Print the LaTeX row group of one preprocessing variant.

    For every model in ``model_names`` the folder
    ``<base_dir>/<preprocess_type>/<model>`` is searched for files matching
    ``permutation_test_*.csv``. Each file found is passed to
    ``show_histoplot``, which prints one table row. Models without any
    matching file are reported with a message and skipped.

    The group starts with a rotated ``\multirow`` label holding the
    (underscore-escaped) ``preprocess_type`` and ends with ``\hline``. The
    ``\multirow`` span equals the number of files that will be processed, i.e.
    the number of rows actually emitted.

    Args:
        base_dir: Directory containing one sub-folder per preprocessing variant.
        preprocess_type: Name of the preprocessing variant (sub-folder name).
        model_names: Iterable of model names (sub-folder names).
    """
    # Discover all files first so the row span of the label is known before
    # anything is printed. Sorting makes the row order reproducible, because
    # glob returns matches in arbitrary filesystem order.
    discovered = []
    for model in model_names:
        folder = os.path.join(base_dir, preprocess_type, model)
        files = sorted(glob.glob(os.path.join(folder, 'permutation_test_*.csv')))
        discovered.append((model, folder, files))

    row_count = sum(len(files) for _, _, files in discovered)
    print(
        r"\multicolumn{1}{|c|}{\multirow{" + str(row_count)
        + r"}{*}{\rotatebox{90}{\textbf{"
        + preprocess_type.replace("_", "\\_") + r"}}}}"
    )

    for model, folder, files in discovered:
        if not files:
            print(f"No permutation test file found for {model} in {folder}")
            continue
        # Every matching file produces its own plots and table row.
        for file_path in files:
            show_histoplot(file_path, model, preprocess_type)

    print(r"\hline")

# Table preamble. The tabular has six columns (rotated group label, algorithm
# name and the four tabulated metrics), matching the cells emitted per row;
# \hhline leaves the first column open and rules the remaining five.
print(r"""
\begin{table}[th!]
   \centering
    \setlength\tabcolsep{5.6pt}
    \renewcommand{\thetable}{\Roman{table}}
    \renewcommand{\arraystretch}{1.3} 
    \begin{tabular}{|m{0.6cm}|c|c|c|c|c|}
        \hhline{~|-----}
        \multicolumn{1}{c|}{}&
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Algorithm} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Accuracy} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Recall} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Precision} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{F1}  \\
        \hline
""")

# One row group per preprocessing variant, one row per model.
models = ['PLS', 'SVM', 'CARS', 'BOSS', 'GA-iPLS', 'GA-iPLS_BOSS']
for method in ['10_SG_MSC', '10_SG_SVN', '10_SG1_MSC', '10_SG1_SVN']:
    run_all_permutation_tests('.', method, models)

    # Blank line between row groups keeps the generated LaTeX readable.
    print()

# Table epilogue.
print(r"""
\end{tabular}
\caption{Permutation test p-values associated with each model configuration. All values below 0.01 indicate statistically significant performance improvements compared to chance.}
\label{tab:results_p}
\end{table}""")


\begin{table}[th!]
   \centering
    \setlength\tabcolsep{5.6pt}
    \renewcommand{\thetable}{\Roman{table}}
    \renewcommand{\arraystretch}{1.3} 
    \begin{tabular}{|m{0.6cm}|c|c|c|c|c|c|c|}
        \hhline{~|-------}
        \multicolumn{1}{c|}{}&
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Algorithm} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Accuracy} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Recall} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{Precision} &
        \cellcolor{Mycolor2}\textcolor{Mycolor3}{F1}  \\
        \hline

\multicolumn{1}{|c|}{\multirow{6}{*}{\rotatebox{90}{\textbf{10\_SG\_MSC}}}}
& PLS & 0.01 & 0.03 & 0.01 & 0.00 \\
& SVM & 0.01 & 0.03 & 0.02 & 0.01 \\
& CARS & 0.00 & 0.00 & 0.00 & 0.00 \\
& BOSS & 0.00 & 0.00 & 0.00 & 0.00 \\
& GA-iPLS & 0.00 & 0.00 & 0.00 & 0.00 \\
& GA-iPLS\_BOSS & 0.00 & 0.00 & 0.00 & 0.00 \\
\hline

\multicolumn{1}{|c|}{\multirow{6}{*}{\rotatebox{90}{\textbf{10\_SG\_SVN}}}}
& PLS & 0.00 & 0.00 & 0.00 & 0.0