# Read results

## Libraries

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Parameters 

In [None]:
# Working directory of the running kernel; no other cell visible here reads it.
cwd = os.getcwd()

In [None]:
# Root folder of the synthetic-data results (relative path); the loading cell
# joins it with f"synthetic_data_{p}" for each p.
path = "../results/Synthetic/"

In [None]:
# === Choose p for which results are to be computed ===
# Each p selects the results sub-folder "synthetic_data_{p}".
p_values = [1000] # 2000, 3000, 4000
# Divisors of p: downstream cells derive n = p // n_ for each entry.
n__values =  [10]

# percent_relevent_values = [4.6] # 4.25
# Percentage used to derive nr = p * percent // 100 downstream.
# Only the first entries of n__values and p_values are used in this formula.
percent_relevent_values = [n__values[0] / np.log(p_values[0]) * 3.5]

# Percentages of p used to derive nz = p * percent // 100 downstream.
percent_values = [1.25, 2.5, 3.75, 5.0]#, 6.25, 7.5]

## Get scores 

In [None]:
# Load the stored scores of every (p, n, nz, feature-selection mode, model)
# combination into a nested dict:
#     results_d[p][n][nz][fts_mode][model] = (mean F1, mean balanced accuracy)
# both means being rounded to 3 decimals.

# Feature-selection modes and models whose result files are read. They do not
# depend on p, so they are defined once, before the loops; `models` is read
# again by later cells and is now defined even when p_values is empty.
fts_modes = ["full", "random", "k-best", "lasso", "pk-lpnn"]
models = ["knn", "lr", "nb-gaussian"]

results_d = {}

# level 1
for p in p_values:

    results_folder = os.path.join(path, f"synthetic_data_{p}")

    ns = [int(p // n_) for n_ in n__values]
    nzs = [int(p * percent // 100) for percent in percent_values]

    results_d[p] = {}

    # level 2
    for n in ns:

        results_d[p][n] = {}

        # level 3
        for nz in nzs:

            results_d[p][n][nz] = {}

            for fts_mode in fts_modes:

                results_d[p][n][nz][fts_mode] = {}

                for model in models:

                    pkl_path = os.path.join(
                        results_folder, f"{n}", f"{nz}", f"{fts_mode}_{nz}_{model}.pkl"
                    )

                    # SECURITY: pickle.load can execute arbitrary code stored in
                    # the file; only read result files produced by a trusted run.
                    with open(pkl_path, "rb") as fh:
                        results = pickle.load(fh)

                    f1 = np.mean(results["f1"]).round(3)
                    b_acc = np.mean(results["b_acc"]).round(3)

                    results_d[p][n][nz][fts_mode][model] = (f1, b_acc)

In [None]:
# results_d

## Scores tables

In [None]:
# Build one score table per (p, n, nz): rows are the feature-selection modes,
# columns are the models. Each table is displayed and also printed as LaTeX.
results_df = {}

for p in p_values:

    print("****************")
    print(f"*** p = {p} ***")
    print("****************\n")

    # Same derived sizes as in the loading cell.
    ns = [int(p//n_) for n_ in n__values]
    nzs = [int(p*percent//100) for percent in percent_values]
    nrs = [int(p*percent//100) for percent in percent_relevent_values]

    results_df[p] = {}

    for n in ns:

        tables_for_n = {}
        results_df[p][n] = tables_for_n
        nr = nrs[0]  # only the first "relevant" size is reported

        for nz in nzs:

            score_table = pd.DataFrame.from_dict(results_d[p][n][nz], orient='index')
            tables_for_n[nz] = score_table

            print(f"*** n = {n}, nb_relevant = {nr} ({n}_{nr}), N_z = {nz} ***")
            display(score_table)

            # LaTeX source of the same table
            latex_code = score_table.to_latex(bold_rows=True, column_format="l | ccc")
            print(latex_code)
            print("\n")

In [None]:
# # === IMPORTANT: RUN THIS CELL ONLY ONCE TO CREATE THE DICT ====
# pklpnn_results_d = {}

In [None]:
# Collect the pk-lpnn scores per p and nz.
# The dict is created on first use and kept on later runs, so results for
# several p values can be accumulated by re-running the notebook with a
# different p_values; previously a fresh kernel raised NameError here.
try:
    pklpnn_results_d
except NameError:
    pklpnn_results_d = {}

for p in p_values:
    pklpnn_results_d[p] = {}
    for n, scores_by_nz in results_d[p].items():
        for nz, scores_by_mode in scores_by_nz.items():
            # Keyed by nz only: with several n values the last n wins.
            pklpnn_results_d[p][nz] = scores_by_mode['pk-lpnn']

In [None]:
# Bare expression: shows the collected pk-lpnn scores as the cell output.
pklpnn_results_d

In [None]:
# Export the pk-lpnn scores of each p as a LaTeX table
# (rows: models, columns: one per nz).
df_tex_l = []

# Column headers derived from the configured percentages instead of a fixed
# four-entry list, e.g. 1.25 -> "1.25\%", 5.0 -> "5\%".
percent_headers = [f"{percent:g}" + r"\%" for percent in percent_values]

for p in p_values:

    df = pd.DataFrame.from_dict(pklpnn_results_d[p], orient="columns")
    n_cols = df.shape[1]

    # to_latex rejects a header list whose length differs from the number of
    # columns; fall back to the DataFrame's own column labels in that case.
    header = percent_headers if len(percent_headers) == n_cols else True

    df_tex = df.to_latex(
        bold_rows=True,
        column_format="l | " + "c" * n_cols,
        header=header,
    )
    df_tex_l.append(df_tex)

In [None]:
# Print the LaTeX source of every exported table, one after the other.
for latex_table in df_tex_l:
    print(latex_table)

## Get progressions

### Percentage Nz

In [None]:
# Relative progression (in %) of pk-lpnn over the "lasso" and "k-best"
# baselines, stored as
#     progression_percentages[p][n][nz][model][mode] = {
#         "f1_progression": ..., "b_acc_progression": ...}
progression_percentages = {}

for p in p_values:
    progression_percentages[p] = {}

    for n, scores_by_nz in results_d[p].items():
        progression_percentages[p][n] = {}

        for nz, scores_by_mode in scores_by_nz.items():
            progression_for_nz = {}
            progression_percentages[p][n][nz] = progression_for_nz

            for model in models:
                # Skip models that have no pk-lpnn score to compare with.
                if "pk-lpnn" not in scores_by_mode or model not in scores_by_mode["pk-lpnn"]:
                    continue

                pk_lpnn_f1, pk_lpnn_b_acc = scores_by_mode["pk-lpnn"][model]

                for mode in ["lasso", "k-best"]:
                    # Skip baselines that were not evaluated for this model.
                    if mode not in scores_by_mode or model not in scores_by_mode[mode]:
                        continue

                    mode_f1, mode_b_acc = scores_by_mode[mode][model]

                    # Relative gain of pk-lpnn over the baseline, in percent.
                    f1_progression = ((pk_lpnn_f1 - mode_f1) / mode_f1) * 100
                    b_acc_progression = ((pk_lpnn_b_acc - mode_b_acc) / mode_b_acc) * 100

                    progression_for_nz.setdefault(model, {})
                    progression_for_nz[model][mode] = {
                        "f1_progression": round(f1_progression, 2),
                        "b_acc_progression": round(b_acc_progression, 2),
                    }

# One summary DataFrame per (p, n, nz), one row per (model, baseline) pair.
summary_tables = {}

for p, progression_by_n in progression_percentages.items():
    for n, progression_by_nz in progression_by_n.items():
        for nz, progression_by_model in progression_by_nz.items():
            table_data = [
                [model_key, mode_key, gains["f1_progression"], gains["b_acc_progression"]]
                for model_key, progression_by_mode in progression_by_model.items()
                for mode_key, gains in progression_by_mode.items()
            ]

            summary_tables[(p, n, nz)] = pd.DataFrame(
                table_data,
                columns=["Model", "Mode", "F1 Progression (%)", "Balanced Accuracy Progression (%)"],
            )

# Show the tables.
for (p, n, nz), table in summary_tables.items():
    print(f"Table for p = {p}, n = {n}, nz = {nz}:\n")
    display(table)
    print("\n")


### Percentage Global

In [None]:
# Flatten the progression percentages into two tables, one per baseline
# ("lasso" and "k-best"), with one row per (nz percentage, model).
lasso_data = []
kbest_data = []

# Baseline name -> list receiving its rows.
rows_by_mode = {"lasso": lasso_data, "k-best": kbest_data}

for p, progression_by_n in progression_percentages.items():
    for n, progression_by_nz in progression_by_n.items():
        for nz, metrics in progression_by_nz.items():
            # nz expressed as a percentage of p
            nz_percentage = round(((nz / p) * 100), 4)

            for model in models:
                # A model without any computed progression yields zero rows
                # instead of a KeyError (same guard as the plotting cell).
                model_metrics = metrics.get(model, {})

                for mode, rows in rows_by_mode.items():
                    # Missing baseline or missing metric -> 0
                    mode_metrics = model_metrics.get(mode, {})
                    rows.append({
                        "Nz Percentage": nz_percentage,
                        "Model": model,
                        "F1 Progression": mode_metrics.get("f1_progression", 0),
                        "Balanced Accuracy Progression": mode_metrics.get("b_acc_progression", 0),
                    })

# DataFrames for lasso and k-best
lasso_df = pd.DataFrame(lasso_data)
kbest_df = pd.DataFrame(kbest_data)

# Show both DataFrames
print("Lasso Results DataFrame:")
display(lasso_df)

print("K-Best Results DataFrame:")
display(kbest_df)


### Save percentages

In [None]:
def save_dataframe_as_image(df, file_name):
    """Render ``df`` as a matplotlib table and save it to ``file_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render; its values become the cells and its column labels
        the header row.
    file_name : str
        Destination passed to ``savefig`` (saved at 300 dpi, tight bounding
        box).
    """
    # Figure height grows with the number of rows.
    fig, ax = plt.subplots(figsize=(10, len(df) * 0.5 + 1))
    try:
        ax.axis("tight")
        ax.axis("off")  # only the table is drawn, no axes
        table = ax.table(
            cellText=df.values,
            colLabels=df.columns,
            cellLoc="center",
            loc="center",
        )
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.auto_set_column_width(col=list(range(len(df.columns))))

        # Save this specific figure rather than whatever the current one is.
        fig.savefig(file_name, bbox_inches="tight", dpi=300)
    finally:
        # Always release the figure, even when rendering or saving fails.
        plt.close(fig)

# Save the DataFrames as images.
# NOTE(review): `p` is the value left behind by the last executed
# `for p in ...` loop, so the file names depend on cell execution order.
lasso_image = f"lasso_results_{p}.png"
kbest_image = f"kbest_results_{p}.png"

save_dataframe_as_image(lasso_df, lasso_image)
save_dataframe_as_image(kbest_df, kbest_image)

# Report the names that were actually written (the previous message omitted
# the "_{p}" suffix).
print(f"Images sauvegardées sous '{lasso_image}' et '{kbest_image}'.")

## Progression plots

In [None]:
# Initialisation des données pour les courbes
kbest_f1_progression = {model: [] for model in models}
lasso_f1_progression = {model: [] for model in models}
kbest_b_acc_progression = {model: [] for model in models}
lasso_b_acc_progression = {model: [] for model in models}
nz_percentages = []  # Stockage des pourcentages de Nz

# Extraction des données
for p in progression_percentages:
    for n in progression_percentages[p]:
        for nz, metrics in progression_percentages[p][n].items():
            # Calculer le pourcentage de Nz
            nz_percentage = (nz / p) * 100
            if nz_percentage not in nz_percentages:
                nz_percentages.append(nz_percentage)

            # Progression pour chaque modèle dans les modes k-best et lasso
            for model in models:
                if model in metrics:
                    if "k-best" in metrics[model]:
                        kbest_f1_progression[model].append(metrics[model]["k-best"].get("f1_progression", 0))
                        kbest_b_acc_progression[model].append(metrics[model]["k-best"].get("b_acc_progression", 0))
                    else:
                        kbest_f1_progression[model].append(0)
                        kbest_b_acc_progression[model].append(0)
                    
                    if "lasso" in metrics[model]:
                        lasso_f1_progression[model].append(metrics[model]["lasso"].get("f1_progression", 0))
                        lasso_b_acc_progression[model].append(metrics[model]["lasso"].get("b_acc_progression", 0))
                    else:
                        lasso_f1_progression[model].append(0)
                        lasso_b_acc_progression[model].append(0)

In [None]:
def model_name(model):
    """Return the label shown in plot legends for a model identifier.

    "nb-gaussian" is shown as "NBC"; any other identifier is upper-cased.
    """
    return "NBC" if model == "nb-gaussian" else model.upper()

In [None]:
# Folder where plot_progressions writes its PDF when called with save=True.
save_folder = "../results/Synthetic/plots"

In [None]:
# Plot the progression curves.
def plot_progressions(nz_percentages, progression_data, p, metrics, save=False):
    """Plot the progression of LPNN-FS over LASSO (solid) and k-best (dashed).

    Parameters
    ----------
    nz_percentages : sequence
        X values shared by every curve.
    progression_data : sequence
        ``progression_data[0]`` maps each model to its y values for the
        LASSO comparison, ``progression_data[1]`` for the k-best comparison.
    p : object
        Only used in the name of the saved file.
    metrics : str
        Metric label, used in the y-axis label and in the saved file name.
    save : bool, default False
        When true, the figure is also written as a PDF into the module-level
        ``save_folder`` (created if it does not exist).
    """
    progression_data_lasso = progression_data[0]
    progression_data_kbest = progression_data[1]

    plt.figure(figsize=(7, 4))

    # Solid lines: comparison with LASSO (these carry the legend entries).
    for model, progression in progression_data_lasso.items():
        plt.plot(nz_percentages, progression, marker='o',
                 label=model_name(model),
                 linewidth=2,
                 linestyle="-",
                 )

    # Restart the colour cycle so each model keeps the same colour below.
    plt.gca().set_prop_cycle(None)

    # Dashed lines: comparison with k-best (unlabelled to avoid duplicates).
    for model, progression in progression_data_kbest.items():
        plt.plot(nz_percentages, progression, marker='o',
                 linewidth=2,
                 linestyle="--",
                 )

    # Shade the region below zero, i.e. where the progression is negative.
    y_min = min(plt.ylim()[0], 0)
    plt.axhspan(y_min, 0, facecolor="grey", alpha=0.3)

    plt.title("LPNN-FS vs LASSO (solid) and LPNN-FS vs k-best (dashed)")
    plt.xlabel("Percentage of $N_z$ (%)", size=12)
    plt.ylabel(f"{metrics} Progression (%)", size=12)
    plt.grid(False)
    plt.legend(loc="upper right")
    plt.tight_layout()
    if save:
        # savefig does not create missing directories.
        os.makedirs(save_folder, exist_ok=True)
        plt.savefig(os.path.join(save_folder, f"progression_{metrics}_p={p}.pdf"))
    plt.show()

In [None]:
# F1 progression plot: LASSO comparison first, k-best comparison second.
# The figure is also saved because save=True.
plot_progressions(
    nz_percentages,
    [lasso_f1_progression, kbest_f1_progression],
    p=p_values[0], metrics="F1",
    save=True
)