In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [31]:
MLFLOW = False

In [32]:
import pandas as pd
from pathlib import Path

def load_all_metrics_files(path):
    """
    Parcourt tous les sous-dossiers de `path`, charge les fichiers `df_metrics.csv`,
    et les combine en un seul DataFrame global.

    Parameters:
    - path (Path): Le chemin du dossier racine à parcourir.

    Returns:
    - pd.DataFrame: Le DataFrame global combinant tous les fichiers `df_metrics.csv`.
    """
    path = Path(path)
    all_metrics = []  # Liste pour stocker tous les DataFrames

    # Parcourir tous les sous-dossiers et rechercher les fichiers df_metrics.csv
    for file in path.rglob('df_metrics.csv'):
        try:
            # Charger le fichier CSV dans un DataFrame
            df = pd.read_csv(file)

            # Ajouter une colonne pour identifier la source du fichier
            df['source'] = str(file.parent)  # Ajouter le chemin du dossier parent

            # Ajouter le DataFrame à la liste
            all_metrics.append(df)
        except Exception as e:
            print(f"Erreur lors du chargement de {file}: {e}")

    # Combiner tous les DataFrames en un seul DataFrame global
    if all_metrics:
        df_global = pd.concat(all_metrics, ignore_index=True)
    else:
        df_global = pd.DataFrame()  # Si aucun fichier trouvé, retourne un DataFrame vide

    return df_global

if not MLFLOW:
    # Utilisation de la fonction
    path = Path('../GNN/firemen/firepoint/2x2/test/occurence_default')
    df = load_all_metrics_files(path)

In [33]:
df.to_csv('try.csv')

In [34]:
if MLFLOW:
    metric = 'bad_prediction_modified_nbsinister_max_1'
    df = pd.read_csv(f'{metric}.csv')

    metric = 'wildfire_over_predicted_modified_nbsinister_max_1'
    df1 = pd.read_csv(f'{metric}.csv')

    metric = 'iou_modified_nbsinister_max_1'
    df2 = pd.read_csv(f'{metric}.csv')

    metric = 'apr_nbsinister_max_1'
    df3 = pd.read_csv(f'{metric}.csv')

    metric = 'r2_nbsninster_max_1'
    df4 = pd.read_csv(f'{metric}.csv')

    df = df.set_index('Run').join(df1.set_index('Run')['wildfire_over_predicted_modified_nbsinister_max_1']).reset_index()
    df = df.set_index('Run').join(df2.set_index('Run')['iou_modified_nbsinister_max_1']).reset_index()
    df = df3.set_index('Run').join(df2.set_index('Run')['apr_nbsinister_max_1']).reset_index()
    df = df4.set_index('Run').join(df2.set_index('Run')['r2_nbsninster_max_1']).reset_index()

    #df.dropna(subset=['temporal_spearman', 'temporal_mae', 'temporal_binary_f1', 'temporal_kendall'], inplace=True)
    df.drop_duplicates(inplace=True, keep='first')
    df.reset_index(drop=True, inplace=True)

In [35]:
df

Unnamed: 0.1,Unnamed: 0,Run,nbsinister,nb,apr,r2,KS,IV,SS,SS_no_zeros,...,iou_graph_10_season_medium_class,iou_under_prediction_graph_10_season_medium_class,iou_over_prediction_graph_10_season_medium_class,reliability_predicted_graph_10_season_medium_class,reliability_detected_graph_10_season_medium_class,wildfire_predicted_ratio_graph_10_season_medium_class,wildfire_detected_ratio_graph_10_season_medium_class,over_bad_prediction_local_graph_10_season_medium_class,under_bad_prediction_local_graph_10_season_medium_class,bad_prediction_local_graph_10_season_medium_class
0,0,departement-01-ain_xgboost_full_proportion-on-...,278.0,278.0,0.28,-0.240261,0.343577,0.49,-0.08,-0.29,...,,,,,,,,,,
1,1,departement-01-ain_xgboost_full_proportion-on-...,278.0,278.0,0.24,-0.373929,0.304559,0.39,0.32,0.19,...,,,,,,,,,,
2,2,departement-01-ain_xgboost_full_proportion-on-...,278.0,278.0,0.27,-0.039947,0.371834,0.4,0.07,0.04,...,,,,,,,,,,
3,3,departement-25-doubs_xgboost_full_proportion-o...,176.0,176.0,0.19,-0.191002,0.372394,0.54,0.46,0.06,...,,,,,,,,,,
4,4,departement-25-doubs_xgboost_full_proportion-o...,176.0,176.0,0.15,-0.208325,0.265849,0.79,0.54,0.56,...,,,,,,,,,,
5,5,departement-25-doubs_xgboost_full_proportion-o...,176.0,176.0,0.19,-0.017815,0.355007,1.01,0.32,-0.08,...,,,,,,,,,,
6,6,departement-78-yvelines_xgboost_full_proportio...,201.0,201.0,0.41,0.239349,0.550988,2.29,0.37,-0.0,...,,,,,,,,,,
7,7,departement-78-yvelines_xgboost_full_proportio...,201.0,201.0,0.38,0.181395,0.514581,0.97,0.51,0.07,...,,,,,,,,,,
8,8,departement-78-yvelines_xgboost_full_proportio...,201.0,201.0,0.39,0.214844,0.516055,0.85,0.29,0.12,...,,,,,,,,,,
9,0,departement-01-ain_xgboost_full_proportion-on-...,278.0,278.0,0.21,-0.368658,0.371071,0.53,0.16,-0.13,...,,,,,,,,,,


In [36]:
df.loc[0, 'Run']

'departement-01-ain_xgboost_full_proportion-on-zero-class_risk-kmeans-5-Class-Dept_classification_softmax_full_0_all_6_0_risk-size-watershed_node'

In [37]:
def parse_run_name(x):
    dico = {}
    vec = x.split('_')
    dico['Department'] = vec[0]
    dico['Model'] = vec[1]
    i = 2
    if dico['Model'] == 'fwi':
        i += 1
        dico['Target'] = 'indice'
    else:
        dico['Number_of_samples'] = vec[i]
        i += 1
        dico['weight'] = vec[i]
        i += 1
        dico['Target'] = vec[i]
        i += 1

    if dico['Model'] != 'fwi':
        dico['Task_type'] = vec[i]
        i += 1
        dico['loss'] = vec[i]
        i += 1
    else:
        dico['loss'] = None
        dico['Task_type'] = 'Indice'

    i += 1
    dico['kdays'] = vec[i]
    i += 1
    dico['Number_of_features'] = vec[i]
    i += 1
    dico['Scale'] = vec[i]
    i += 1
    dico['Days_in_futur'] = vec[i]
    i += 1
    dico['Base'] = vec[i]
    i += 1
    dico['Method'] = vec[i]
    i += 1
    if i == len(vec):
        return dico
    if vec[i] == 'kmeans':
        i += 1
        dico['kmeans_shift'] = vec[i]
        i += 1
        dico['kmeans_thresh'] = vec[i]
        i += 1
    return dico

# Initialisation des colonnes avec des valeurs None
df['Department'] = None
df['Model'] = None
df['Target'] = None
df['Task_type'] = None
df['Loss_function'] = None
df['Number_of_samples'] = None
df['kdays'] = None
df['Number_of_features'] = None
df['Scale'] = None
df['Base'] = None
df['Method'] = None
df['Days_in_futur'] = None
df['weight'] = None
df['kmeans_thresh'] = None
df['kmeans_shift'] = None

# Boucle pour remplir les colonnes avec les valeurs de dico_parse
for index, row in df.iterrows():
    dico_parse = parse_run_name(row['Run'])
    
    # Mise à jour de chaque colonne avec les valeurs du dictionnaire dico_parse
    df.loc[index, 'Department'] = dico_parse.get('Department')
    df.loc[index, 'Model'] = dico_parse.get('Model')
    df.loc[index, 'Target'] = dico_parse.get('Target')
    df.loc[index, 'Task_type'] = dico_parse.get('Task_type')
    df.loc[index, 'Loss_function'] = dico_parse.get('loss')
    df.loc[index, 'Number_of_samples'] = dico_parse.get('Number_of_samples')
    df.loc[index, 'kdays'] = dico_parse.get('kdays')
    df.loc[index, 'Number_of_features'] = dico_parse.get('Number_of_features')
    df.loc[index, 'Scale'] = dico_parse.get('Scale')
    df.loc[index, 'Base'] = dico_parse.get('Base')
    df.loc[index, 'Method'] = dico_parse.get('Method')
    df.loc[index, 'Days_in_futur'] = dico_parse.get('Days_in_futur')

    df.loc[index, 'weight'] = dico_parse.get('weight')
    df.loc[index, 'kmeans_thresh'] = dico_parse.get('kmeans_thresh', 0)
    df.loc[index, 'kmeans_shift'] = dico_parse.get('kmeans_shift', 0)


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

In [38]:
import seaborn as sns

def compare_models2(dff, depts, dept_markers, metrics, scale, col_to_analyse, dir_output, suffix):
    df = dff[dff['Scale'] == scale]
    task_types = df[col_to_analyse].unique()

    np.random.seed(42)  # Pour rendre le code reproductible (vous pouvez le changer ou le retirer)
    colors = sns.color_palette("husl", len(task_types))  # Palette de couleurs diversifiée et aléatoire

    #Associer chaque modèle à une couleur
    task_colors = dict(zip(task_types, colors))
    # Create a new figure for all departments
    fig, axs = plt.subplots(len(depts), len(metrics), figsize=(15, 3 * len(depts)), sharey='row')

    # Loop through each department
    for dept_index, dept in enumerate(depts):
        # Filtering the DataFrame for a specific department
        df_filtered = df[df['Department'].isin([dept])].reset_index()

        # Loop through each metric to create a subplot for each metric
        for metric_index, metric in enumerate(metrics):
            ax = axs[dept_index, metric_index] if len(depts) > 1 else axs[metric_index]
            for method in task_types:
                subset = df_filtered[df_filtered[col_to_analyse] == method]

                if len(subset) == 0:
                    continue

                subset = subset.sort_values(col_to_analyse)
                
                # Add label only for the first occurrence of each method
                label = method if dept_index == 0 else None  # Only show labels for the first department
                ax.plot(subset[col_to_analyse].values, subset[metric].values, marker=dept_markers[dept], linestyle='-', 
                        label=label, color=task_colors[method])  # Use task type color
                
            # Configuration for each subplot
            ax.set_ylim(0, 1)
            ax.set_xlabel(col_to_analyse)
            if dept_index == 0:  # Only set the title for the first row
                ax.set_title(f'{metric}', fontsize=12)
            #ax.set_xticks(subset[col_to_analyse].values, rotation=45)
            ax.grid(True)

        axs[dept_index, 0].set_ylabel('Metric Value')

    # Create a custom legend for task types and departments
    task_handles = [plt.Line2D([0], [0], color=task_colors[method], label=method) for method in task_types]
    dept_handles = [plt.Line2D([0], [0], marker=dept_markers[dept], linestyle='None', color='black', label=dept) for dept in depts]

    # Combine both legends
    fig.legend(handles=task_handles + dept_handles, loc='center left', bbox_to_anchor=(0.85, 0.85), title="Legend")

    plt.tight_layout(rect=[0, 0, 0.85, 1])  # Adjust layout to make room for the legend
    plt.savefig(dir_output / f'{col_to_analyse}_{scale}_{suffix}.png')

In [39]:
# Define markers for each department
dept_markers = {
    'departement-01-ain': 'o',   # Circle
    'departement-25-doubs': 's',   # Square
    'departement-78-yvelines': 'D',   # Diamond
    # Add more departments and their markers as needed
}

#metrics =  ['over_bad_prediction_nbsinister_discretization_dt', 'wildfire_over_predicted_nbsinister_discretization_dt', 'iou_nbsinister_discretization_dt']

#compare_models2(df, df.Department.unique(), dept_markers, metrics, '6', 'kmeans_thresh', Path('./'), '1')
#compare_models2(df, df.Department.unique(), dept_markers, metrics, '7', 'kmeans_thresh', Path('./'), '1')
#compare_models2(df, df.Department.unique(), dept_markers, metrics, 'Departement', 'kmeans_thresh', Path('./'), '1')

In [40]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
import plotly.express as px  # Utilisé pour générer une palette de couleurs

def compare_models_2d(df, depts, dept_markers, metrics, col_to_analyse_1, col_to_analyse_2, col_link):
    """
    Compare models in 2D with the third dimension being the metric plotted as an axis.
    Points with the same value in `col_link` are connected in increasing order with a different color.
    Each plot is interactive and can be moved independently using Plotly.

    :param df: DataFrame containing the data.
    :param depts: List of departments to analyze.
    :param dept_markers: Dictionary mapping departments to plot markers.
    :param metrics: List of metrics to plot as Z-axis.
    :param col_to_analyse_1: First column to analyze on X-axis.
    :param col_to_analyse_2: Second column to analyze on Y-axis.
    :param col_link: Column used to connect points in the same group.
    """
    # Encode string values in col_to_analyse_1 and col_to_analyse_2 if necessary
    def encode_column(df, col_name):
        if df[col_name].dtype == 'object':  # Check if column is string-based
            unique_values = df[col_name].unique()
            value_map = {val: idx for idx, val in enumerate(unique_values)}
            df[f"{col_name}_encoded"] = df[col_name].map(value_map)
            return value_map  # Return the mapping for decoding
        else:
            df[f"{col_name}_encoded"] = df[col_name]  # Keep the original if numeric
            return None

    map_1 = encode_column(df, col_to_analyse_1)
    map_2 = encode_column(df, col_to_analyse_2)

    # Generate a unique color for each unique value in col_link
    unique_links = df[col_link].unique()
    colors = px.colors.qualitative.Plotly[:len(unique_links)]

    # Map each unique link to a specific color
    color_map = {link: colors[i % len(colors)] for i, link in enumerate(unique_links)}

    for dept in depts:
        df_filtered = df[df['Department'] == dept]

        # Create a subplot layout with one column per metric
        fig = make_subplots(
            rows=1, 
            cols=len(metrics), 
            specs=[[{'type': 'scatter3d'}] * len(metrics)],
            subplot_titles=[f"{dept} - {metric}" for metric in metrics]
        )

        for col_idx, metric in enumerate(metrics, start=1):
            # Add a trace for each unique value in col_link (to connect points belonging to the same group)
            for link_value in df_filtered[col_link].unique():
                subset = df_filtered[df_filtered[col_link] == link_value]

                # Sort the subset by the encoded columns for ordered plotting
                subset = subset.sort_values(by=[f"{col_to_analyse_1}_encoded", f"{col_to_analyse_2}_encoded"])

                fig.add_trace(
                    go.Scatter3d(
                        x=subset[f"{col_to_analyse_1}_encoded"],
                        y=subset[f"{col_to_analyse_2}_encoded"],
                        z=subset[metric],  # Plot original metric values on the Z-axis
                        mode='lines+markers',
                        marker=dict(
                            size=8,
                            color=color_map[link_value],  # Assign a unique color for each line
                        ),
                        line=dict(
                            color=color_map[link_value],  # Use the same color for the line
                            width=3  # Line width
                        ),
                        name=f"{link_value}",
                    ),
                    row=1, col=col_idx
                )

        # Update the layout to add titles and axis labels for each subplot
        fig.update_layout(
            height=900,  # Increased height
            width=800 * len(metrics),  # Increased width dynamically based on number of metrics
            title_text=f"3D Comparison for {dept}",
            showlegend=True
        )

        # Update axis labels for each subplot
        for i, metric in enumerate(metrics, start=1):
            fig.update_scenes(
                dict(
                    xaxis=dict(
                        title=col_to_analyse_1,
                        tickvals=list(map_1.values()) if map_1 else None,
                        ticktext=list(map_1.keys()) if map_1 else None
                    ),
                    yaxis=dict(
                        title=col_to_analyse_2,
                        tickvals=list(map_2.values()) if map_2 else None,
                        ticktext=list(map_2.keys()) if map_2 else None
                    ),
                    zaxis_title=metric
                ),
                row=1, col=i
            )

        # Show the combined figure for this department
        fig.show()


In [41]:
df.loc[df[df['Scale'] == 'departement'].index, 'Scale'] = 10

In [42]:
for col in df.columns:
    print(col)

Unnamed: 0
Run
nbsinister
nb
apr
r2
KS
IV
SS
SS_no_zeros
SS_gt
SS_no_zeros_gt
accuracy
common_area_class
union_area_class
under_predicted_area_class
over_predicted_area_class
iou_class
iou_wildfire_or_pred_class
iou_wildfire_and_pred_class
iou_under_prediction_class
iou_over_prediction_class
reliability_predicted_class
reliability_detected_class
wildfire_predicted_ratio_class
wildfire_detected_ratio_class
reliability_class
wildfire_over_predicted_class
wildfire_under_predicted_class
over_bad_prediction_class
under_bad_prediction_class
bad_prediction_class
iou_wildfire_or_pred0_class
iou_wildfire_and_pred0_class
iou_0_class
iou_under_prediction_0_class
iou_over_prediction_0_class
reliability_predicted_0_class
reliability_detected_0_class
wildfire_predicted_ratio_local_0_class
wildfire_detected_ratio_local_0_class
over_bad_prediction_local_0_class
under_bad_prediction_local_0_class
bad_prediction_local_0_class
over_bad_prediction_global_0_class
under_bad_prediction_global_0_class
bad_pre

In [43]:
df['kdays']

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
35    0
36    0
37    0
38    0
39    0
40    0
41    0
42    0
43    0
44    0
Name: kdays, dtype: object

In [44]:
#compare_models_2d(df, df.Department.unique(), dept_markers, ['apr', 'r2'],  'Target', 'Scale', 'Scale')

In [45]:
df['Scale'] = df['Scale'].astype(int)

In [46]:
df['Department']

0          departement-01-ain
1          departement-01-ain
2          departement-01-ain
3        departement-25-doubs
4        departement-25-doubs
5        departement-25-doubs
6     departement-78-yvelines
7     departement-78-yvelines
8     departement-78-yvelines
9          departement-01-ain
10         departement-01-ain
11         departement-01-ain
12       departement-25-doubs
13       departement-25-doubs
14       departement-25-doubs
15    departement-78-yvelines
16    departement-78-yvelines
17    departement-78-yvelines
18         departement-01-ain
19         departement-01-ain
20         departement-01-ain
21       departement-25-doubs
22       departement-25-doubs
23       departement-25-doubs
24    departement-78-yvelines
25    departement-78-yvelines
26    departement-78-yvelines
27         departement-01-ain
28         departement-01-ain
29         departement-01-ain
30       departement-25-doubs
31       departement-25-doubs
32       departement-25-doubs
33    depa

In [47]:
results = {}

for department in df['Department'].unique():
    results[department] = {
        'mean_metrique_by_target': {},
        'max_metrique_by_target': {}
    }
    for target in df[df['Department'] == department].Target.unique():
        mean_value = round(df[(df['Department'] == department) & (df['Target'] == target)]['iou_wildfire_or_pred_class'].mean(), 2)
        #max_value = round(df[(df['Department'] == department) & (df['Target'] == target)]['iou_wildfire_graph_0_season_high_class'].max(), 2)

        results[department]['mean_metrique_by_target'][target] = mean_value
        #results[department]['max_metrique_by_target'][target] = max_value

results


{'departement-01-ain': {'mean_metrique_by_target': {'risk-kmeans-5-Class-Dept': 0.26,
   'nbsinister-max-0-kmeans-5-Class-Dept': 0.24,
   'nbsinister-max-1-kmeans-5-Class-Dept': 0.28},
  'max_metrique_by_target': {}},
 'departement-25-doubs': {'mean_metrique_by_target': {'risk-kmeans-5-Class-Dept': 0.24,
   'nbsinister-max-0-kmeans-5-Class-Dept': 0.2,
   'nbsinister-max-1-kmeans-5-Class-Dept': 0.24},
  'max_metrique_by_target': {}},
 'departement-78-yvelines': {'mean_metrique_by_target': {'risk-kmeans-5-Class-Dept': 0.36,
   'nbsinister-max-0-kmeans-5-Class-Dept': 0.28,
   'nbsinister-max-1-kmeans-5-Class-Dept': 0.36},
  'max_metrique_by_target': {}}}

In [48]:
df = df[df['Target'].isin(['nbsinister-max-0-kmeans-5-Class-Dept', 'nbsinister-max-1-kmeans-5-Class-Dept', 'risk-kmeans-5-Class-Dept', 'risk-nbsinister-Robust-kmeans-5-Class-Dept'])]

mean_metrique_by_target = {}
max_metrique_by_target = {}
for target in df.Model.unique():
    mean_metrique_by_target[target] = round(df[df['Model'] == target]['iou_wildfire_or_pred_class'].mean(), 2)
    #max_metrique_by_target[target] = round(df[df['Model'] == target]['iou_wildfire_class'].max(), 2)

mean_metrique_by_target, max_metrique_by_target

({'xgboost': 0.27}, {})

In [49]:
compare_models_2d(df, df.Department.unique(), dept_markers, ['SS'],  'Target', 'Scale', 'Target')

In [50]:
compare_models_2d(df, df.Department.unique(), dept_markers, ['iou_wildfire_or_pred_class'],  'Target', 'Scale', 'Target')

In [53]:
compare_models_2d(df, df.Department.unique(), dept_markers, ['iou_wildfire_and_pred_class'],  'Target', 'Scale', 'Target')

In [54]:
compare_models_2d(df, df.Department.unique(), dept_markers, ['iou_class'],  'Target', 'Scale', 'Target')

In [51]:
compare_models_2d(df, df.Department.unique(), dept_markers, ['iou_wildfire_or_pred_elt_1_class', 'iou_wildfire_or_pred_elt_2_class'],  'Target', 'Scale', 'Target')

In [52]:
compare_models_2d(df, df.Department.unique(), dept_markers, ['iou_wildfire_or_pred_elt_3_class', 'iou_wildfire_or_pred_elt_4_class'],  'Target', 'Scale', 'Target')