### Dipendenze

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
import matplotlib.ticker as ticker

In [None]:
def load_variables_from_file(file_path):
    """Execute a Python-syntax configuration file and return the names it defines.

    Args:
        file_path: Path of a text file containing Python statements
            (typically plain assignments such as ``DATASET = '...'``).

    Returns:
        dict mapping every name bound by the file to its value.

    Raises:
        OSError: If the file cannot be opened or read.
        Exception: Anything raised by the executed code propagates unchanged.
    """
    with open(file_path, 'r') as f:
        code = f.read()

    # SECURITY: exec() runs arbitrary code with the privileges of this process.
    # Only point this at configuration files you trust.
    #
    # A single dict is used as both globals and locals so the file behaves like
    # a module: functions defined in it can see the variables assigned next to
    # them. With separate dicts those lookups fail with NameError.
    namespace = {}
    exec(code, namespace)
    # exec() injects '__builtins__' into the globals dict; it is not a config value.
    namespace.pop('__builtins__', None)
    return namespace
# Load the run configuration from 'Configurazioni.txt' (path relative to the working directory).
# NOTE(review): the name `vars` shadows the Python builtin of the same name; it is
# kept because the following cell reads the configuration through `vars[...]`.
vars = load_variables_from_file('Configurazioni.txt')

In [None]:
# Name of the dataset to plot, taken from the loaded configuration.
DATASET = vars['DATASET']
# Folder with the result CSV files. The trailing slash is required because
# file names are appended to this string directly.
MYPATH = os.getcwd() + '/data/data_total/'
# NOTE(review): not referenced by the cells visible here; kept for compatibility.
datasets_to_process = ['XGB','VARIANZA']
# When True the plots also show the training time on a secondary y axis.
TEMPO = False

# Datasets for which result files are expected to exist.
_SUPPORTED_DATASETS = ('MultiPositionWearable', 'selfBACK', 'SDALLE')
if DATASET not in _SUPPORTED_DATASETS:
    raise ValueError("DATASET non valido.")

# Base names (without the '.csv' extension) of the result files. They differ
# between datasets only by the dataset-name prefix.
NOMI_FILE = {
    'baseline_crowd': f'{DATASET}_baseline_crowd',
    'modello_base_crowd': f'{DATASET}_base_crowd',
    'modello_varianza_crowd': f'{DATASET}_varianza_crowd',
}

In [None]:
def _read_results_csv(file_key, missing_message):
    """Read the result CSV registered under ``file_key`` in ``NOMI_FILE``.

    Loading is best-effort: a missing file prints ``missing_message``, any
    other error prints the exception, and in both cases ``None`` is returned
    so the caller can decide how to proceed.
    """
    try:
        return pd.read_csv(MYPATH + NOMI_FILE[file_key] + '.csv', header=0)
    except FileNotFoundError:
        print(missing_message)
    except Exception as e:
        print(e)
    return None


# Model result tables; each one is tagged with the model label used as plot hue.
df_list = []

df_base = _read_results_csv('modello_base_crowd', "No file xgb")
if df_base is not None:
    df_base['dataset'] = 'XGB'
    df_list.append(df_base)

df_varianza = _read_results_csv('modello_varianza_crowd', "No file varianza")
if df_varianza is not None:
    df_varianza['dataset'] = 'VARIANZA'
    df_list.append(df_varianza)

# At least one model table is required; the baseline alone is not enough.
if df_list:
    df_all_data = pd.concat(df_list, ignore_index=True)
    # Sorted distinct weights, used later as x-axis tick positions.
    PESI = sorted(df_all_data['weight'].unique())
else:
    raise Exception("Serve almeno un file")

df_baseline = _read_results_csv('baseline_crowd', "No file baseline")
if df_baseline is None:
    # Keep the name bound even without a baseline file: an empty frame with the
    # columns read downstream lets the plotting code take its "no baseline"
    # path instead of failing with NameError.
    df_baseline = pd.DataFrame(columns=['position', 'f1-score'])


In [None]:
def format_seconds_to_minutes(val, pos=None):
    """Format a duration in seconds as ``"N sec"`` or ``"M:SS min"``.

    The signature matches what ``matplotlib.ticker.FuncFormatter`` expects.

    Args:
        val: Duration in seconds (int or float).
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        ``"<seconds> sec"`` for durations shorter than one minute after
        rounding, otherwise ``"<minutes>:<seconds, two digits> min"``.
    """
    # Round once to whole seconds so both branches agree: e.g. 59.7 becomes
    # "1:00 min" rather than the inconsistent "60 sec".
    total_seconds = int(round(val))
    if total_seconds < 60:
        return f"{total_seconds} sec"
    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes}:{seconds:02d} min"

In [None]:
# One figure per sensor position: mean f1-score versus weight, one line per
# (model, timeUsed) pair, with the baseline f1-score as a horizontal reference.

# The output folder does not depend on the position, so build its path once.
output_dir = os.path.join(os.getcwd(), 'images', DATASET)

for position_to_plot in df_all_data['position'].unique():
    print(f"Generazione plot {position_to_plot}")

    df_pos_main = df_all_data[df_all_data['position'] == position_to_plot]
    df_baseline_pos = df_baseline[df_baseline['position'] == position_to_plot]

    # NaN means "no baseline rows for this position".
    baseline_f1_value = df_baseline_pos['f1-score'].mean() if not df_baseline_pos.empty else np.nan

    # Average over all rows sharing weight/model/timeUsed; groupby returns the
    # groups sorted by key, so no explicit sort by weight is needed.
    df_plot_data_avg = (
        df_pos_main
        .groupby(['weight', 'dataset', 'timeUsed'])[['f1-score', 'time']]
        .mean()
        .reset_index()
    )

    # Human-readable label for the line-style legend entries.
    df_plot_data_avg['Tempo Usato'] = df_plot_data_avg['timeUsed'].apply(
        lambda x: format_seconds_to_minutes(x, None)
    )

    fig, ax1 = plt.subplots(figsize=(18, 9))

    # Fixed colour per model so both axes use the same mapping.
    unique_datasets = df_plot_data_avg['dataset'].unique()
    dataset_colors = dict(zip(unique_datasets, sns.color_palette("tab10", n_colors=len(unique_datasets))))

    sns.lineplot(
        data=df_plot_data_avg,
        x='weight',
        y='f1-score',
        hue='dataset',
        style='Tempo Usato',
        palette=dataset_colors,
        marker='o',
        markersize=7,
        linewidth=2.0,
        ax=ax1
    )

    if np.isfinite(baseline_f1_value):
        ax1.axhline(y=baseline_f1_value, color='black', linestyle=':', linewidth=2,
                    label=f'Baseline F1 ({baseline_f1_value:.3f})')
        # Start the y axis just below the baseline.
        ax1.set_ylim(baseline_f1_value - 0.01, 1.01)
    else:
        # Without a baseline only the upper limit can be fixed; passing NaN to
        # set_ylim would raise ValueError.
        ax1.set_ylim(top=1.01)

    ax1.set_ylabel('f1-score', color='tab:blue', fontsize=14)
    ax1.tick_params(axis='y', labelcolor='tab:blue')
    ax1.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5)
    ax1.yaxis.set_major_locator(ticker.MultipleLocator(0.05))

    ax1.set_xlabel("Peso (Weight)", fontsize=14)
    ax1.set_xscale('log')
    # Put a labelled tick on every weight present in the data.
    ax1.set_xticks(PESI)
    ax1.set_xticklabels(PESI)
    ax1.grid(True, which='both', axis='x', linestyle='--', linewidth=0.5, alpha=0.4)

    if TEMPO:
        # Secondary axis with the training time, formatted as sec / min.
        ax2 = ax1.twinx()
        sns.lineplot(
            data=df_plot_data_avg,
            x='weight',
            y='time',
            hue='dataset',
            style='Tempo Usato',
            palette=dataset_colors,
            marker='s',
            markersize=6,
            legend=False,
            ax=ax2,
            linestyle='dotted'
        )
        ax2.set_ylabel('Tempo Addestramento', color='tab:green', fontsize=14)
        ax2.tick_params(axis='y', labelcolor='tab:green')
        ax2.set_ylim(bottom=0)
        ax2.yaxis.set_major_formatter(ticker.FuncFormatter(format_seconds_to_minutes))

    # Rebuild the legend so it has a title and includes the baseline entry.
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels, title='Modello / TimeUsed', loc='best')

    # Set the title on ax1 explicitly rather than on pyplot's "current" axes.
    ax1.set_title(f'Posizione: {position_to_plot}', fontsize=18, pad=20)

    fig.tight_layout()

    # Save the figure and release it so figures do not accumulate in memory.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f'plot_{position_to_plot}.png')
    fig.savefig(output_path, dpi=150)
    plt.close(fig)