### Dipendenze

In [15]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
import matplotlib.ticker as ticker

In [16]:
def load_variables_from_file(file_path):
    """Run a Python-syntax configuration file and return the names it defines.

    SECURITY NOTE: the file content is executed with ``exec``; it can run
    arbitrary code, so only point this at files you trust.

    Args:
        file_path: Path of the text file containing Python statements.

    Returns:
        dict mapping every top-level name defined by the file to its value.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever the executed configuration code itself raises.
    """
    with open(file_path, 'r') as f:
        code = f.read()

    # One dict serves as both globals and locals. With two separate dicts,
    # functions defined in the file resolve free names against the (empty)
    # globals and fail with NameError when they use other config values.
    namespace = {}
    exec(code, namespace)

    # exec() injects '__builtins__' into the globals dict; it is not a config value.
    namespace.pop('__builtins__', None)
    return namespace
# Configuration values defined by 'Configurazioni.txt', keyed by variable name.
# NOTE(review): the name `vars` shadows Python's built-in vars() from here on.
vars = load_variables_from_file('Configurazioni.txt')

In [17]:
# Dataset identifier read from the loaded configuration.
DATASET = vars['DATASET']
# Folder holding the result CSVs. The trailing slash matters: file names are
# appended to this string by plain concatenation.
MYPATH = os.getcwd() + '/data/data_total/'
# When True, the trend plots also draw the 'time' column on a secondary y-axis.
TEMPO = False

# Datasets this notebook knows how to handle.
DATASET_VALIDI = ('MultiPositionWearable', 'selfBACK', 'SDALLE')
if DATASET not in DATASET_VALIDI:
    raise ValueError("DATASET non valido.")

# All datasets share one naming scheme: "<DATASET>_<suffix>" (without the
# '.csv' extension), so the mapping is derived instead of being repeated
# once per dataset.
NOMI_FILE = {
    'baseline_crowd': f'{DATASET}_baseline_crowd',
    'modello_base_crowd': f'{DATASET}_base_crowd',
    'modello_varianza_crowd': f'{DATASET}_varianza_crowd',
}

In [18]:
def _read_model_results(file_key, dataset_label, missing_message):
    """Read one model's result CSV and tag every row with ``dataset_label``.

    Loading is best-effort: a missing or unreadable file is reported on stdout
    and ``None`` is returned, so the notebook can continue with the other model.

    Args:
        file_key: Key of ``NOMI_FILE`` identifying the CSV (without extension).
        dataset_label: Value written to the new 'Dataset' column.
        missing_message: Text printed when the file does not exist.

    Returns:
        The loaded DataFrame, or ``None`` if it could not be read.
    """
    try:
        df_model = pd.read_csv(MYPATH + NOMI_FILE[file_key] + '.csv', header=0)
    except FileNotFoundError:
        print(missing_message)
        return None
    except Exception as e:
        print(e)
        return None
    df_model['Dataset'] = dataset_label
    return df_model


df_base = _read_model_results('modello_base_crowd', 'Base', "No file xgb")
df_varianza = _read_model_results('modello_varianza_crowd', 'Varianza', "No file varianza")

# Keep only the models that were actually loaded; at least one is required.
df_list = [df_model for df_model in (df_base, df_varianza) if df_model is not None]
if not df_list:
    raise Exception("Serve almeno un file")

df_all_data = pd.concat(df_list, ignore_index=True)
# Sorted distinct weights found in the loaded model results.
PESI = sorted(df_all_data['weight'].unique())

# The baseline is mandatory: the statistics below are computed from it.
# Previously a missing file was only printed, and the next statement then
# failed with a NameError (or silently reused a stale `df_baseline` left in
# the kernel by an earlier run).
baseline_path = MYPATH + NOMI_FILE['baseline_crowd'] + '.csv'
try:
    df_baseline = pd.read_csv(baseline_path, header=0)
except FileNotFoundError as e:
    print("No file baseline")
    raise FileNotFoundError(f"Baseline file not found: {baseline_path}") from e

# Lowest per-position mean baseline F1.
min_baseline_f1 = df_baseline.groupby('position')['f1-score'].mean().min()

In [19]:
def format_seconds_to_minutes(val, pos=None):
    """Format a duration in seconds as ``"<s> sec"`` or ``"<m>:<ss> min"``.

    The two-argument signature lets the function be passed to
    ``ticker.FuncFormatter``; ``pos`` is accepted but not used.

    Args:
        val: Duration in seconds (int or float).
        pos: Tick position supplied by the formatter machinery; ignored.

    Returns:
        ``"<s> sec"`` for durations that round to less than one minute,
        otherwise ``"<m>:<ss> min"``.
    """
    # Round once to whole seconds so both branches agree: previously values
    # in [59.5, 60) were shown as "60 sec" while longer ones were truncated.
    total_seconds = int(round(float(val)))
    if total_seconds < 60:
        return f"{total_seconds} sec"
    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes}:{seconds:02d} min"

In [20]:
# The destination folder is the same for every position, so it is prepared once.
output_dir = os.path.join(os.getcwd(), 'images', 'andamento_modelli', DATASET, 'guadagno_assoluto')
os.makedirs(output_dir, exist_ok=True)

# One figure per position: mean F1-score versus weight for every model, with
# the baseline mean drawn as a horizontal reference line.
for position_to_plot in df_all_data['position'].unique():
    print(f"Generazione plot {position_to_plot}")

    df_pos_main = (
        df_all_data[df_all_data['position'] == position_to_plot]
        .sort_values(by='weight')
    )
    df_baseline_pos = df_baseline[df_baseline['position'] == position_to_plot]

    # NaN means "no baseline rows for this position"; the reference line is then skipped.
    baseline_f1_value = df_baseline_pos['f1-score'].mean() if not df_baseline_pos.empty else np.nan

    # One averaged row per (weight, Dataset, timeUsed) combination.
    df_plot_data_avg = (
        df_pos_main
        .groupby(['weight', 'Dataset', 'timeUsed'])[['f1-score', 'time']]
        .mean()
        .reset_index()
    )
    # Human-readable version of 'timeUsed', used as the line-style legend entry.
    df_plot_data_avg['Tempo Usato'] = df_plot_data_avg['timeUsed'].apply(
        lambda x: format_seconds_to_minutes(x, None)
    )

    fig, ax1 = plt.subplots(figsize=(18, 9))

    # Fixed colour per model so both axes use the same mapping.
    unique_datasets = df_plot_data_avg['Dataset'].unique()
    dataset_colors = dict(zip(unique_datasets, sns.color_palette("tab10", n_colors=len(unique_datasets))))

    sns.lineplot(
        data=df_plot_data_avg,
        x='weight',
        y='f1-score',
        hue='Dataset',
        style='Tempo Usato',
        palette=dataset_colors,
        marker='o',
        markersize=7,
        linewidth=2.0,
        ax=ax1
    )

    if np.isfinite(baseline_f1_value):
        ax1.axhline(y=baseline_f1_value, color='black', linestyle=':', linewidth=2,
                    label=f'Baseline F1 ({baseline_f1_value:.3f})')

    # Y axis: same range on every figure, starting just below the lowest baseline mean.
    ax1.set_ylabel('F1-Score', color='tab:blue', fontsize=14)
    ax1.set_ylim(min_baseline_f1-0.01, 1.01)
    ax1.tick_params(axis='y', labelcolor='tab:blue')
    ax1.grid(True, which='major', axis='y', linestyle='--', linewidth=0.5)
    ax1.yaxis.set_major_locator(ticker.MultipleLocator(0.05))

    # X axis: logarithmic, with one labelled tick per weight value.
    ax1.set_xlabel("Peso (Weight)", fontsize=14)
    ax1.set_xscale('log')
    ax1.set_xticks(PESI)
    ax1.set_xticklabels(PESI)
    ax1.grid(True, which='both', axis='x', linestyle='--', linewidth=0.5, alpha=0.4)

    if TEMPO:
        # Optional secondary axis showing the mean of the 'time' column.
        ax2 = ax1.twinx()
        sns.lineplot(
            data=df_plot_data_avg,
            x='weight',
            y='time',
            hue='Dataset',
            style='Tempo Usato',
            palette=dataset_colors,
            marker='s',
            markersize=6,
            legend=False,
            ax=ax2,
            linestyle='dotted'
        )
        ax2.set_ylabel('Tempo Addestramento', color='tab:green', fontsize=14)
        ax2.tick_params(axis='y', labelcolor='tab:green')
        ax2.set_ylim(bottom=0)
        ax2.yaxis.set_major_formatter(ticker.FuncFormatter(format_seconds_to_minutes))

    # Rebuild the legend so it also contains the baseline line and has no title.
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels, title='', loc='best')

    # Explicit axes/figure methods instead of pyplot's "current axes" state,
    # which pointed at the twin axis whenever TEMPO was enabled.
    ax1.set_title(f'Posizione: {position_to_plot}', fontsize=18, pad=20)

    fig.tight_layout()

    # Save the figure and release it to keep memory flat across the loop.
    output_path = os.path.join(output_dir, f'plot_{position_to_plot}.png')
    fig.savefig(output_path, dpi=150)
    plt.close(fig)

Generazione plot all sensors
Generazione plot belt
Generazione plot right wrist
Generazione plot chest
Generazione plot right pocket
Generazione plot left pocket
Generazione plot left wrist
Generazione plot left ankle
Generazione plot right ankle


In [21]:
# Destination folder and comparison weights do not depend on the position.
output_dir = os.path.join(os.getcwd(), 'images', 'andamento_modelli', DATASET, 'guadagno_relativo')
os.makedirs(output_dir, exist_ok=True)
# Weight 1 is the reference; every other weight is compared against it.
weights_to_compare = [w for w in PESI if w != 1]

# One bar-chart grid per position: relative F1 change of each weight with
# respect to weight 1, one panel per 'timeUsed' value.
for position_to_plot in df_all_data['position'].unique():
    print(f"Generazione griglia per: {position_to_plot}")

    df_pos_main = df_all_data[df_all_data['position'] == position_to_plot]

    all_gain_results = []
    for time_val in sorted(df_pos_main['timeUsed'].unique()):
        df_pos_time = df_pos_main[df_pos_main['timeUsed'] == time_val]

        # Without weight-1 rows there is no reference for this timeUsed value.
        df_weight_1 = df_pos_time[df_pos_time['weight'] == 1]
        if df_weight_1.empty:
            continue

        # Mean weight-1 F1 per model: the denominator of the relative gain.
        f1_baseline_by_dataset = df_weight_1.groupby('Dataset')['f1-score'].mean().to_dict()

        for weight_val in weights_to_compare:
            df_current_weight = df_pos_time[df_pos_time['weight'] == weight_val]
            f1_current_by_dataset = df_current_weight.groupby('Dataset')['f1-score'].mean().to_dict()

            for dataset_name, current_f1 in f1_current_by_dataset.items():
                baseline_f1 = f1_baseline_by_dataset.get(dataset_name)
                # Skip models without a reference, or with a zero/NaN reference
                # (the ratio would be undefined).
                if baseline_f1 is not None and baseline_f1 > 0:
                    relative_gain = (current_f1 - baseline_f1) / baseline_f1
                    all_gain_results.append({
                        'Peso': weight_val,
                        'Dataset': dataset_name,
                        'Guadagno Relativo': relative_gain,
                        'timeUsed': f'{time_val}s'
                    })

    # Nothing to compare (e.g. no weight-1 rows): catplot would fail on an
    # empty frame that lacks the expected columns, so skip this position.
    if not all_gain_results:
        print(f"Nessun guadagno calcolabile per {position_to_plot}: griglia saltata")
        continue

    df_gain_all_times = pd.DataFrame(all_gain_results)

    g = sns.catplot(
        data=df_gain_all_times,
        x='Peso',
        y='Guadagno Relativo',
        hue='Dataset',
        col='timeUsed',
        kind='bar',
        col_wrap=3,
        height=4,
        aspect=1.8,
        legend_out=False,
        sharex=False
    )

    # Single legend centred above the grid, one column per model.
    sns.move_legend(
        g,
        "lower center",
        bbox_to_anchor=(.5, 1),
        ncol=df_gain_all_times['Dataset'].nunique(),
        title=None,
        frameon=False,
    )

    g.figure.suptitle(f'Guadagno Relativo di F1-Score Peso N vs Peso 1 (Posizione: {position_to_plot})', y=1.08, fontsize=16)
    g.set_axis_labels("", "Guadagno Relativo (%)")
    g.set_titles("Tempo Usato: {col_name} per ogni azione")

    for ax in g.axes.flat:
        # Values are fractions (0.05 == 5%), hence xmax=1.0.
        ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
        ax.axhline(0, color='black', linewidth=0.8, linestyle='--')
        ax.grid(axis='y', linestyle='--', alpha=0.3)

        for container in ax.containers:
            # Leave bars without a value unlabeled instead of printing "nan%".
            labels = ['' if np.isnan(v) else f'{v*100:.1f}%' for v in container.datavalues]
            ax.bar_label(container, labels=labels, padding=3, fontsize=8)

    output_path = os.path.join(output_dir, f'guadagno_griglia_{position_to_plot}.png')
    g.savefig(output_path, dpi=150)
    plt.close('all')

Generazione griglia per: all sensors
Generazione griglia per: belt
Generazione griglia per: right wrist
Generazione griglia per: chest
Generazione griglia per: right pocket
Generazione griglia per: left pocket
Generazione griglia per: left wrist
Generazione griglia per: left ankle
Generazione griglia per: right ankle
