# Model Selection for SLP Prediction

This notebook performs model selection to predict the `slp` column using various machine learning algorithms with time series cross-validation.


In [1]:
import itertools
import os
import warnings

import joblib
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from lightgbm import LGBMRegressor
from scipy.stats import pearsonr, spearmanr
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
warnings.filterwarnings('ignore')


## 1. Load and Prepare Data


In [2]:
# Run configuration: which dataset variant to load and where artifacts are written.
data_name = 'full'
out_path = f'results/{data_name}'
dataset = f'dataset/data_v3_{data_name}.csv'

# Later cells write pickles and a CSV under out_path. Create the directory up
# front so a fresh checkout does not fail at the first joblib.dump.
os.makedirs(out_path, exist_ok=True)

# The CSV uses ';' as field separator and ',' as decimal mark.
df = pd.read_csv(dataset, sep=';', decimal=',')

print(f"Dataset shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
df.head()

Dataset shape: (1460, 27)

Columns: ['date', 'slp', 'holiday', 'weathercode', 'temperature_2m_max', 'temperature_2m_min', 'temperature_2m_mean', 'apparent_temperature_max', 'apparent_temperature_min', 'apparent_temperature_mean', 'sunrise', 'sunset', 'daylight_duration', 'sunshine_duration', 'rain_sum', 'snowfall_sum', 'precipitation_hours', 'windspeed_10m_max', 'windgusts_10m_max', 'shortwave_radiation_sum', 'et0_fao_evapotranspiration', 'day_of_week_sin', 'day_of_week_cos', 'winddirection_10m_dominant_sin', 'winddirection_10m_dominant_cos', 'day_of_year_sin', 'day_of_year_cos']


Unnamed: 0,date,slp,holiday,weathercode,temperature_2m_max,temperature_2m_min,temperature_2m_mean,apparent_temperature_max,apparent_temperature_min,apparent_temperature_mean,...,windspeed_10m_max,windgusts_10m_max,shortwave_radiation_sum,et0_fao_evapotranspiration,day_of_week_sin,day_of_week_cos,winddirection_10m_dominant_sin,winddirection_10m_dominant_cos,day_of_year_sin,day_of_year_cos
0,2021-10-01,638555.753,0,3,0.38953,0.248483,0.271326,0.228208,0.105172,0.150625,...,-0.068171,-0.101238,0.129082,0.290285,-0.433884,-0.900969,0.241922,-0.970296,-0.999963,-0.008583
1,2021-10-02,556131.836,0,3,0.520484,0.21863,0.47233,0.51948,0.251962,0.439128,...,0.28256,0.124197,-0.023733,0.055099,-0.974928,-0.222521,-0.104528,-0.994522,-0.999963,0.008583
2,2021-10-03,487753.707,1,51,0.972872,1.412763,1.115543,0.911192,1.022609,0.916267,...,1.063735,0.769408,-0.652673,0.612119,-0.781831,0.62349,0.173648,-0.984808,-0.999668,0.025748
3,2021-10-04,615408.403,0,51,0.3181,0.382823,0.338327,0.368822,0.472147,0.450224,...,-0.052229,-0.241164,-1.121221,-0.910402,0.0,1.0,-0.731354,-0.681998,-0.999079,0.042905
4,2021-10-05,757789.344,0,63,-0.086669,0.53209,0.097122,0.127769,0.594472,0.217203,...,-0.450788,-0.552109,-1.156583,-1.071319,0.781831,0.62349,0.406737,-0.913545,-0.998195,0.060049


In [3]:
# The chronological hold-out and the time-series CV below both rely on row
# order, so convert `date` to datetime and put the rows in date order.
df = (
    df.assign(date=pd.to_datetime(df['date']))
      .sort_values('date')
      .reset_index(drop=True)
)

In [4]:
# Separate features and target
X = df.drop(columns=['date', 'slp'])
y = df['slp']

# Define feature types for proper preprocessing
boolean_cols = ['holiday']
categorical_cols = ['weathercode']  # Leave as-is for tree-based models

# Cyclical quantities are stored as <base>_sin / <base>_cos column pairs, so
# the encoded names must be listed here: the bare base names never match a
# column of X and would leave every cyclical feature in `continuous_cols`.
cyclical_bases = ['day_of_week', 'day_of_year', 'winddirection_10m_dominant']
cyclical_cols = [
    f'{base}_{part}'
    for base in cyclical_bases
    for part in ('sin', 'cos')
    if f'{base}_{part}' in X.columns
]

# All other columns are continuous and should be scaled
excluded_cols = set(boolean_cols + categorical_cols + cyclical_cols)
continuous_cols = [col for col in X.columns if col not in excluded_cols]

# Chronological hold-out: the last `test_days` rows are the test set and
# everything before them is the training set (rows are already date-sorted).
test_days = 365
assert 0 < test_days < len(X), "test_days must leave at least one training row"

X_test = X.iloc[-test_days:]
y_test = y.iloc[-test_days:]

X_train = X.iloc[:-test_days]
y_train = y.iloc[:-test_days]


## 2. Time Series Split

In [5]:
# Number of folds used by the cross-validation in the hyper-parameter search.
n_split = 10
# TimeSeriesSplit keeps the training indices of every fold strictly before its
# test indices, so no fold is fitted on rows that come after the ones it scores.
# NOTE(review): the search below reports negative mean CV R2 while the hold-out
# R2 is ~0.98; presumably the short per-fold windows have little variance for
# R2 to explain. Confirm that R2_mean is the right selection metric.
tscv = TimeSeriesSplit(n_splits = n_split)

In [6]:
def evaluate_model_params(estimator, X, y, tscv):
    """Cross-validate a fresh clone of ``estimator`` on every split of ``tscv``.

    Args:
        estimator: Configured estimator; it is cloned for each fold and never
            fitted itself.
        X: Feature table, indexed positionally with ``.iloc``.
        y: Target values, indexed positionally with ``.iloc``.
        tscv: Splitter whose ``split(X)`` yields (train, test) index pairs.

    Returns:
        dict with the mean and standard deviation across folds of RMSE, MAE
        and R2, under the keys ``RMSE_mean``, ``RMSE_std``, ``MAE_mean``,
        ``MAE_std``, ``R2_mean`` and ``R2_std``.
    """
    per_fold = {'RMSE': [], 'MAE': [], 'R2': []}

    for fit_idx, val_idx in tscv.split(X):
        fold_model = clone(estimator)
        fold_model.fit(X.iloc[fit_idx], y.iloc[fit_idx])

        y_val = y.iloc[val_idx]
        y_hat = fold_model.predict(X.iloc[val_idx])

        per_fold['RMSE'].append(np.sqrt(mean_squared_error(y_val, y_hat)))
        per_fold['MAE'].append(mean_absolute_error(y_val, y_hat))
        per_fold['R2'].append(r2_score(y_val, y_hat))

    # Flatten to <metric>_mean / <metric>_std, keeping RMSE, MAE, R2 order.
    aggregated = {}
    for metric_name, fold_values in per_fold.items():
        aggregated[f'{metric_name}_mean'] = np.mean(fold_values)
        aggregated[f'{metric_name}_std'] = np.std(fold_values)
    return aggregated

In [7]:
def param_grid_iter(grid_dict):
    """Yield every combination of a ``{'param': [v1, v2, ...], ...}`` grid.

    Each yielded item is a dict mapping every parameter name to one of its
    candidate values. Combinations follow ``itertools.product`` order over the
    value lists taken in the dict's key order, so the last key varies fastest
    (the same idea as sklearn.model_selection.ParameterGrid).
    """
    names = tuple(grid_dict)
    candidate_lists = [grid_dict[name] for name in names]
    for combo in itertools.product(*candidate_lists):
        yield {name: value for name, value in zip(names, combo)}

In [8]:
def top_k_results(results_list, k=5, metric='R2_mean', ascending=False):
    """Return the ``k`` best entries of ``results_list`` ranked by ``metric``.

    Args:
        results_list: List of dicts, each holding 'params' plus metric values.
        k: Maximum number of entries to return.
        metric: Key to rank on (default 'R2_mean').
        ascending: False (default) ranks the highest value first, which suits
            scores such as R2. Pass True for error metrics such as
            'RMSE_mean', where the lowest value is best.

    Returns:
        A new list with at most ``k`` entries, best first. Entries whose metric
        is missing, None or NaN are always ranked last. Ties keep their input
        order.
    """
    worst = np.inf if ascending else -np.inf

    def _score(result):
        value = result.get(metric, worst)
        # NaN compares False against everything, which leaves the sort order
        # undefined and can float a NaN row to the top. `value != value` is
        # the type-agnostic NaN test.
        if value is None or value != value:
            return worst
        return value

    return sorted(results_list, key=_score, reverse=not ascending)[:k]


In [9]:
def make_fine_grid_around(best_params, param_specs, factor=0.5, n_points=5, max_combos=10000):
    """Build the list of parameter dicts for a fine search around ``best_params``.

    Args:
        best_params: Best combination from the coarse search. A parameter that
            is absent, or whose value is None (e.g. ``max_depth=None``), has no
            numeric centre and is spread over its full bounds instead.
        param_specs: Per-parameter spec, ``{'type': 'int'|'float'|'cat', ...}``
            with ``'bounds': (min, max)`` for numeric types and
            ``'values': [...]`` for 'cat'.
        factor: Relative half-width of the window (0.5 means +/-50% of the
            best value). Integers move by at least 1 in each direction.
        n_points: Number of grid points requested per numeric parameter.
        max_combos: Hard cap on the number of combinations returned.

    Returns:
        List of parameter dicts in Cartesian-product order over
        ``param_specs`` (last parameter varies fastest). Numeric values are
        native Python ``int`` / ``float``. When the best value lies inside the
        clipped window it is always one of the grid points, so the fine grid
        re-evaluates the incumbent.
    """
    fine_specs = {}
    for p, spec in param_specs.items():
        kind = spec['type']

        # Categorical parameters are always searched over all their values.
        if kind == 'cat':
            fine_specs[p] = list(spec['values'])
            continue

        best = best_params.get(p, None)
        if best is None:
            # No numeric centre: fall back to an even spread over the bounds.
            lo, hi = spec.get('bounds', (None, None))
            if kind == 'int':
                step = max(1, (int(hi) - int(lo)) // (n_points - 1)) if n_points > 1 else 1
                # The lower end is floored at 1, even when the bound is lower.
                fine_specs[p] = list(range(max(1, int(lo)), int(hi) + 1, step))
            else:
                fine_specs[p] = [float(v) for v in np.linspace(lo, hi, n_points)]
            continue

        lo_bound, hi_bound = spec['bounds']

        if kind == 'int':
            spread = max(1, factor * best)
            lo = max(lo_bound, int(best - spread))
            hi = min(hi_bound, int(best + spread))
            if lo >= hi:
                values = {int(best)}
            else:
                values = {int(x) for x in np.linspace(lo, hi, n_points)}
                # Truncation to int or clipping at the bounds can skip the
                # incumbent, so put it back when it is inside the window.
                if lo <= best <= hi:
                    values.add(int(best))
            fine_specs[p] = sorted(values)

        else:  # float
            lo = max(lo_bound, best * (1 - factor))
            hi = min(hi_bound, best * (1 + factor))
            if lo >= hi:
                # Collapsed window (best is 0, negative or outside the bounds):
                # one point instead of n_points duplicates.
                fine_specs[p] = [float(best)]
            else:
                # Native floats keep np.float64 out of the stored params.
                values = [float(v) for v in np.linspace(lo, hi, n_points)]
                if lo <= best <= hi and not np.isclose(values, best).any():
                    values = sorted(values + [float(best)])
                fine_specs[p] = values

    # Cartesian product over the per-parameter lists, capped at max_combos.
    # The cap keeps the FIRST max_combos combinations, so later values of the
    # leading parameters are dropped when it is hit.
    keys = list(fine_specs)
    product = itertools.product(*(fine_specs[key] for key in keys))
    return [dict(zip(keys, combo)) for combo in itertools.islice(product, max_combos)]


## Define Models

In [10]:
# Search space per model family. Each entry holds:
#   'estimator': template estimator; the search clones it for every candidate
#   'coarse':    {param: [values]} grid evaluated exhaustively in the first pass
#   'specs':     {param: {'type', 'bounds' | 'values'}} read by
#                make_fine_grid_around to build the second-pass grid
models_space = {
    'RandomForest': {
        # NOTE(review): n_jobs=-1 here runs inside Parallel(n_jobs=-1) workers
        # during the search and may oversubscribe the CPUs. Confirm it is intended.
        'estimator': RandomForestRegressor(random_state=42, n_jobs=-1),
        'coarse': {
            'n_estimators': [50, 100, 300],
            # None is a coarse candidate, but the 'specs' entry below is int-typed,
            # so the fine grid only ever contains bounded integer depths.
            'max_depth': [5, 10, 20, None],
            'min_samples_split': [2, 5, 10],
            'max_features': ['sqrt', 'log2', 0.5]
        },
        'specs': {
            'n_estimators': {'type':'int', 'bounds':(10,1000)},
            'max_depth': {'type':'int', 'bounds':(3,50)},
            'min_samples_split': {'type':'int', 'bounds':(2,50)},
            'max_features': {'type':'cat', 'values':['sqrt','log2',0.2,0.3,0.4,0.5,None]}
        }
    },
    'GradientBoosting': {
        'estimator': GradientBoostingRegressor(random_state=42),
        'coarse': {
            'n_estimators': [100, 300, 800],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 5, 8],
            'subsample': [0.6, 0.8, 1.0]
        },
        'specs': {
            'n_estimators': {'type':'int', 'bounds':(50,2000)},
            'learning_rate': {'type':'float', 'bounds':(1e-4,1.0)},
            'max_depth': {'type':'int', 'bounds':(1,20)},
            'subsample': {'type':'float', 'bounds':(0.3,1.0)}
        }
    },
    # LightGBM is currently disabled; the entry is kept for reference.
    # 'LightGBM': {
    #     'estimator': LGBMRegressor(random_state=42, n_jobs=-1, verbose=-1),
    #     'coarse': {
    #         'n_estimators': [100, 300, 1000],
    #         'learning_rate': [0.01, 0.05, 0.1],
    #         'num_leaves': [15, 31, 63],
    #         'max_depth': [-1, 5, 10]
    #     },
    #     'specs': {
    #         'n_estimators': {'type':'int', 'bounds':(50,3000)},
    #         'learning_rate': {'type':'float', 'bounds':(1e-4,1.0)},
    #         'num_leaves': {'type':'int', 'bounds':(6,2048)},
    #         'max_depth': {'type':'int', 'bounds':(-1,50)},
    #         'min_child_samples': {'type':'int', 'bounds':(1,100)}
    #     }
    # }
}

In [11]:
def evaluate_param_set(estimator, params, X, y, tscv):
    """Cross-validate one hyper-parameter combination.

    Clones ``estimator``, applies ``params`` to the clone and scores it with
    ``evaluate_model_params``. Returns a dict holding ``params`` under the key
    'params', followed by the cross-validation metrics.
    """
    candidate = clone(estimator)
    candidate.set_params(**params)
    scores = evaluate_model_params(candidate, X, y, tscv)

    outcome = {'params': params}
    outcome.update(scores)
    return outcome

In [12]:
def coarse_to_fine_search(name, model_info, X, y, tscv, top_k=5,
                          fine_factor=0.5, fine_n_points=7, fine_top_k=3):
    """Two-stage hyper-parameter search: exhaustive coarse grid, then a fine
    grid around the best coarse candidate.

    Args:
        name: Model label, used only in the progress output.
        model_info: Dict with 'estimator' (template, cloned per candidate),
            'coarse' ({param: [values]}) and 'specs' (fine-grid spec per param).
        X, y: Training features and target.
        tscv: Cross-validation splitter handed to every evaluation.
        top_k: Number of coarse results reported and returned in 'top_coarse'.
        fine_factor: Relative half-width of the fine grid around the best
            coarse parameters.
        fine_n_points: Grid points per numeric parameter in the fine grid.
        fine_top_k: Number of fine results reported and returned in 'top_fine'.

    Returns:
        Dict with 'coarse_results', 'top_coarse', 'fine_results', 'top_fine'
        and 'best'. 'best' is the highest-R2_mean entry over BOTH stages: the
        fine grid does not always contain the coarse winner (e.g.
        ``max_depth=None`` has no integer equivalent), so a fine-only pick
        could be worse than a candidate already evaluated.

    Raises:
        ValueError: If the coarse grid yields no combinations.
    """
    print('\n' + '='*60)
    print(f'Inizio ricerca per: {name}')
    estimator = model_info['estimator']
    coarse_grid = model_info['coarse']
    specs = model_info['specs']

    def _evaluate_all(candidates):
        # One cross-validation task per candidate, run in parallel.
        return Parallel(n_jobs=-1, verbose=10)(
            delayed(evaluate_param_set)(estimator, params, X, y, tscv)
            for params in candidates
        )

    # ------- COARSE SEARCH PARALLEL -------
    param_list = list(param_grid_iter(coarse_grid))
    print(f'Coarse grid size: {len(param_list)} combinazioni')
    if not param_list:
        raise ValueError(f"Coarse grid for '{name}' produced no parameter combinations")

    coarse_results = _evaluate_all(param_list)

    # top-k
    top_coarse = top_k_results(coarse_results, k=top_k, metric='R2_mean')
    print('\nTop risultati (coarse):')
    for r in top_coarse:
        print(f"  R2={r['R2_mean']:.4f} — params={r['params']}")

    # ------- FINE SEARCH PARALLEL -------
    best_params = top_coarse[0]['params']

    fine_param_list = make_fine_grid_around(
        best_params, specs, factor=fine_factor, n_points=fine_n_points
    )
    print(f"\nFine grid size (limitata): {len(fine_param_list)} combinazioni\n")

    fine_results = _evaluate_all(fine_param_list)

    top_fine = top_k_results(fine_results, k=fine_top_k, metric='R2_mean')
    print('\nTop risultati (fine):')
    for r in top_fine:
        print(f"  R2={r['R2_mean']:.4f} — params={r['params']}")

    # Pick the winner over both stages. On a tie the coarse entry wins,
    # because the sort is stable and coarse results come first.
    best_final = top_k_results(coarse_results + fine_results, k=1, metric='R2_mean')[0]
    return {
        'coarse_results': coarse_results,
        'top_coarse': top_coarse,
        'fine_results': fine_results,
        'top_fine': top_fine,
        'best': best_final
    }


In [13]:
# Run the coarse-to-fine search for every configured model and keep each winner.
all_best = {}
for name, info in models_space.items():
    res = coarse_to_fine_search(name, info, X_train, y_train, tscv, top_k=3)
    all_best[name] = best_entry = res['best']

    # Persist the full search record (both stages) so it can be inspected
    # later without re-running the search.
    results_file = f'{out_path}/results_{name}_coarse_to_fine_{data_name}.pkl'
    joblib.dump(res, results_file)
    print(f"Risultati salvati in {results_file}")


Inizio ricerca per: RandomForest
Coarse grid size: 108 combinazioni


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of 108 | elapsed:    6.0s remaining:  1.7min
[Parallel(n_jobs=-1)]: Done  17 out of 108 | elapsed:    6.3s remaining:   33.7s
[Parallel(n_jobs=-1)]: Done  28 out of 108 | elapsed:    6.5s remaining:   18.5s
[Parallel(n_jobs=-1)]: Done  39 out of 108 | elapsed:    8.0s remaining:   14.2s
[Parallel(n_jobs=-1)]: Done  50 out of 108 | elapsed:    8.4s remaining:    9.7s
[Parallel(n_jobs=-1)]: Done  61 out of 108 | elapsed:    8.6s remaining:    6.6s
[Parallel(n_jobs=-1)]: Done  72 out of 108 | elapsed:    9.2s remaining:    4.6s
[Parallel(n_jobs=-1)]: Done  83 out of 108 | elapsed:   12.5s remaining:    3.8s
[Parallel(n_jobs=-1)]: Done  94 out of 108 | elapsed:   12.6s remaining:    1.9s
[Parallel(n_jobs=-1)]: Done 105 out of 108 | elapsed:   12.8s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   13.1s finished
[Parallel(n_jobs=-1)]: Using backend Loky


Top risultati (coarse):
  R2=-0.3963 — params={'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 2, 'max_features': 'sqrt'}
  R2=-0.3963 — params={'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'max_features': 'sqrt'}
  R2=-0.4725 — params={'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'max_features': 'sqrt'}

Fine grid size (limitata): 686 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   11.5s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:   15.2s
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:   16.6s
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   18.1s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   20.4s
[Paralle


Top risultati (fine):
  R2=-0.1971 — params={'n_estimators': 25, 'max_depth': 23, 'min_samples_split': 3, 'max_features': 0.2}
  R2=-0.1971 — params={'n_estimators': 25, 'max_depth': 30, 'min_samples_split': 3, 'max_features': 'sqrt'}
  R2=-0.1971 — params={'n_estimators': 25, 'max_depth': 30, 'min_samples_split': 3, 'max_features': 0.2}
Risultati salvati in results/full/results_RandomForest_coarse_to_fine_full.pkl

Inizio ricerca per: GradientBoosting
Coarse grid size: 81 combinazioni


[Parallel(n_jobs=-1)]: Done   2 out of  81 | elapsed:    2.8s remaining:  1.8min
[Parallel(n_jobs=-1)]: Done  11 out of  81 | elapsed:    4.9s remaining:   31.0s
[Parallel(n_jobs=-1)]: Done  20 out of  81 | elapsed:    6.8s remaining:   20.8s
[Parallel(n_jobs=-1)]: Done  29 out of  81 | elapsed:    9.4s remaining:   16.8s
[Parallel(n_jobs=-1)]: Done  38 out of  81 | elapsed:   13.0s remaining:   14.7s
[Parallel(n_jobs=-1)]: Done  47 out of  81 | elapsed:   18.2s remaining:   13.2s
[Parallel(n_jobs=-1)]: Done  56 out of  81 | elapsed:   23.4s remaining:   10.5s
[Parallel(n_jobs=-1)]: Done  65 out of  81 | elapsed:   29.0s remaining:    7.1s
[Parallel(n_jobs=-1)]: Done  74 out of  81 | elapsed:   40.6s remaining:    3.8s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:   59.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.



Top risultati (coarse):
  R2=-0.4369 — params={'n_estimators': 800, 'learning_rate': 0.05, 'max_depth': 3, 'subsample': 1.0}
  R2=-0.4473 — params={'n_estimators': 300, 'learning_rate': 0.05, 'max_depth': 3, 'subsample': 1.0}
  R2=-0.4866 — params={'n_estimators': 300, 'learning_rate': 0.1, 'max_depth': 3, 'subsample': 1.0}

Fine grid size (limitata): 1372 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:   13.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   15.9s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:   19.5s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   22.3s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:   26.5s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:   36.3s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   41.8s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   47.6s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   53.7s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   59.4s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:  1.5min
[Paralle


Top risultati (fine):
  R2=-0.2546 — params={'n_estimators': 533, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
  R2=-0.2604 — params={'n_estimators': 400, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
  R2=-0.2642 — params={'n_estimators': 666, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
Risultati salvati in results/full/results_GradientBoosting_coarse_to_fine_full.pkl


In [14]:
# One row per model with its best cross-validated metrics and parameters.
summary = [
    {
        'model': name,
        'R2_mean': best['R2_mean'],
        'RMSE_mean': best['RMSE_mean'],
        'MAE_mean': best['MAE_mean'],
        'best_params': best['params'],
    }
    for name, best in all_best.items()
]

# Rank the models by mean CV R2, best first.
summary_df = pd.DataFrame(summary)
summary_df = summary_df.sort_values('R2_mean', ascending=False)
summary_df = summary_df.reset_index(drop=True)

print('\n' + '='*80)
print('CONFRONTO FINALE: modelli ottimizzati')
print(summary_df)


CONFRONTO FINALE: modelli ottimizzati
              model   R2_mean      RMSE_mean       MAE_mean  \
0      RandomForest -0.197116  197910.978956  163080.573899   
1  GradientBoosting -0.254629  198981.388650  162770.187043   

                                         best_params  
0  {'n_estimators': 25, 'max_depth': 23, 'min_sam...  
1  {'n_estimators': 533, 'learning_rate': 0.07500...  


In [15]:
# The final models are fitted on the training split only: the hold-out rows
# must stay unseen for the test-set evaluation further down. The message now
# says so instead of claiming the whole dataset.
print('\nFit e salvataggio dei modelli finali (sul training set):')
for idx, row in summary_df.iterrows():
    name = row['model']
    best_params = row['best_params']
    # Clone before set_params: calling set_params on the template would
    # silently change models_space[name]['estimator'] for any later cell
    # (or a re-run of the search) that reuses it.
    estimator = clone(models_space[name]['estimator']).set_params(**best_params)
    print(f"  Fit model: {name} con params: {best_params}")
    estimator.fit(X_train, y_train)
    joblib.dump(estimator, f'{out_path}/best_model_{name}_{data_name}.pkl')
    print(f"  Salvato: {out_path}/best_model_{name}_{data_name}.pkl")

print('\nDONE')


Fit e salvataggio dei modelli finali (su tutto il dataset):
  Fit model: RandomForest con params: {'n_estimators': 25, 'max_depth': 23, 'min_samples_split': 3, 'max_features': 0.2}
  Salvato: results/full/best_model_RandomForest_full.pkl
  Fit model: GradientBoosting con params: {'n_estimators': 533, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
  Salvato: results/full/best_model_GradientBoosting_full.pkl

DONE


In [16]:
def evaluate_on_test(estimator, X_test, y_test):
    """Score an already-fitted model on a test set with an extended metric set.

    Args:
        estimator: Fitted model exposing ``predict``.
        X_test: Test features.
        y_test: True target values for ``X_test``.

    Returns:
        dict with the keys 'R2', 'RMSE', 'MAE', 'Bias' (mean of prediction
        minus truth), 'Max_Error' (largest absolute error), 'Pearson',
        'Spearman', 'MAPE' and 'sMAPE' (both in percent).
    """
    y_pred = estimator.predict(X_test)

    abs_err = np.abs(y_test - y_pred)
    # sMAPE denominator: mean of the absolute true and predicted values.
    mean_magnitude = (np.abs(y_test) + np.abs(y_pred)) / 2

    scores = {}
    scores['R2'] = r2_score(y_test, y_pred)
    scores['RMSE'] = np.sqrt(mean_squared_error(y_test, y_pred))
    scores['MAE'] = mean_absolute_error(y_test, y_pred)
    scores['Bias'] = np.mean(y_pred - y_test)
    scores['Max_Error'] = np.max(abs_err)
    scores['Pearson'] = pearsonr(y_test, y_pred)[0]
    scores['Spearman'] = spearmanr(y_test, y_pred)[0]
    # Careful: MAPE divides by the true value, so it blows up when y_test is
    # at or near zero.
    scores['MAPE'] = np.mean(abs_err / np.abs(y_test)) * 100
    scores['sMAPE'] = np.mean(abs_err / mean_magnitude) * 100
    return scores


In [17]:
test_results = []

for name, best_params in zip(summary_df['model'], summary_df['best_params']):
    # Load the already-fitted model saved by the final-fit cell.
    model = joblib.load(f'{out_path}/best_model_{name}_{data_name}.pkl')

    # Score it on the hold-out rows and tag the metrics with the model identity.
    metrics = evaluate_on_test(model, X_test, y_test)
    metrics.update(model=name, best_params=best_params)
    test_results.append(metrics)

# Collect into a DataFrame ranked by hold-out R2, best first.
test_results_df = pd.DataFrame(test_results)
test_results_df = test_results_df.sort_values('R2', ascending=False)
test_results_df = test_results_df.reset_index(drop=True)

# Print the table
print('\n=== Risultati sui test set ===')
print(test_results_df)



=== Risultati sui test set ===
         R2           RMSE           MAE         Bias      Max_Error  \
0  0.977110  104122.234332  71225.292883  3804.382210  534701.875687   
1  0.976345  105848.636728  72343.276206  3442.395176  540722.101063   

    Pearson  Spearman      MAPE     sMAPE             model  \
0  0.988930  0.985001  9.063128  9.145328  GradientBoosting   
1  0.988188  0.987877  8.751026  8.566838      RandomForest   

                                         best_params  
0  {'n_estimators': 533, 'learning_rate': 0.07500...  
1  {'n_estimators': 25, 'max_depth': 23, 'min_sam...  


In [18]:
# Persist the hold-out metrics twice: CSV for quick inspection, and a pickle
# so the DataFrame can be reloaded as-is later.
results_stem = f'{out_path}/test_set_results_{data_name}'
csv_path = f'{results_stem}.csv'
pkl_path = f'{results_stem}.pkl'

test_results_df.to_csv(csv_path, index=False)
joblib.dump(test_results_df, pkl_path)

print(f"\nRisultati test set salvati in '{csv_path}' e '{pkl_path}'")


Risultati test set salvati in 'results/full/test_set_results_full.csv' e 'results/full/test_set_results_full.pkl'
