# Model Selection for SLP Prediction

This notebook performs model selection to predict the `slp` column using various machine learning algorithms with time series cross-validation.


In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.base import clone
from lightgbm import LGBMRegressor
from joblib import Parallel, delayed

import time
import joblib
import itertools
import warnings
warnings.filterwarnings('ignore')


## 1. Load and Prepare Data


In [2]:
# Dataset configurations
data_names = ['full', 'stat', 'rf']

## 3. Time Series Split

In [3]:
n_split = 10
tscv = TimeSeriesSplit(n_splits = n_split)

In [4]:
def evaluate_model_params(estimator, X, y, tscv):
    """Valuta un singolo estimator (clonato) con TimeSeriesSplit.
    Ritorna dizionario di metriche medie.
    """
    rmse_scores = []
    mae_scores = []
    r2_scores = []
    for train_idx, test_idx in tscv.split(X):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        model = clone(estimator)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
        mae_scores.append(mean_absolute_error(y_test, y_pred))
        r2_scores.append(r2_score(y_test, y_pred))
    return {
        'RMSE_mean': np.mean(rmse_scores),
        'RMSE_std': np.std(rmse_scores),
        'MAE_mean': np.mean(mae_scores),
        'MAE_std': np.std(mae_scores),
        'R2_mean': np.mean(r2_scores),
        'R2_std': np.std(r2_scores),
    }

In [5]:
def param_grid_iter(grid_dict):
    """Genera tutte le combinazioni dalla dict di liste come sklearn.model_selection.ParameterGrid.
    grid_dict: {'param': [v1,v2,...], ...}
    """
    keys = list(grid_dict.keys())
    for values in itertools.product(*(grid_dict[k] for k in keys)):
        yield dict(zip(keys, values))

In [6]:
def top_k_results(results_list, k=5, metric='R2_mean'):
    """Ordina la lista di dict (ognuno con 'params' e metriche) e ritorna top k.
    metric: campo su cui ordinare (default R2_mean decrescente).
    """
    return sorted(results_list, key=lambda r: r.get(metric, -np.inf), reverse=True)[:k]


In [7]:
def make_fine_grid_around(best_params, param_specs, factor=0.5, n_points=5):
    """Crea una lista di param dict per la fine search intorno ai best_params.
    param_specs for each param: {'type':'int'/'float'/'cat', 'bounds':(min,max)}
    factor: estensione percentuale (es 0.5 = +/-50%)
    n_points: quanti punti generare per ogni parametro
    """
    fine_specs = {}
    for p, spec in param_specs.items():
        best = best_params.get(p, None)
        if best is None:
            # se non presente, usa bounds
            lo, hi = spec.get('bounds', (None, None))
            if spec['type'] == 'cat':
                fine_specs[p] = spec['values']
            elif spec['type'] == 'int':
                fine_specs[p] = list(range(
                    max(1, int(lo)),
                    int(hi) + 1,
                    max(1, int((int(hi)-int(lo))//(n_points-1) if n_points>1 else 1))
                ))
            else:
                fine_specs[p] = list(np.linspace(lo, hi, n_points))
            continue

        if spec['type'] == 'cat':
            fine_specs[p] = spec['values']

        elif spec['type'] == 'int':
            lo = max(spec['bounds'][0], int(best - max(1, factor * best)))
            hi = min(spec['bounds'][1], int(best + max(1, factor * best)))
            if lo >= hi:
                fine_specs[p] = [int(best)]
            else:
                fine_specs[p] = sorted(list(set([int(x) for x in np.linspace(lo, hi, n_points)])))

        else:  # float
            lo = max(spec['bounds'][0], best * (1 - factor))
            hi = min(spec['bounds'][1], best * (1 + factor))
            fine_specs[p] = list(np.linspace(lo, hi, n_points))

    combos = list(itertools.islice(param_grid_iter(fine_specs), 10000))
    return combos


# Define Models

In [8]:
models_space = {
    'RandomForest': {
        'estimator': RandomForestRegressor(random_state=42, n_jobs=-1),
        'coarse': {
            'n_estimators': [50, 100, 300],
            'max_depth': [5, 10, 20, None],
            'min_samples_split': [2, 5, 10],
            'max_features': ['sqrt', 'log2', 0.5]
        },
        'specs': {
            'n_estimators': {'type':'int', 'bounds':(10,1000)},
            'max_depth': {'type':'int', 'bounds':(3,50)},
            'min_samples_split': {'type':'int', 'bounds':(2,50)},
            'max_features': {'type':'cat', 'values':['sqrt','log2',0.2,0.3,0.4,0.5,None]}
        }
    },
    'GradientBoosting': {
        'estimator': GradientBoostingRegressor(random_state=42),
        'coarse': {
            'n_estimators': [100, 300, 800],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 5, 8],
            'subsample': [0.6, 0.8, 1.0]
        },
        'specs': {
            'n_estimators': {'type':'int', 'bounds':(50,1500)},
            'learning_rate': {'type':'float', 'bounds':(1e-4,1.0)},
            'max_depth': {'type':'int', 'bounds':(1,20)},
            'subsample': {'type':'float', 'bounds':(0.3,1.0)}
        }
    }
}

In [9]:
def evaluate_param_set(estimator, params, X, y, tscv):
    est = clone(estimator).set_params(**params)
    metrics = evaluate_model_params(est, X, y, tscv)
    return {'params': params, **metrics}

In [10]:
def coarse_to_fine_search(name, model_info, X, y, tscv, top_k=5):
    print('\n' + '='*60)
    print(f'Inizio ricerca per: {name}')
    estimator = model_info['estimator']
    coarse_grid = model_info['coarse']
    specs = model_info['specs']

    # ------- COARSE SEARCH PARALLEL -------
    param_list = list(param_grid_iter(coarse_grid))
    print(f'Coarse grid size: {len(param_list)} combinazioni')

    start_time = time.time()
    coarse_results = Parallel(n_jobs=-1, verbose=10)(
        delayed(evaluate_param_set)(estimator, params, X, y, tscv)
        for params in param_list
    )

    # top-k
    top_coarse = top_k_results(coarse_results, k=top_k, metric='R2_mean')
    print('\nTop risultati (coarse):')
    for r in top_coarse:
        print(f"  R2={r['R2_mean']:.4f} — params={r['params']}")

    # ------- FINE SEARCH PARALLEL -------
    best_coarse = top_coarse[0]
    best_params = best_coarse['params']

    fine_param_list = make_fine_grid_around(best_params, specs, factor=0.5, n_points=7)
    print(f"\nFine grid size (limitata): {len(fine_param_list)} combinazioni\n")

    fine_results = Parallel(n_jobs=-1, verbose=10)(
        delayed(evaluate_param_set)(estimator, params, X, y, tscv)
        for params in fine_param_list
    )
    time_elapsed = time.time() - start_time

    top_fine = top_k_results(fine_results, k=3, metric='R2_mean')
    print('\nTop risultati (fine):')
    for r in top_fine:
        print(f"  R2={r['R2_mean']:.4f} — params={r['params']}")

    best_final = top_fine[0]
    best_final["search_time"] = time_elapsed
    return {
        'search_time': time_elapsed,
        'coarse_results': coarse_results,
        'top_coarse': top_coarse,
        'fine_results': fine_results,
        'top_fine': top_fine,
        'best': best_final,
    }


In [11]:
def evaluate_on_test(estimator, X_test, y_test):
    """Valuta un modello già fit su un test set con metriche aggiuntive."""
    y_pred = estimator.predict(X_test)
    
    bias = np.mean(y_pred - y_test)
    max_error = np.max(np.abs(y_pred - y_test))
    pearson_corr = pearsonr(y_test, y_pred)[0]
    spearman_corr = spearmanr(y_test, y_pred)[0]
    mape = np.mean(np.abs((y_test - y_pred)/y_test)) * 100  # attenzione valori vicino a zero
    smape = np.mean(np.abs(y_test - y_pred)/((np.abs(y_test)+np.abs(y_pred))/2)) * 100
    
    return {
        'R2': r2_score(y_test, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'MAE': mean_absolute_error(y_test, y_pred),
        'Bias': bias,
        'Max_Error': max_error,
        'Pearson': pearson_corr,
        'Spearman': spearman_corr,
        'MAPE': mape,
        'sMAPE': smape
    }

In [12]:
def fine_tune_pipeline(data_name):    
    out_path = f'results/{data_name}'
    dataset = f'dataset/data_v3_{data_name}.csv'
    df = pd.read_csv(dataset, sep=';', decimal=',')

    print(f"Dataset shape: {df.shape}")
    print(f"\nColumns: {df.columns.tolist()}")
    
    # Parse date and sort by date (important for time series)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date').reset_index(drop=True)
    
    # Separate features and target
    X = df.drop(columns=['date', 'slp'])
    y = df['slp']

    test_days = 365
    X_test = X.iloc[-test_days:]
    y_test = y.iloc[-test_days:]

    X_train = X.iloc[:-test_days]
    y_train = y.iloc[:-test_days]  
    
    all_best = {}
    for name, info in models_space.items():
        res = coarse_to_fine_search(name, info, X_train, y_train, tscv, top_k=3)
        best_entry = res['best']
        all_best[name] = best_entry
        # salva i risultati intermedi su disco per controllo laterale
        joblib.dump(res, f'{out_path}/results_{name}_coarse_to_fine_{data_name}.pkl')
        print(f"Risultati salvati in {out_path}/results_{name}_coarse_to_fine_{data_name}.pkl")
        
    summary = []
    for name, best in all_best.items():
        summary.append({
            'model': name,
            'search_time': best['search_time'],
            'R2_mean': best['R2_mean'],
            'RMSE_mean': best['RMSE_mean'],
            'MAE_mean': best['MAE_mean'],
            'best_params': best['params']
        })
    summary_df = pd.DataFrame(summary).sort_values('R2_mean', ascending=False).reset_index(drop=True)
    print('\n' + '='*80)
    print('CONFRONTO FINALE: modelli ottimizzati')
    print(summary_df)
    
    print('\nFit e salvataggio dei modelli finali (su tutto il dataset):')
    for idx, row in summary_df.iterrows():
        name = row['model']
        best_params = row['best_params']
        estimator = models_space[name]['estimator'].set_params(**best_params)
        print(f"  Fit model: {name} con params: {best_params}")
        start_time = time.time()
        estimator.fit(X_train, y_train)
        fit_time = time.time() - start_time
        summary_df.at[idx, 'fit_time'] = fit_time
        joblib.dump(estimator, f'{out_path}/best_model_{name}_{data_name}.pkl')
        print(f"  Salvato: {out_path}/best_model_{name}_{data_name}.pkl")

    print('\nDONE')
    
    test_results = []

    for _, row in summary_df.iterrows():
        name = row['model']
        model_file = f'{out_path}/best_model_{name}_{data_name}.pkl'
        
        # Carica modello già fit
        model = joblib.load(model_file)
        
        # Valutazione sul test set
        metrics = evaluate_on_test(model, X_test, y_test)
        
        metrics['model'] = name
        metrics['best_params'] = row['best_params']
        metrics['search_time'] = row['search_time']
        metrics['fit_time'] = row['fit_time']
        test_results.append(metrics)
        
    print(test_results)        

    # Trasforma in DataFrame
    test_results_df = pd.DataFrame(test_results).sort_values('R2', ascending=False).reset_index(drop=True)

    # Stampa
    print('\n=== Risultati sui test set ===')
    print(test_results_df)
    
    # Salva in CSV
    test_results_df.to_csv(f'{out_path}/test_set_results_{data_name}.csv', index=False)

    # Salva anche in pickle per uso successivo
    joblib.dump(test_results_df, f'{out_path}/test_set_results_{data_name}.pkl')

    print(f"\nRisultati test set salvati in '{out_path}/test_set_results_{data_name}.csv' e '{out_path}/test_set_results_{data_name}.pkl'")


In [13]:
for data_name in data_names:
    fine_tune_pipeline(data_name)

Dataset shape: (1460, 27)

Columns: ['date', 'slp', 'holiday', 'weathercode', 'temperature_2m_max', 'temperature_2m_min', 'temperature_2m_mean', 'apparent_temperature_max', 'apparent_temperature_min', 'apparent_temperature_mean', 'sunrise', 'sunset', 'daylight_duration', 'sunshine_duration', 'rain_sum', 'snowfall_sum', 'precipitation_hours', 'windspeed_10m_max', 'windgusts_10m_max', 'shortwave_radiation_sum', 'et0_fao_evapotranspiration', 'day_of_week_sin', 'day_of_week_cos', 'winddirection_10m_dominant_sin', 'winddirection_10m_dominant_cos', 'day_of_year_sin', 'day_of_year_cos']

Inizio ricerca per: RandomForest
Coarse grid size: 108 combinazioni


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.


[Parallel(n_jobs=-1)]: Done   6 out of 108 | elapsed:    6.0s remaining:  1.7min
[Parallel(n_jobs=-1)]: Done  17 out of 108 | elapsed:    6.3s remaining:   33.5s
[Parallel(n_jobs=-1)]: Done  28 out of 108 | elapsed:    6.6s remaining:   18.9s
[Parallel(n_jobs=-1)]: Done  39 out of 108 | elapsed:    8.0s remaining:   14.1s
[Parallel(n_jobs=-1)]: Done  50 out of 108 | elapsed:    8.3s remaining:    9.6s
[Parallel(n_jobs=-1)]: Done  61 out of 108 | elapsed:    8.5s remaining:    6.6s
[Parallel(n_jobs=-1)]: Done  72 out of 108 | elapsed:    8.8s remaining:    4.4s
[Parallel(n_jobs=-1)]: Done  83 out of 108 | elapsed:   12.5s remaining:    3.8s
[Parallel(n_jobs=-1)]: Done  94 out of 108 | elapsed:   12.5s remaining:    1.9s
[Parallel(n_jobs=-1)]: Done 105 out of 108 | elapsed:   12.8s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   12.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.



Top risultati (coarse):
  R2=-0.3963 — params={'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 2, 'max_features': 'sqrt'}
  R2=-0.3964 — params={'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'max_features': 'sqrt'}
  R2=-0.4727 — params={'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'max_features': 'sqrt'}

Fine grid size (limitata): 686 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   11.4s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   12.3s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:   15.2s
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:   16.4s
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   17.8s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   20.6s
[Paralle


Top risultati (fine):
  R2=-0.1997 — params={'n_estimators': 25, 'max_depth': 26, 'min_samples_split': 3, 'max_features': 'sqrt'}
  R2=-0.1997 — params={'n_estimators': 25, 'max_depth': 26, 'min_samples_split': 3, 'max_features': 0.2}
  R2=-0.1997 — params={'n_estimators': 25, 'max_depth': 23, 'min_samples_split': 3, 'max_features': 0.2}
Risultati salvati in results/full/results_RandomForest_coarse_to_fine_full.pkl

Inizio ricerca per: GradientBoosting
Coarse grid size: 81 combinazioni


[Parallel(n_jobs=-1)]: Done   2 out of  81 | elapsed:    3.7s remaining:  2.4min
[Parallel(n_jobs=-1)]: Done  11 out of  81 | elapsed:    5.0s remaining:   31.6s
[Parallel(n_jobs=-1)]: Done  20 out of  81 | elapsed:    7.3s remaining:   22.2s
[Parallel(n_jobs=-1)]: Done  29 out of  81 | elapsed:    9.3s remaining:   16.8s
[Parallel(n_jobs=-1)]: Done  38 out of  81 | elapsed:   12.9s remaining:   14.5s
[Parallel(n_jobs=-1)]: Done  47 out of  81 | elapsed:   18.0s remaining:   13.0s
[Parallel(n_jobs=-1)]: Done  56 out of  81 | elapsed:   23.1s remaining:   10.3s
[Parallel(n_jobs=-1)]: Done  65 out of  81 | elapsed:   29.4s remaining:    7.2s
[Parallel(n_jobs=-1)]: Done  74 out of  81 | elapsed:   40.0s remaining:    3.8s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:   58.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.



Top risultati (coarse):
  R2=-0.4365 — params={'n_estimators': 800, 'learning_rate': 0.05, 'max_depth': 3, 'subsample': 1.0}
  R2=-0.4468 — params={'n_estimators': 300, 'learning_rate': 0.05, 'max_depth': 3, 'subsample': 1.0}
  R2=-0.4867 — params={'n_estimators': 300, 'learning_rate': 0.1, 'max_depth': 3, 'subsample': 1.0}

Fine grid size (limitata): 1372 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:   13.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   15.7s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:   19.7s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   22.4s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:   26.5s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:   36.2s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   41.8s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   47.4s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   53.4s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   59.1s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:  1.5min
[Paralle


Top risultati (fine):
  R2=-0.2551 — params={'n_estimators': 533, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
  R2=-0.2609 — params={'n_estimators': 400, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
  R2=-0.2644 — params={'n_estimators': 666, 'learning_rate': np.float64(0.07500000000000001), 'max_depth': 2, 'subsample': np.float64(1.0)}
Risultati salvati in results/full/results_GradientBoosting_coarse_to_fine_full.pkl

CONFRONTO FINALE: modelli ottimizzati
              model  search_time   R2_mean      RMSE_mean       MAE_mean  \
0      RandomForest    44.519522 -0.199708  198013.771787  163214.386378   
1  GradientBoosting   471.148737 -0.255093  198960.502801  162985.383495   

                                         best_params  
0  {'n_estimators': 25, 'max_depth': 26, 'min_sam...  
1  {'n_estimators': 533, 'learning_rate': 0.07500...  

Fit e salvataggio dei modelli finali (su

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of 108 | elapsed:    3.1s remaining:   52.0s
[Parallel(n_jobs=-1)]: Done  17 out of 108 | elapsed:    3.2s remaining:   17.0s
[Parallel(n_jobs=-1)]: Done  28 out of 108 | elapsed:    3.4s remaining:    9.8s
[Parallel(n_jobs=-1)]: Done  39 out of 108 | elapsed:    4.8s remaining:    8.6s
[Parallel(n_jobs=-1)]: Done  50 out of 108 | elapsed:    5.1s remaining:    5.9s
[Parallel(n_jobs=-1)]: Done  61 out of 108 | elapsed:    5.3s remaining:    4.1s
[Parallel(n_jobs=-1)]: Done  72 out of 108 | elapsed:    5.4s remaining:    2.7s
[Parallel(n_jobs=-1)]: Done  83 out of 108 | elapsed:    8.6s remaining:    2.6s
[Parallel(n_jobs=-1)]: Done  94 out of 108 | elapsed:    8.8s remaining:    1.3s
[Parallel(n_jobs=-1)]: Done 105 out of 108 | elapsed:    9.4s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:    9.6s finished
[Parallel(n_jobs=-1)]: Using backend Loky


Top risultati (coarse):
  R2=-6.1646 — params={'n_estimators': 50, 'max_depth': None, 'min_samples_split': 2, 'max_features': 0.5}
  R2=-6.1664 — params={'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 2, 'max_features': 0.5}
  R2=-6.2884 — params={'n_estimators': 300, 'max_depth': None, 'min_samples_split': 2, 'max_features': 0.5}

Fine grid size (limitata): 686 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:    6.0s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:    9.9s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   13.3s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   14.4s
[Paralle


Top risultati (fine):
  R2=-5.9840 — params={'n_estimators': 75, 'max_depth': 10, 'min_samples_split': 3, 'max_features': None}
  R2=-6.0379 — params={'n_estimators': 58, 'max_depth': 10, 'min_samples_split': 2, 'max_features': None}
  R2=-6.0868 — params={'n_estimators': 41, 'max_depth': 10, 'min_samples_split': 2, 'max_features': None}
Risultati salvati in results/stat/results_RandomForest_coarse_to_fine_stat.pkl

Inizio ricerca per: GradientBoosting
Coarse grid size: 81 combinazioni


[Parallel(n_jobs=-1)]: Done   2 out of  81 | elapsed:    1.3s remaining:   51.1s
[Parallel(n_jobs=-1)]: Done  11 out of  81 | elapsed:    1.9s remaining:   12.1s
[Parallel(n_jobs=-1)]: Done  20 out of  81 | elapsed:    2.6s remaining:    8.0s
[Parallel(n_jobs=-1)]: Done  29 out of  81 | elapsed:    4.5s remaining:    8.1s
[Parallel(n_jobs=-1)]: Done  38 out of  81 | elapsed:    5.7s remaining:    6.5s
[Parallel(n_jobs=-1)]: Done  47 out of  81 | elapsed:    7.1s remaining:    5.1s
[Parallel(n_jobs=-1)]: Done  56 out of  81 | elapsed:   10.7s remaining:    4.8s
[Parallel(n_jobs=-1)]: Done  65 out of  81 | elapsed:   12.6s remaining:    3.1s
[Parallel(n_jobs=-1)]: Done  74 out of  81 | elapsed:   15.8s remaining:    1.5s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:   20.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.



Top risultati (coarse):
  R2=-4.0370 — params={'n_estimators': 300, 'learning_rate': 0.1, 'max_depth': 8, 'subsample': 1.0}
  R2=-4.0370 — params={'n_estimators': 800, 'learning_rate': 0.1, 'max_depth': 8, 'subsample': 1.0}
  R2=-4.0381 — params={'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 8, 'subsample': 1.0}

Fine grid size (limitata): 2401 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   11.2s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:   12.6s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   14.0s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   17.0s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   19.0s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:   21.1s
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   25.7s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   27.9s
[Paralle


Top risultati (fine):
  R2=-3.7851 — params={'n_estimators': 200, 'learning_rate': np.float64(0.15000000000000002), 'max_depth': 12, 'subsample': np.float64(0.75)}
  R2=-3.7851 — params={'n_estimators': 250, 'learning_rate': np.float64(0.15000000000000002), 'max_depth': 12, 'subsample': np.float64(0.75)}
  R2=-3.7851 — params={'n_estimators': 300, 'learning_rate': np.float64(0.15000000000000002), 'max_depth': 12, 'subsample': np.float64(0.75)}
Risultati salvati in results/stat/results_GradientBoosting_coarse_to_fine_stat.pkl

CONFRONTO FINALE: modelli ottimizzati
              model  search_time   R2_mean      RMSE_mean       MAE_mean  \
0  GradientBoosting   253.428339 -3.785145  202705.221992  165805.204160   
1      RandomForest    31.614191 -5.983962  208499.839007  171739.081523   

                                         best_params  
0  {'n_estimators': 200, 'learning_rate': 0.15000...  
1  {'n_estimators': 75, 'max_depth': 10, 'min_sam...  

Fit e salvataggio dei modelli fina

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of 108 | elapsed:    2.6s remaining:   43.5s
[Parallel(n_jobs=-1)]: Done  17 out of 108 | elapsed:    3.0s remaining:   15.8s
[Parallel(n_jobs=-1)]: Done  28 out of 108 | elapsed:    3.2s remaining:    9.3s
[Parallel(n_jobs=-1)]: Done  39 out of 108 | elapsed:    5.0s remaining:    8.8s
[Parallel(n_jobs=-1)]: Done  50 out of 108 | elapsed:    5.3s remaining:    6.2s
[Parallel(n_jobs=-1)]: Done  61 out of 108 | elapsed:    5.5s remaining:    4.3s
[Parallel(n_jobs=-1)]: Done  72 out of 108 | elapsed:    5.8s remaining:    2.9s
[Parallel(n_jobs=-1)]: Done  83 out of 108 | elapsed:    9.2s remaining:    2.8s
[Parallel(n_jobs=-1)]: Done  94 out of 108 | elapsed:    9.3s remaining:    1.4s
[Parallel(n_jobs=-1)]: Done 105 out of 108 | elapsed:    9.8s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:    9.9s finished
[Parallel(n_jobs=-1)]: Using backend Loky


Top risultati (coarse):
  R2=-2.5269 — params={'n_estimators': 300, 'max_depth': 10, 'min_samples_split': 5, 'max_features': 'sqrt'}
  R2=-2.5269 — params={'n_estimators': 300, 'max_depth': 10, 'min_samples_split': 5, 'max_features': 'log2'}
  R2=-2.5554 — params={'n_estimators': 50, 'max_depth': 20, 'min_samples_split': 2, 'max_features': 'sqrt'}

Fine grid size (limitata): 2058 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   12.9s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:   14.6s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   15.3s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:   16.2s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   20.3s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   22.2s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   25.8s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:   30.7s
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:   32.3s
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   34.6s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   39.7s
[Paralle


Top risultati (fine):
  R2=-1.7765 — params={'n_estimators': 200, 'max_depth': 11, 'min_samples_split': 2, 'max_features': 0.2}
  R2=-1.7765 — params={'n_estimators': 200, 'max_depth': 11, 'min_samples_split': 2, 'max_features': 0.3}
  R2=-1.8394 — params={'n_estimators': 150, 'max_depth': 11, 'min_samples_split': 2, 'max_features': 0.3}
Risultati salvati in results/rf/results_RandomForest_coarse_to_fine_rf.pkl

Inizio ricerca per: GradientBoosting
Coarse grid size: 81 combinazioni


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of  81 | elapsed:    1.2s remaining:   46.0s
[Parallel(n_jobs=-1)]: Done  11 out of  81 | elapsed:    1.8s remaining:   11.2s
[Parallel(n_jobs=-1)]: Done  20 out of  81 | elapsed:    2.3s remaining:    6.9s
[Parallel(n_jobs=-1)]: Done  29 out of  81 | elapsed:    4.4s remaining:    7.9s
[Parallel(n_jobs=-1)]: Done  38 out of  81 | elapsed:    5.4s remaining:    6.2s
[Parallel(n_jobs=-1)]: Done  47 out of  81 | elapsed:    6.3s remaining:    4.6s
[Parallel(n_jobs=-1)]: Done  56 out of  81 | elapsed:   10.0s remaining:    4.5s
[Parallel(n_jobs=-1)]: Done  65 out of  81 | elapsed:   11.7s remaining:    2.9s
[Parallel(n_jobs=-1)]: Done  74 out of  81 | elapsed:   13.9s remaining:    1.3s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:   17.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 112 concurrent workers.



Top risultati (coarse):
  R2=-4.1399 — params={'n_estimators': 300, 'learning_rate': 0.1, 'max_depth': 8, 'subsample': 0.6}
  R2=-4.1405 — params={'n_estimators': 800, 'learning_rate': 0.1, 'max_depth': 8, 'subsample': 0.6}
  R2=-4.1566 — params={'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 8, 'subsample': 0.6}

Fine grid size (limitata): 2401 combinazioni



[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   11.5s
[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:   13.0s
[Parallel(n_jobs=-1)]: Done 288 tasks      | elapsed:   14.0s
[Parallel(n_jobs=-1)]: Done 321 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done 354 tasks      | elapsed:   17.3s
[Parallel(n_jobs=-1)]: Done 389 tasks      | elapsed:   19.6s
[Parallel(n_jobs=-1)]: Done 424 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   23.0s
[Paralle


Top risultati (fine):
  R2=-3.5226 — params={'n_estimators': 250, 'learning_rate': np.float64(0.08333333333333334), 'max_depth': 10, 'subsample': np.float64(0.3)}
  R2=-3.5298 — params={'n_estimators': 300, 'learning_rate': np.float64(0.13333333333333336), 'max_depth': 8, 'subsample': np.float64(0.3)}
  R2=-3.5387 — params={'n_estimators': 200, 'learning_rate': np.float64(0.13333333333333336), 'max_depth': 8, 'subsample': np.float64(0.3)}
Risultati salvati in results/rf/results_GradientBoosting_coarse_to_fine_rf.pkl

CONFRONTO FINALE: modelli ottimizzati
              model  search_time   R2_mean      RMSE_mean       MAE_mean  \
0      RandomForest   256.706529 -1.776504  178091.860426  145217.034567   
1  GradientBoosting   197.330156 -3.522570  180649.333017  144916.773736   

                                         best_params  
0  {'n_estimators': 200, 'max_depth': 11, 'min_sa...  
1  {'n_estimators': 250, 'learning_rate': 0.08333...  

Fit e salvataggio dei modelli finali (su tu