# XGBoost - Cerca d'hiperparàmetres

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import HistGradientBoostingRegressor
import datetime
import random
import pickle
import time

from xgboost import XGBRegressor



## Càrrega de dades

In [2]:
# Load the prepared arrays from hard-coded Kaggle input paths.
# NOTE(review): the file name suggests the features are already min-max scaled — confirm upstream.
X_data = np.load('/kaggle/input/tfm2a-preparar-dades/X_minMaxScaled_opt.npy')
# Target matrix; each column is searched separately by the grid-search functions in this notebook.
Y_data = np.load('/kaggle/input/tfm2a-preparar-dades/Y_FM.npy')
# Presumably the names of the target columns in Y_data — verify; not used in the visible cells.
param_names = np.load('/kaggle/input/tfm2a-preparar-dades/Y_FM_names.npy')


In [3]:
# Split into train and test sets (25% held out for test; fixed seed so the split is reproducible)
X_train, X_test, y_train_all, y_test_all = train_test_split(X_data, Y_data, test_size = 0.25, random_state = 42)

# Check the dimensions of the train and test sets
print('Training Features Shape:', X_train.shape)
print('Training Labels Shape:', y_train_all.shape)
print('Testing Features Shape:', X_test.shape)
print('Testing Labels Shape:', y_test_all.shape)

Training Features Shape: (68544, 52)
Training Labels Shape: (68544, 6)
Testing Features Shape: (22848, 52)
Testing Labels Shape: (22848, 6)


## Cerca en malla dels hiperparàmetres

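Utilitzem la validació creuada (`GridSearchCV` amb 4 particions) per trobar els hiperparàmetres òptims de cada variable objectiu. La cerca es fa per rondes: a cada ronda només es fa variar un grup d'hiperparàmetres i la resta es manté fixa.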

In [4]:
def XGB_GridSearch(nParam):
    """Round 1 of the search: cross-validate ``n_estimators`` for one target column.

    Fits a 4-fold ``GridSearchCV`` around an ``XGBRegressor`` using the
    module-level ``X_train`` and column ``nParam`` of ``y_train_all``. Only
    ``n_estimators`` varies (6000, 8000, 10000); every other hyperparameter in
    the grid has a single value. No ``scoring`` is passed, so the estimator's
    default scorer is used.

    Side effects: prints a start timestamp, the best score / parameters /
    estimator and the elapsed time, and pickles the (unsorted) results table to
    ``grid_results_<nParam>`` in the current working directory.

    Args:
        nParam: Column index into ``y_train_all`` selecting the target to fit.

    Returns:
        pandas.DataFrame with one row per candidate: the parameter values, the
        mean and std of the CV test score, the rank, and an ``nParam`` column.
        Rows are sorted by ``rank_test_score``.
    """
    start_time = time.time()
    print(datetime.datetime.now(), " - Calculant grid search CV del paràmetre ", nParam)

    # Target column for this search.
    y_train = y_train_all[:, nParam]

    # Fresh model instance; 'gpu_hist' selects the GPU tree builder.
    model = XGBRegressor(random_state=24, tree_method='gpu_hist')

    # Value ranges for the hyperparameters being optimised.
    parameters = {
        'n_estimators': [6000, 8000, 10000],
        'learning_rate': [0.1],
        'max_depth': [3],
        'min_child_weight': [1],
        'gamma': [0],
        'subsample': [0.8],
        'colsample_bytree': [0.8]
    }

    # Build and fit the cross-validated grid search.
    grid = GridSearchCV(model, param_grid=parameters, cv=4)
    grid.fit(X_train, y_train)

    # Report the best combination found.
    print("Millor score: ", grid.best_score_)
    print("Millors paràmetres: ", grid.best_params_)
    print("Millor model: ", grid.best_estimator_)

    # Keep only the columns of interest; .assign returns a new frame, so the
    # extra column is not written into a slice of the full cv_results_ frame.
    result_columns = [
        'param_n_estimators',
        'param_learning_rate',
        'param_max_depth',
        'param_min_child_weight',
        'param_gamma',
        'param_subsample',
        'param_colsample_bytree',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]
    grid_results = pd.DataFrame(grid.cv_results_)[result_columns].assign(nParam=nParam)

    # NOTE(review): every XGB_GridSearch* function in this notebook writes to
    # this same file name, so a later round overwrites this round's pickle for
    # the same nParam.
    # The context manager guarantees the file handle is flushed and closed.
    with open("grid_results_" + str(nParam), "wb") as results_file:
        pickle.dump(grid_results, results_file)

    end_time = time.time()
    total_time = end_time - start_time
    print("El temps total de l'execució és:", total_time, "segons, o bé ", total_time/60, " minuts.")

    return grid_results.sort_values(by=['rank_test_score'])

In [5]:
# Round 1 (n_estimators search) for target column 0.
XGB_GridSearch(0)

2023-06-18 10:57:48.167987  - Calculant grid search CV del paràmetre  0
Millor score:  0.9791409206895112
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 10000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=10000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_sta

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,10000,0.1,3,1,0,0.8,0.8,0.979141,0.001083,1,0
1,8000,0.1,3,1,0,0.8,0.8,0.978787,0.001066,2,0
0,6000,0.1,3,1,0,0.8,0.8,0.978213,0.001145,3,0


In [6]:
# Round 1 (n_estimators search) for target column 1.
XGB_GridSearch(1)

2023-06-18 11:00:31.564037  - Calculant grid search CV del paràmetre  1
Millor score:  0.9502486520278761
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 10000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=10000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_sta

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,10000,0.1,3,1,0,0.8,0.8,0.950249,0.000442,1,1
1,8000,0.1,3,1,0,0.8,0.8,0.949015,0.000444,2,1
0,6000,0.1,3,1,0,0.8,0.8,0.947078,0.000503,3,1


In [7]:
# Round 1 (n_estimators search) for target column 2.
XGB_GridSearch(2)

2023-06-18 11:03:12.706164  - Calculant grid search CV del paràmetre  2
Millor score:  0.9919474727642232
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 10000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=10000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_sta

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,10000,0.1,3,1,0,0.8,0.8,0.991947,0.000423,1,2
1,8000,0.1,3,1,0,0.8,0.8,0.991641,0.000429,2,2
0,6000,0.1,3,1,0,0.8,0.8,0.991113,0.000434,3,2


In [8]:
# Round 1 (n_estimators search) for target column 3.
XGB_GridSearch(3)

2023-06-18 11:05:54.330279  - Calculant grid search CV del paràmetre  3
Millor score:  0.9767514865037265
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 10000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=10000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_sta

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,10000,0.1,3,1,0,0.8,0.8,0.976751,0.000605,1,3
1,8000,0.1,3,1,0,0.8,0.8,0.976443,0.000617,2,3
0,6000,0.1,3,1,0,0.8,0.8,0.975976,0.000656,3,3


In [9]:
# Round 1 (n_estimators search) for target column 4.
XGB_GridSearch(4)

2023-06-18 11:08:35.920141  - Calculant grid search CV del paràmetre  4
Millor score:  0.9496283211991987
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 10000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=10000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_sta

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,10000,0.1,3,1,0,0.8,0.8,0.949628,0.000549,1,4
1,8000,0.1,3,1,0,0.8,0.8,0.94695,0.000513,2,4
0,6000,0.1,3,1,0,0.8,0.8,0.942718,0.000506,3,4


In [10]:
# Round 1 (n_estimators search) for target column 5.
XGB_GridSearch(5)

2023-06-18 11:11:19.927265  - Calculant grid search CV del paràmetre  5
Millor score:  0.9019840278294324
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 10000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=10000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_sta

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,10000,0.1,3,1,0,0.8,0.8,0.901984,0.000953,1,5
1,8000,0.1,3,1,0,0.8,0.8,0.901056,0.000782,2,5
0,6000,0.1,3,1,0,0.8,0.8,0.899599,0.000816,3,5


In [11]:
# Chosen n_estimators for each atmospheric parameter (target column index).
# NOTE(review): the round-1 search shown in this notebook evaluated
# 6000/8000/10000 and ranked 10000 first for every target, while most values
# here are 2000 — presumably taken from an earlier run or chosen for runtime
# reasons; confirm.
def get_best_n_estimator(nParam):
    """Return the hard-coded ``n_estimators`` for target index ``nParam``.

    Indices 0 through 5 are mapped; any other value yields ``None``.
    """
    # Position in the tuple is the target index.
    per_target = (2000, 2000, 2000, 2000, 6000, 2000)
    for index, n_estimators in enumerate(per_target):
        if nParam == index:
            return n_estimators
    return None

In [12]:
def XGB_GridSearch_rnd2(nParam):
    """Round 2 of the search: cross-validate ``max_depth`` for one target column.

    Fits a 4-fold ``GridSearchCV`` around an ``XGBRegressor`` using the
    module-level ``X_train`` and column ``nParam`` of ``y_train_all``.
    ``n_estimators`` is fixed to ``get_best_n_estimator(nParam)`` and only
    ``max_depth`` varies (3, 5, 10). No ``scoring`` is passed, so the
    estimator's default scorer is used.

    Side effects: prints a start timestamp, the best score / parameters /
    estimator and the elapsed time, and pickles the (unsorted) results table to
    ``grid_results_<nParam>`` in the current working directory.

    Args:
        nParam: Column index into ``y_train_all`` selecting the target to fit.

    Returns:
        pandas.DataFrame with one row per candidate: the parameter values, the
        mean and std of the CV test score, the rank, and an ``nParam`` column.
        Rows are sorted by ``rank_test_score``.
    """
    start_time = time.time()
    print(datetime.datetime.now(), " - Calculant grid search CV del paràmetre ", nParam)

    # Target column for this search.
    y_train = y_train_all[:, nParam]

    # Fresh model instance; 'gpu_hist' selects the GPU tree builder.
    model = XGBRegressor(random_state=24, tree_method='gpu_hist')

    # Value ranges for the hyperparameters being optimised.
    parameters = {
        'n_estimators': [get_best_n_estimator(nParam)],
        'learning_rate': [0.1],
        'max_depth': [3, 5, 10],
        'min_child_weight': [1],
        'gamma': [0],
        'subsample': [0.8],
        'colsample_bytree': [0.8]
    }

    # Build and fit the cross-validated grid search.
    grid = GridSearchCV(model, param_grid=parameters, cv=4)
    grid.fit(X_train, y_train)

    # Report the best combination found.
    print("Millor score: ", grid.best_score_)
    print("Millors paràmetres: ", grid.best_params_)
    print("Millor model: ", grid.best_estimator_)

    # Keep only the columns of interest; .assign returns a new frame, so the
    # extra column is not written into a slice of the full cv_results_ frame.
    result_columns = [
        'param_n_estimators',
        'param_learning_rate',
        'param_max_depth',
        'param_min_child_weight',
        'param_gamma',
        'param_subsample',
        'param_colsample_bytree',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]
    grid_results = pd.DataFrame(grid.cv_results_)[result_columns].assign(nParam=nParam)

    # NOTE(review): every XGB_GridSearch* function in this notebook writes to
    # this same file name, so running this round overwrites any earlier
    # round's pickle for the same nParam.
    # The context manager guarantees the file handle is flushed and closed.
    with open("grid_results_" + str(nParam), "wb") as results_file:
        pickle.dump(grid_results, results_file)

    end_time = time.time()
    total_time = end_time - start_time
    print("El temps total de l'execució és:", total_time, "segons, o bé ", total_time/60, " minuts.")

    return grid_results.sort_values(by=['rank_test_score'])

In [13]:
# Round 2 (max_depth search) for target column 0.
XGB_GridSearch_rnd2(0)

2023-06-18 11:13:59.999297  - Calculant grid search CV del paràmetre  0
Millor score:  0.9791765657145837
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,2000,0.1,5,1,0,0.8,0.8,0.979177,0.00132,1,0
2,2000,0.1,10,1,0,0.8,0.8,0.976669,0.001362,2,0
0,2000,0.1,3,1,0,0.8,0.8,0.974549,0.001568,3,0


In [14]:
# Round 2 (max_depth search) for target column 1.
XGB_GridSearch_rnd2(1)

2023-06-18 11:16:40.423961  - Calculant grid search CV del paràmetre  1
Millor score:  0.9477926220709036
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,2000,0.1,5,1,0,0.8,0.8,0.947793,0.000542,1,1
2,2000,0.1,10,1,0,0.8,0.8,0.943312,0.000343,2,1
0,2000,0.1,3,1,0,0.8,0.8,0.935307,0.000665,3,1


In [15]:
# Round 2 (max_depth search) for target column 2.
XGB_GridSearch_rnd2(2)

2023-06-18 11:19:17.598112  - Calculant grid search CV del paràmetre  2
Millor score:  0.9909642228458454
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,2000,0.1,5,1,0,0.8,0.8,0.990964,0.000326,1,2
2,2000,0.1,10,1,0,0.8,0.8,0.989873,0.000318,2,2
0,2000,0.1,3,1,0,0.8,0.8,0.987664,0.000335,3,2


In [16]:
# Round 2 (max_depth search) for target column 3.
XGB_GridSearch_rnd2(3)

2023-06-18 11:21:57.510361  - Calculant grid search CV del paràmetre  3
Millor score:  0.9776164984611244
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,2000,0.1,5,1,0,0.8,0.8,0.977616,0.000251,1,3
2,2000,0.1,10,1,0,0.8,0.8,0.97713,0.000419,2,3
0,2000,0.1,3,1,0,0.8,0.8,0.972347,0.000773,3,3


In [17]:
# Round 2 (max_depth search) for target column 4.
XGB_GridSearch_rnd2(4)

2023-06-18 11:24:33.099499  - Calculant grid search CV del paràmetre  4
Millor score:  0.9515598658737551
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 6000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=6000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,6000,0.1,5,1,0,0.8,0.8,0.95156,0.000489,1,4
0,6000,0.1,3,1,0,0.8,0.8,0.942718,0.000506,2,4
2,6000,0.1,10,1,0,0.8,0.8,0.928915,0.000644,3,4


In [18]:
# Round 2 (max_depth search) for target column 5.
XGB_GridSearch_rnd2(5)

2023-06-18 11:29:31.300019  - Calculant grid search CV del paràmetre  5
Millor score:  0.9008575671318297
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,2000,0.1,5,1,0,0.8,0.8,0.900858,0.000552,1,5
2,2000,0.1,10,1,0,0.8,0.8,0.898682,0.000383,2,5
0,2000,0.1,3,1,0,0.8,0.8,0.888619,0.000609,3,5


In [19]:
def XGB_GridSearch_rnd3(nParam):
    """Round 3 of the search: cross-validate ``min_child_weight`` for one target column.

    Fits a 4-fold ``GridSearchCV`` around an ``XGBRegressor`` using the
    module-level ``X_train`` and column ``nParam`` of ``y_train_all``.
    ``n_estimators`` is fixed to ``get_best_n_estimator(nParam)``,
    ``max_depth`` to 5, and only ``min_child_weight`` varies (1, 3, 6). No
    ``scoring`` is passed, so the estimator's default scorer is used.

    Side effects: prints a start timestamp, the best score / parameters /
    estimator and the elapsed time, and pickles the (unsorted) results table to
    ``grid_results_<nParam>`` in the current working directory.

    Args:
        nParam: Column index into ``y_train_all`` selecting the target to fit.

    Returns:
        pandas.DataFrame with one row per candidate: the parameter values, the
        mean and std of the CV test score, the rank, and an ``nParam`` column.
        Rows are sorted by ``rank_test_score``.
    """
    start_time = time.time()
    print(datetime.datetime.now(), " - Calculant grid search CV del paràmetre ", nParam)

    # Target column for this search.
    y_train = y_train_all[:, nParam]

    # Fresh model instance; 'gpu_hist' selects the GPU tree builder.
    model = XGBRegressor(random_state=24, tree_method='gpu_hist')

    # Value ranges for the hyperparameters being optimised.
    parameters = {
        'n_estimators': [get_best_n_estimator(nParam)],
        'learning_rate': [0.1],
        'max_depth': [5],
        'min_child_weight': [1, 3, 6],
        'gamma': [0],
        'subsample': [0.8],
        'colsample_bytree': [0.8]
    }

    # Build and fit the cross-validated grid search.
    grid = GridSearchCV(model, param_grid=parameters, cv=4)
    grid.fit(X_train, y_train)

    # Report the best combination found.
    print("Millor score: ", grid.best_score_)
    print("Millors paràmetres: ", grid.best_params_)
    print("Millor model: ", grid.best_estimator_)

    # Keep only the columns of interest; .assign returns a new frame, so the
    # extra column is not written into a slice of the full cv_results_ frame.
    result_columns = [
        'param_n_estimators',
        'param_learning_rate',
        'param_max_depth',
        'param_min_child_weight',
        'param_gamma',
        'param_subsample',
        'param_colsample_bytree',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]
    grid_results = pd.DataFrame(grid.cv_results_)[result_columns].assign(nParam=nParam)

    # NOTE(review): every XGB_GridSearch* function in this notebook writes to
    # this same file name, so running this round overwrites any earlier
    # round's pickle for the same nParam.
    # The context manager guarantees the file handle is flushed and closed.
    with open("grid_results_" + str(nParam), "wb") as results_file:
        pickle.dump(grid_results, results_file)

    end_time = time.time()
    total_time = end_time - start_time
    print("El temps total de l'execució és:", total_time, "segons, o bé ", total_time/60, " minuts.")

    return grid_results.sort_values(by=['rank_test_score'])

In [20]:
# Round 3 (min_child_weight search) for target column 0.
XGB_GridSearch_rnd3(0)

2023-06-18 11:32:04.883639  - Calculant grid search CV del paràmetre  0
Millor score:  0.9791765657145837
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0,0.8,0.8,0.979177,0.00132,1,0
2,2000,0.1,5,6,0,0.8,0.8,0.978994,0.001281,2,0
1,2000,0.1,5,3,0,0.8,0.8,0.978732,0.001371,3,0


In [21]:
# Round 3 (min_child_weight search) for target column 1.
XGB_GridSearch_rnd3(1)

2023-06-18 11:33:16.197975  - Calculant grid search CV del paràmetre  1
Millor score:  0.9477948470197679
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 6, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=6, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,2000,0.1,5,6,0,0.8,0.8,0.947795,0.000409,1,1
0,2000,0.1,5,1,0,0.8,0.8,0.947793,0.000542,2,1
1,2000,0.1,5,3,0,0.8,0.8,0.947685,0.000482,3,1


In [22]:
# Round 3 (min_child_weight search) for target column 2.
XGB_GridSearch_rnd3(2)

2023-06-18 11:34:27.387063  - Calculant grid search CV del paràmetre  2
Millor score:  0.9909642228458454
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0,0.8,0.8,0.990964,0.000326,1,2
1,2000,0.1,5,3,0,0.8,0.8,0.990957,0.00029,2,2
2,2000,0.1,5,6,0,0.8,0.8,0.990951,0.000295,3,2


In [23]:
# Round 3 (min_child_weight search) for target column 3.
XGB_GridSearch_rnd3(3)

2023-06-18 11:35:40.082790  - Calculant grid search CV del paràmetre  3
Millor score:  0.9776164984611244
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0,0.8,0.8,0.977616,0.000251,1,3
1,2000,0.1,5,3,0,0.8,0.8,0.977541,0.000435,2,3
2,2000,0.1,5,6,0,0.8,0.8,0.977286,0.000257,3,3


In [24]:
# Round 3 (min_child_weight search) for target column 4.
XGB_GridSearch_rnd3(4)

2023-06-18 11:36:51.692755  - Calculant grid search CV del paràmetre  4
Millor score:  0.9521163354184752
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 6, 'n_estimators': 6000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=6, missing=nan, monotone_constraints=None,
             n_estimators=6000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,6000,0.1,5,6,0,0.8,0.8,0.952116,0.000381,1,4
0,6000,0.1,5,1,0,0.8,0.8,0.95156,0.000489,2,4
1,6000,0.1,5,3,0,0.8,0.8,0.951415,0.000656,3,4


In [25]:
# Round 3 (min_child_weight search) for target column 5.
XGB_GridSearch_rnd3(5)

2023-06-18 11:40:19.893430  - Calculant grid search CV del paràmetre  5
Millor score:  0.9012052973824465
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 3, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=3, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
1,2000,0.1,5,3,0,0.8,0.8,0.901205,0.000353,1,5
2,2000,0.1,5,6,0,0.8,0.8,0.900874,0.000856,2,5
0,2000,0.1,5,1,0,0.8,0.8,0.900858,0.000552,3,5


In [26]:
def XGB_GridSearch_rnd4(nParam):
    """Grid-search ``subsample`` and ``colsample_bytree`` for one target column.

    Fits a 4-fold ``GridSearchCV`` around an ``XGBRegressor`` on ``X_train``
    and column ``nParam`` of ``y_train_all``. Every other hyperparameter is
    pinned to a single value; ``n_estimators`` is taken from
    ``get_best_n_estimator(nParam)``.

    Side effects: prints a start timestamp, the best score / parameters /
    estimator and the elapsed time, and pickles the results table to
    ``grid_results_<nParam>`` in the working directory, overwriting any
    existing file with that name.

    NOTE(review): a later cell defines another function with this same name
    (the gamma grid); once that cell runs, it shadows this definition.

    Parameters
    ----------
    nParam : int
        Column index into ``y_train_all`` selecting the target to fit.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated parameter combination (searched parameters,
        mean/std test score, rank, and ``nParam``), sorted by
        ``rank_test_score``.
    """
    start_time = time.time()
    print(datetime.datetime.now(), " - Calculant grid search CV del paràmetre ", nParam)

    # Target for this run: a single column of the multi-output training labels.
    y_train = y_train_all[:, nParam]

    # New model instance for every call; tree_method='gpu_hist' is the GPU setting.
    model = XGBRegressor(random_state=24, tree_method='gpu_hist')

    # Only subsample and colsample_bytree vary in this round.
    parameters = {
        'n_estimators': [get_best_n_estimator(nParam)],
        'learning_rate': [0.1],
        'max_depth': [5],
        'min_child_weight': [1],
        'gamma': [0],
        'subsample': [0.8, 1],
        'colsample_bytree': [0.8, 1],
    }

    # 4-fold cross-validated search over the grid above.
    grid = GridSearchCV(model, param_grid=parameters, cv=4)
    grid.fit(X_train, y_train)

    # Report the best combination found.
    print("Millor score: ", grid.best_score_)
    print("Millors paràmetres: ", grid.best_params_)
    print("Millor model: ", grid.best_estimator_)

    # Keep only the columns of interest and tag each row with the target index.
    result_columns = [
        'param_n_estimators',
        'param_learning_rate',
        'param_max_depth',
        'param_min_child_weight',
        'param_gamma',
        'param_subsample',
        'param_colsample_bytree',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]
    grid_results = pd.DataFrame(grid.cv_results_)[result_columns].assign(nParam=nParam)

    # Context manager guarantees the file is flushed and closed, even on error.
    with open("grid_results_" + str(nParam), "wb") as results_file:
        pickle.dump(grid_results, results_file)

    end_time = time.time()
    total_time = end_time - start_time
    print("El temps total de l'execució és:", total_time, "segons, o bé ", total_time/60, " minuts.")

    return grid_results.sort_values(by=['rank_test_score'])

In [27]:
# subsample / colsample_bytree grid for target column 0.
XGB_GridSearch_rnd4(0)

2023-06-18 11:41:31.034322  - Calculant grid search CV del paràmetre  0
Millor score:  0.9791765657145837
Millors paràmetres:  {'colsample_bytree': 0.8, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=0, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0,0.8,0.8,0.979177,0.00132,1,0
2,2000,0.1,5,1,0,0.8,1.0,0.978762,0.001106,2,0
3,2000,0.1,5,1,0,1.0,1.0,0.978426,0.001052,3,0
1,2000,0.1,5,1,0,1.0,0.8,0.978382,0.001457,4,0


In [28]:
# subsample / colsample_bytree grid for target column 1.
XGB_GridSearch_rnd4(1)

2023-06-18 11:43:05.743606  - Calculant grid search CV del paràmetre  1
Millor score:  0.9484525469317774
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,2000,0.1,5,1,0,0.8,1.0,0.948453,0.000655,1,1
0,2000,0.1,5,1,0,0.8,0.8,0.947793,0.000542,2,1
3,2000,0.1,5,1,0,1.0,1.0,0.947364,0.000469,3,1
1,2000,0.1,5,1,0,1.0,0.8,0.946974,0.000612,4,1


In [29]:
# subsample / colsample_bytree grid for target column 2.
XGB_GridSearch_rnd4(2)

2023-06-18 11:44:40.518306  - Calculant grid search CV del paràmetre  2
Millor score:  0.9913305687416252
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,2000,0.1,5,1,0,0.8,1.0,0.991331,0.000254,1,2
3,2000,0.1,5,1,0,1.0,1.0,0.991058,0.000333,2,2
0,2000,0.1,5,1,0,0.8,0.8,0.990964,0.000326,3,2
1,2000,0.1,5,1,0,1.0,0.8,0.990733,0.000364,4,2


In [30]:
# subsample / colsample_bytree grid for target column 3.
XGB_GridSearch_rnd4(3)

2023-06-18 11:46:16.377048  - Calculant grid search CV del paràmetre  3
Millor score:  0.9776391875603537
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,2000,0.1,5,1,0,0.8,1.0,0.977639,0.000541,1,3
0,2000,0.1,5,1,0,0.8,0.8,0.977616,0.000251,2,3
3,2000,0.1,5,1,0,1.0,1.0,0.977485,0.000356,3,3
1,2000,0.1,5,1,0,1.0,0.8,0.977193,0.000501,4,3


In [31]:
# subsample / colsample_bytree grid for target column 4.
XGB_GridSearch_rnd4(4)

2023-06-18 11:47:50.264528  - Calculant grid search CV del paràmetre  4
Millor score:  0.9518876275159356
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 6000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=6000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,6000,0.1,5,1,0,0.8,1.0,0.951888,0.000704,1,4
0,6000,0.1,5,1,0,0.8,0.8,0.95156,0.000489,2,4
1,6000,0.1,5,1,0,1.0,0.8,0.949288,0.000671,3,4
3,6000,0.1,5,1,0,1.0,1.0,0.949115,0.001271,4,4


In [32]:
# subsample / colsample_bytree grid for target column 5.
XGB_GridSearch_rnd4(5)

2023-06-18 11:52:23.817744  - Calculant grid search CV del paràmetre  5
Millor score:  0.9015486665758781
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
2,2000,0.1,5,1,0,0.8,1.0,0.901549,0.00036,1,5
1,2000,0.1,5,1,0,1.0,0.8,0.900934,0.000399,2,5
0,2000,0.1,5,1,0,0.8,0.8,0.900858,0.000552,3,5
3,2000,0.1,5,1,0,1.0,1.0,0.900668,0.000613,4,5


In [33]:
def XGB_GridSearch_rnd4(nParam):
    """Grid-search ``gamma`` for one target column.

    Fits a 4-fold ``GridSearchCV`` around an ``XGBRegressor`` on ``X_train``
    and column ``nParam`` of ``y_train_all``, trying ``gamma`` in
    ``[0, 0.25, 0.5]``. Every other hyperparameter is pinned to a single
    value; ``n_estimators`` is taken from ``get_best_n_estimator(nParam)``.

    Side effects: prints a start timestamp, the best score / parameters /
    estimator and the elapsed time, and pickles the results table to
    ``grid_results_<nParam>`` in the working directory, overwriting any
    existing file with that name.

    NOTE(review): this reuses the name of the earlier
    subsample/colsample_bytree search and therefore shadows it once this cell
    runs. The name is kept because the cells that follow call it under this
    name.

    Parameters
    ----------
    nParam : int
        Column index into ``y_train_all`` selecting the target to fit.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated parameter combination (searched parameters,
        mean/std test score, rank, and ``nParam``), sorted by
        ``rank_test_score``.
    """
    start_time = time.time()
    print(datetime.datetime.now(), " - Calculant grid search CV del paràmetre ", nParam)

    # Target for this run: a single column of the multi-output training labels.
    y_train = y_train_all[:, nParam]

    # New model instance for every call; tree_method='gpu_hist' is the GPU setting.
    model = XGBRegressor(random_state=24, tree_method='gpu_hist')

    # Only gamma varies in this round.
    parameters = {
        'n_estimators': [get_best_n_estimator(nParam)],
        'learning_rate': [0.1],
        'max_depth': [5],
        'min_child_weight': [1],
        'gamma': [0, 0.25, 0.5],
        'subsample': [0.8],
        'colsample_bytree': [1],
    }

    # 4-fold cross-validated search over the grid above.
    grid = GridSearchCV(model, param_grid=parameters, cv=4)
    grid.fit(X_train, y_train)

    # Report the best combination found.
    print("Millor score: ", grid.best_score_)
    print("Millors paràmetres: ", grid.best_params_)
    print("Millor model: ", grid.best_estimator_)

    # Keep only the columns of interest and tag each row with the target index.
    result_columns = [
        'param_n_estimators',
        'param_learning_rate',
        'param_max_depth',
        'param_min_child_weight',
        'param_gamma',
        'param_subsample',
        'param_colsample_bytree',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]
    grid_results = pd.DataFrame(grid.cv_results_)[result_columns].assign(nParam=nParam)

    # Context manager guarantees the file is flushed and closed, even on error.
    with open("grid_results_" + str(nParam), "wb") as results_file:
        pickle.dump(grid_results, results_file)

    end_time = time.time()
    total_time = end_time - start_time
    print("El temps total de l'execució és:", total_time, "segons, o bé ", total_time/60, " minuts.")

    return grid_results.sort_values(by=['rank_test_score'])

In [34]:
# gamma grid for target column 0 (runs the later, gamma-searching definition of XGB_GridSearch_rnd4).
XGB_GridSearch_rnd4(0)

2023-06-18 11:53:57.193283  - Calculant grid search CV del paràmetre  0
Millor score:  0.978761904128733
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24, 

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0.0,0.8,1,0.978762,0.001106,1,0
1,2000,0.1,5,1,0.25,0.8,1,0.978759,0.001101,2,0
2,2000,0.1,5,1,0.5,0.8,1,0.978748,0.001099,3,0


In [35]:
# gamma grid for target column 1 (runs the later, gamma-searching definition of XGB_GridSearch_rnd4).
XGB_GridSearch_rnd4(1)

2023-06-18 11:55:10.012005  - Calculant grid search CV del paràmetre  1
Millor score:  0.9484525469317774
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0.0,0.8,1,0.948453,0.000655,1,1
1,2000,0.1,5,1,0.25,0.8,1,0.944238,0.000722,2,1
2,2000,0.1,5,1,0.5,0.8,1,0.93963,0.000397,3,1


In [36]:
# gamma grid for target column 2 (runs the later, gamma-searching definition of XGB_GridSearch_rnd4).
XGB_GridSearch_rnd4(2)

2023-06-18 11:56:07.460437  - Calculant grid search CV del paràmetre  2
Millor score:  0.9913305687416252
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0.0,0.8,1,0.991331,0.000254,1,2
1,2000,0.1,5,1,0.25,0.8,1,0.985586,0.000416,2,2
2,2000,0.1,5,1,0.5,0.8,1,0.982662,0.000183,3,2


In [37]:
# gamma grid for target column 3 (runs the later, gamma-searching definition of XGB_GridSearch_rnd4).
XGB_GridSearch_rnd4(3)

2023-06-18 11:56:55.124613  - Calculant grid search CV del paràmetre  3
Millor score:  0.9776391875603537
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0.0,0.8,1,0.977639,0.000541,1,3
1,2000,0.1,5,1,0.25,0.8,1,0.973947,0.000668,2,3
2,2000,0.1,5,1,0.5,0.8,1,0.971455,0.000885,3,3


In [38]:
# gamma grid for target column 4 (runs the later, gamma-searching definition of XGB_GridSearch_rnd4).
XGB_GridSearch_rnd4(4)

2023-06-18 11:57:44.845063  - Calculant grid search CV del paràmetre  4
Millor score:  0.9518876275159356
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 6000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=6000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,6000,0.1,5,1,0.0,0.8,1,0.951888,0.000704,1,4
1,6000,0.1,5,1,0.25,0.8,1,0.924049,0.000428,2,4
2,6000,0.1,5,1,0.5,0.8,1,0.910267,0.000822,3,4


In [39]:
# gamma grid for target column 5 (runs the later, gamma-searching definition of XGB_GridSearch_rnd4).
XGB_GridSearch_rnd4(5)

2023-06-18 11:59:55.876496  - Calculant grid search CV del paràmetre  5
Millor score:  0.9015486665758781
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.1,5,1,0.0,0.8,1,0.901549,0.00036,1,5
1,2000,0.1,5,1,0.25,0.8,1,0.898632,0.000332,2,5
2,2000,0.1,5,1,0.5,0.8,1,0.894243,0.000478,3,5


In [40]:
def XGB_GridSearch_rnd5(nParam):
    """Cross-validate a single configuration with ``learning_rate=0.5``.

    Fits a 4-fold ``GridSearchCV`` around an ``XGBRegressor`` on ``X_train``
    and column ``nParam`` of ``y_train_all``. Every entry of the grid holds
    exactly one value, so only one parameter combination is evaluated;
    ``n_estimators`` is taken from ``get_best_n_estimator(nParam)``.

    Side effects: prints a start timestamp, the best score / parameters /
    estimator and the elapsed time, and pickles the results table to
    ``grid_results_<nParam>`` in the working directory, overwriting any
    existing file with that name.

    Parameters
    ----------
    nParam : int
        Column index into ``y_train_all`` selecting the target to fit.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated parameter combination (searched parameters,
        mean/std test score, rank, and ``nParam``), sorted by
        ``rank_test_score``.
    """
    start_time = time.time()
    print(datetime.datetime.now(), " - Calculant grid search CV del paràmetre ", nParam)

    # Target for this run: a single column of the multi-output training labels.
    y_train = y_train_all[:, nParam]

    # New model instance for every call; tree_method='gpu_hist' is the GPU setting.
    model = XGBRegressor(random_state=24, tree_method='gpu_hist')

    # Single-point grid: learning_rate is set to 0.5, everything else is fixed.
    parameters = {
        'n_estimators': [get_best_n_estimator(nParam)],
        'learning_rate': [0.5],
        'max_depth': [5],
        'min_child_weight': [1],
        'gamma': [0],
        'subsample': [0.8],
        'colsample_bytree': [1],
    }

    # 4-fold cross-validation of the configuration above.
    grid = GridSearchCV(model, param_grid=parameters, cv=4)
    grid.fit(X_train, y_train)

    # Report the best combination found.
    print("Millor score: ", grid.best_score_)
    print("Millors paràmetres: ", grid.best_params_)
    print("Millor model: ", grid.best_estimator_)

    # Keep only the columns of interest and tag each row with the target index.
    result_columns = [
        'param_n_estimators',
        'param_learning_rate',
        'param_max_depth',
        'param_min_child_weight',
        'param_gamma',
        'param_subsample',
        'param_colsample_bytree',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    ]
    grid_results = pd.DataFrame(grid.cv_results_)[result_columns].assign(nParam=nParam)

    # Context manager guarantees the file is flushed and closed, even on error.
    with open("grid_results_" + str(nParam), "wb") as results_file:
        pickle.dump(grid_results, results_file)

    end_time = time.time()
    total_time = end_time - start_time
    print("El temps total de l'execució és:", total_time, "segons, o bé ", total_time/60, " minuts.")

    return grid_results.sort_values(by=['rank_test_score'])

In [41]:
# learning_rate=0.5 cross-validation for target column 0.
XGB_GridSearch_rnd5(0)

2023-06-18 12:00:54.785006  - Calculant grid search CV del paràmetre  0
Millor score:  0.9639636054032084
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.5, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.5,5,1,0,0.8,1,0.963964,0.00115,1,0


In [42]:
# learning_rate=0.5 cross-validation for target column 1.
XGB_GridSearch_rnd5(1)

2023-06-18 12:01:23.041684  - Calculant grid search CV del paràmetre  1
Millor score:  0.9264770937595805
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.5, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.5,5,1,0,0.8,1,0.926477,0.001105,1,1


In [43]:
# learning_rate=0.5 cross-validation for target column 2.
XGB_GridSearch_rnd5(2)

2023-06-18 12:01:51.802891  - Calculant grid search CV del paràmetre  2
Millor score:  0.9831044671967161
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.5, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.5,5,1,0,0.8,1,0.983104,0.000632,1,2


In [44]:
# learning_rate=0.5 cross-validation for target column 3.
XGB_GridSearch_rnd5(3)

2023-06-18 12:02:20.188844  - Calculant grid search CV del paràmetre  3
Millor score:  0.9682570000087996
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.5, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.5,5,1,0,0.8,1,0.968257,0.000535,1,3


In [45]:
# learning_rate=0.5 cross-validation for target column 4.
XGB_GridSearch_rnd5(4)

2023-06-18 12:02:48.439833  - Calculant grid search CV del paràmetre  4
Millor score:  0.906751449527488
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 6000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.5, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=6000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24, 

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,6000,0.5,5,1,0,0.8,1,0.906751,0.000711,1,4


In [46]:
# learning_rate=0.5 cross-validation for target column 5.
XGB_GridSearch_rnd5(5)

2023-06-18 12:04:09.737540  - Calculant grid search CV del paràmetre  5
Millor score:  0.8686810961689456
Millors paràmetres:  {'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.5, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 2000, 'subsample': 0.8}
Millor model:  XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, gamma=0, gpu_id=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.5, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=5, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=2000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=24,

Unnamed: 0,param_n_estimators,param_learning_rate,param_max_depth,param_min_child_weight,param_gamma,param_subsample,param_colsample_bytree,mean_test_score,std_test_score,rank_test_score,nParam
0,2000,0.5,5,1,0,0.8,1,0.868681,0.001393,1,5
