In [1]:
import pandas as pd
from hyperopt import hp
from hyperopt.pyll.base import scope

from xgboost_regression_hyperopt import xgboost_regressor

# Training set: read the one-hot encoded CSV and discard the 'Unnamed: 0' column
# (presumably a row index written out when the CSV was saved -- TODO confirm).
df = pd.read_csv('original_data/train_one_hot.csv').drop(columns='Unnamed: 0')

# Testing set: identical treatment.
df_test = pd.read_csv('original_data/test_one_hot.csv').drop(columns='Unnamed: 0')


# Arguments passed to xgboost_regressor in the cells below:
# - X_train_initial: features of the training set
# - y_train_initial: target of the training set
# - X_test: features of the testing set
# - y_test: target of the testing set
# - score: metric to optimise, 'rmse' or 'mape' (set in each tuning cell)
# - space: search space explored by hyperopt when tuning the hyperparameters
# - n_evals: number of hyperopt evaluations (trials)

# Split each frame into features and the 'SalePrice' target.
X_train_initial = df.drop(columns='SalePrice')
y_train_initial = df['SalePrice']

X_test = df_test.drop(columns='SalePrice')
y_test = df_test['SalePrice']

# Number of hyperopt trials per tuning run.
n_evals = 250

# Hyperopt search space. `hp` is hyperopt.hp and `scope` is hyperopt's pyll scope;
# both must be imported before this cell runs on a fresh kernel.
# hp.quniform draws are floats, so the integer-valued parameters are wrapped in
# scope.int. 'random_state' is a fixed value, not a searched dimension.
# NOTE(review): the uniform ranges for the sampling ratios and learning_rate start
# at 0, so draws can land arbitrarily close to 0 -- confirm this is intended.
space = {
    'max_depth': scope.int(hp.quniform("max_depth", 1, 10, 1)),
    'gamma': hp.uniform('gamma', 0, 10),
    'reg_alpha': hp.uniform('reg_alpha', 0, 100),
    'reg_lambda': hp.uniform('reg_lambda', 0, 100),
    'colsample_bytree': hp.uniform('colsample_bytree', 0, 1),
    'subsample': hp.uniform('subsample', 0, 1),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0, 1),
    'min_child_weight': hp.uniform('min_child_weight', 0, 12),
    'n_estimators': scope.int(hp.quniform("n_estimators", 150, 1500, 1)),
    'learning_rate': hp.uniform('learning_rate', 0, 1),
    'max_bin': scope.int(hp.quniform('max_bin', 200, 550, 1)),
    'random_state': 5,
}

In [2]:
# Tuning run with MAPE as the score handed to xgboost_regressor.
score = 'mape'
# The 'rmse' cell further down rebinds xgb_initial / xgb_opt, so the result of this
# run is also bound to metric-specific names to keep it available afterwards.
# Presumably the pair is (untuned model, hyperopt-tuned model) -- TODO confirm
# against xgboost_regression_hyperopt.
xgb_initial_mape, xgb_opt_mape = xgb_initial, xgb_opt = xgboost_regressor(
    X_train_initial, y_train_initial, X_test, y_test, score, space, n_evals
)

-> Baseline error (mean of Y target):
Root Mean Sqarred Error: 57482.5697
Mean Absolute Percentage Error %: 19.4426


 -> Error on train data cv:
Root Mean Sqarred Error: 43222.3027
Mean Absolute Percentage Error %: 17.3174



 -> Error initial xgb model on val data:
Root Mean Sqarred Error: 46767.4067
Mean Absolute Percentage Error %: 17.2762


 -> Starting hyperparams tuning with hyperopt
100%|█████████████████████████████████████████████| 250/250 [02:52<00:00,  1.45trial/s, best loss: 0.15953739971168476]


best params mape: {'colsample_bylevel': 0.5348391630776407, 'colsample_bytree': 0.5364190076317654, 'gamma': 1.1321193364033895, 'learning_rate': 0.06937312157745071, 'max_bin': 219.0, 'max_depth': 4.0, 'min_child_weight': 3.867810574636146, 'n_estimators': 882.0, 'reg_alpha': 2.7132658196055957, 'reg_lambda': 48.31494840701946, 'subsample': 0.1867084063520722}


 -> Error on val data after tuning hyperopt:
Root Mean Sqarred Error: 52013.2486
Mean Absolute Percentage Error %: 15.

In [3]:
# Tuning run with RMSE as the score handed to xgboost_regressor.
score = 'rmse'
# xgb_initial / xgb_opt are generic names shared between tuning runs; the result is
# also bound to metric-specific names so it stays identifiable if they are rebound.
# Presumably the pair is (untuned model, hyperopt-tuned model) -- TODO confirm
# against xgboost_regression_hyperopt.
xgb_initial_rmse, xgb_opt_rmse = xgb_initial, xgb_opt = xgboost_regressor(
    X_train_initial, y_train_initial, X_test, y_test, score, space, n_evals
)

-> Baseline error (mean of Y target):
Root Mean Sqarred Error: 57482.5697
Mean Absolute Percentage Error %: 19.4426


 -> Error on train data cv:
Root Mean Sqarred Error: 43222.3027
Mean Absolute Percentage Error %: 17.3174



 -> Error initial xgb model on val data:
Root Mean Sqarred Error: 46767.4067
Mean Absolute Percentage Error %: 17.2762


 -> Starting hyperparams tuning with hyperopt
100%|██████████████████████████████████████████████| 250/250 [03:36<00:00,  1.16trial/s, best loss: 42068.847130831746]


best params rmse: {'colsample_bylevel': 0.49763324940852327, 'colsample_bytree': 0.6031966233377983, 'gamma': 9.505654791356779, 'learning_rate': 0.2596734600176408, 'max_bin': 205.0, 'max_depth': 2.0, 'min_child_weight': 5.105792831476495, 'n_estimators': 766.0, 'reg_alpha': 26.62060996824292, 'reg_lambda': 10.212475032250039, 'subsample': 0.6802136317262382}


 -> Error on val data after tuning hyperopt:
Root Mean Sqarred Error: 42068.8471
Mean Absolute Percentage Error %: 17.7