<a href="https://colab.research.google.com/github/ThierrryScotto/Optuna-XGBoost-hyperparameter/blob/main/Optuna_XGBoost_hyperparameter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import sklearn

sns.set()
%config InlineBeckend.figure_formar = 'retina'
warnings.filterwarnings("ignore")

## Loading the Boston housing dataset

In [2]:
from sklearn.datasets import load_boston
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the installed scikit-learn version still provides it before a fresh run.
boston = load_boston()

In [3]:
# Build one frame holding the feature columns plus the regression target as `target`.
df = pd.DataFrame(data=boston.data, columns=boston.feature_names).assign(target=boston.target)

In [4]:
# Show the assembled frame as the cell's rich output.
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [5]:
# Features are every column except the label; the label column alone is the target.
X = df.drop(columns='target')
y = df['target']

### Scaling data

In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature scaling statistics, then apply them to the feature matrix.
# NOTE(review): the scaler is fitted on the full feature matrix before the train/test
# split performed in a later cell, so test rows influence the scaling statistics.
se = StandardScaler()
se.fit(X)
X = se.transform(X)

### Split data into training and test

In [7]:
import xgboost as xgb
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12,
)

**Converting the training and test data into XGBoost's optimized `DMatrix` format for efficiency**

In [8]:
# Wrap each split in a DMatrix, passing the targets explicitly as the labels.
df_train = xgb.DMatrix(data=X_train, label=y_train)
df_test = xgb.DMatrix(data=X_test, label=y_test)

### Testing a base model

In [9]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, ShuffleSplit

# Cross-validate an untuned model on the training split with 10 folds, scored by negative RMSE.
# NOTE(review): XGBRFRegressor is XGBoost's random-forest variant; presumably the
# gradient-boosted XGBRegressor was intended as the baseline for the tuning below — confirm.
xg_reg = xgb.XGBRFRegressor()
cv_ = KFold(n_splits=10)
scores = cross_val_score(
    estimator=xg_reg,
    X=X_train,
    y=y_train,
    scoring='neg_root_mean_squared_error',
    cv=cv_,
    n_jobs=-1,
)

In [10]:
# Summarise the fold scores (mean and standard deviation), then list each fold's score.
print(scores.mean(), scores.std())
print(scores)

-4.690851864521107 0.9137364320425729
[-4.46876379 -4.44228761 -4.73628169 -5.57790661 -3.89063134 -5.08357565
 -5.49483956 -3.32361937 -6.36116408 -3.52944895]


#### Creating a scoring function with the help of XGBoost's built-in early stopping
Here we are not tuning `n_estimators` with Optuna.

Instead, the number of boosting rounds is chosen by early stopping in `xgboost.train`.

In [11]:
import re
def return_rmse(params):
    """Train a booster with early stopping and return its score on the test DMatrix.

    The booster is trained on the module-level ``df_train`` for up to 600 rounds,
    stopping early once the metric on ``df_test`` has not improved for 20 rounds.
    The returned number is the last value in the line reported by ``Booster.eval``
    (presumably RMSE, as the function name suggests — it depends on ``params``).

    NOTE(review): ``df_test`` is used both for early stopping and for the final
    score, so the reported value is optimistic.

    Args:
        params: dict of XGBoost training parameters; an empty dict uses the defaults.

    Returns:
        The parsed metric value as a Python ``float``. The value is also printed.

    Raises:
        ValueError: if no trailing number can be found in the evaluation line.
    """
    model = xgb.train(
        params,
        df_train,
        num_boost_round=600,
        evals=[(df_test, 'eval')],
        early_stopping_rounds=20,
        verbose_eval=0,
    )
    # Booster.eval returns a text line ending in "<name>-<metric>:<value>".
    eval_line = model.eval(df_test)
    # Accept plain decimals as well as signed / scientific notation (e.g. "1e-05"),
    # which a bare digits-and-dots pattern would silently misread.
    match = re.search(r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?$', eval_line.strip())
    if match is None:
        raise ValueError(f"could not parse a metric value from {eval_line!r}")
    # Use the built-in float: the `np.float` alias no longer exists in NumPy >= 1.24.
    result = float(match.group(0))
    print(result)
    return result

#### Installing and importing Optuna

In [12]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.1.0-py3-none-any.whl (365 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.3/365.3 KB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.9.1
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.9.2-py3-none-any.whl (210 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.6/210.6 KB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting Mako
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 KB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.9.2 cmaes-0.9.1 colorlog-6.7.0 optuna-3.1.0


In [13]:
import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler

In [14]:
def objective(trial):
    """Optuna objective for the broad hyperparameter search.

    Samples one XGBoost parameter set from the trial and scores it with
    ``return_rmse``. The number of boosting rounds is deliberately not sampled:
    ``return_rmse`` picks it through early stopping.

    Args:
        trial: the Optuna trial used to sample each hyperparameter.

    Returns:
        The value returned by ``return_rmse`` for the sampled parameters
        (the quantity the study minimises).
    """
    # suggest_float replaces the deprecated suggest_uniform / suggest_loguniform calls.
    param = {
        'max_depth': trial.suggest_int('max_depth', 3, 5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 6),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 2),
        'min_child_weight': trial.suggest_int('min_child_weight', 0, 5),
        'gamma': trial.suggest_float('gamma', 0, 4),
        # Learning rate is sampled on a log scale.
        'learning_rate': trial.suggest_float('learning_rate', 0.05, 0.5, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.9),
        'subsample': trial.suggest_float('subsample', 0.4, 0.9),
        'nthread': -1,
    }
    return return_rmse(param)

**Calling the Optuna study**

In [15]:
# Minimise the objective with a TPE sampler. The sampler is seeded so the sequence of
# sampled hyperparameters is repeatable from run to run.
stady1 = optuna.create_study(direction='minimize', sampler=TPESampler(seed=12))
stady1.optimize(objective, n_trials=1050, show_progress_bar=True)

[32m[I 2023-01-19 18:54:26,470][0m A new study created in memory with name: no-name-a1941b99-13b8-417f-a28a-9f7bb0c8bca5[0m


  0%|          | 0/600 [00:00<?, ?it/s]

3.468684
[32m[I 2023-01-19 18:54:26,728][0m Trial 0 finished with value: 3.468684 and parameters: {'max_depth': 3, 'reg_alpha': 2.265347421967942, 'reg_lambda': 1.1290769397379739, 'min_child_weight': 4, 'gamma': 3.251359379635875, 'learning_rate': 0.054882225673478814, 'colsample_bytree': 0.7242144102903918, 'subsample': 0.5003298237869453}. Best is trial 0 with value: 3.468684.[0m
3.542384
[32m[I 2023-01-19 18:54:26,906][0m Trial 1 finished with value: 3.542384 and parameters: {'max_depth': 3, 'reg_alpha': 3.6921165960313607, 'reg_lambda': 1.7073068524892399, 'min_child_weight': 3, 'gamma': 2.9720675529725638, 'learning_rate': 0.09178221589785004, 'colsample_bytree': 0.6538715903546402, 'subsample': 0.5970018594254421}. Best is trial 0 with value: 3.468684.[0m
3.344288
[32m[I 2023-01-19 18:54:27,036][0m Trial 2 finished with value: 3.344288 and parameters: {'max_depth': 3, 'reg_alpha': 3.325008971886185, 'reg_lambda': 0.2511914040743275, 'min_child_weight': 1, 'gamma': 0.5444

#### **Printing the best estimators**

In [16]:
# Re-train and score a model using the best parameters found by the first study.
return_rmse(stady1.best_params)

2.81926


2.81926

In [17]:
# The study minimises the value returned by `objective`, which comes from `return_rmse`:
# it is an error measure (lower is better), not an accuracy.
trial = stady1.best_trial
print('Best RMSE: {}'.format(trial.value))

Accuracy: 2.81926


In [18]:
# Show the best hyperparameters found by the first study.
stady1.best_params

{'max_depth': 5,
 'reg_alpha': 3.368664485855749,
 'reg_lambda': 0.5526958952595741,
 'min_child_weight': 0,
 'gamma': 1.821893201719774,
 'learning_rate': 0.13815247495837762,
 'colsample_bytree': 0.4765102760758137,
 'subsample': 0.42283255162742595}

## Without using any hyperparameter tuning

In [20]:
# Score the default parameters first, then the first study's best parameters.
# Each score is computed right before its report so the output order is unchanged.
params = {}
untuned_rmse = return_rmse(params)
print(f"Without tuning {untuned_rmse}\n")
tuned_rmse = return_rmse(stady1.best_params)
print(f"With tuning {tuned_rmse}")

3.214825
Without tuning 3.214825

2.81926
With tuning 2.81926


### Plotting our search history

In [21]:
# Plot the optimization history of the first study.
optuna.visualization.plot_optimization_history(stady1)

In [22]:
# Slice plot of the first study.
optuna.visualization.plot_slice(stady1)

## Tuning again with narrower ranges to find the sweet spot

In [23]:
def objective(trial):
    """Optuna objective for the narrowed hyperparameter search.

    NOTE: this redefines (and therefore shadows) the earlier ``objective`` used
    for the broad search. ``max_depth``, ``min_child_weight`` and ``gamma`` are
    pinned to a single value by giving them identical lower and upper bounds, so
    they still appear in the study's parameters.

    Args:
        trial: the Optuna trial used to sample each hyperparameter.

    Returns:
        The value returned by ``return_rmse`` for the sampled parameters
        (the quantity the study minimises).
    """
    # suggest_float replaces the deprecated suggest_uniform / suggest_loguniform calls.
    param = {
        'max_depth': trial.suggest_int('max_depth', 4, 4),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 2),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.5, 1),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 1),
        'gamma': trial.suggest_int('gamma', 1, 1),
        # Learning rate is sampled on a log scale.
        'learning_rate': trial.suggest_float('learning_rate', 0.2, 0.4, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.6),
        'subsample': trial.suggest_float('subsample', 0.4, 0.5),
        'nthread': -1,
    }
    return return_rmse(param)

**Calling the optuna study**

In [25]:
# Second study over the narrowed ranges. The sampler is seeded so the sequence of
# sampled hyperparameters is repeatable from run to run.
stady2 = optuna.create_study(direction='minimize', sampler=TPESampler(seed=12))
stady2.optimize(objective, n_trials=1050, show_progress_bar=True)

[32m[I 2023-01-19 20:36:21,752][0m A new study created in memory with name: no-name-0b42abf8-3678-44e0-811b-be26438ee81c[0m


  0%|          | 0/1050 [00:00<?, ?it/s]

3.191886
[32m[I 2023-01-19 20:36:21,884][0m Trial 0 finished with value: 3.191886 and parameters: {'max_depth': 4, 'reg_alpha': 0.6174645616129819, 'reg_lambda': 0.9759749542127397, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.2542150562797708, 'colsample_bytree': 0.5698239690679803, 'subsample': 0.48013052955639646}. Best is trial 0 with value: 3.191886.[0m
3.039721
[32m[I 2023-01-19 20:36:22,001][0m Trial 1 finished with value: 3.039721 and parameters: {'max_depth': 4, 'reg_alpha': 1.917145785194054, 'reg_lambda': 0.8563336185944499, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.25306349528818295, 'colsample_bytree': 0.4671384625237996, 'subsample': 0.43767145986941963}. Best is trial 1 with value: 3.039721.[0m
2.994627
[32m[I 2023-01-19 20:36:22,147][0m Trial 2 finished with value: 2.994627 and parameters: {'max_depth': 4, 'reg_alpha': 1.1196262108283572, 'reg_lambda': 0.6658885390361273, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.2238759071227670

In [26]:
# Plot the optimization history of the second (narrowed) study.
optuna.visualization.plot_optimization_history(stady2)

In [27]:
# Show the best hyperparameters found by the second study.
stady2.best_params

{'max_depth': 4,
 'reg_alpha': 0.5334492598739983,
 'reg_lambda': 0.9432950701807773,
 'min_child_weight': 1,
 'gamma': 1,
 'learning_rate': 0.3418663078988343,
 'colsample_bytree': 0.4960315732912387,
 'subsample': 0.43890823674388835}

In [29]:
# Final comparison: default parameters, then the best of study 1, then the best of study 2.
# Each score is computed right before its report so the output order is unchanged.
params = {}
rmse_default = return_rmse(params)
print(f"withou tuning: {rmse_default}\n")
rmse_study1 = return_rmse(stady1.best_params)
print(f"With tuning params 1: {rmse_study1}\n")
rmse_study2 = return_rmse(stady2.best_params)
print(f"With tuning params 2: {rmse_study2}")

3.214825
withou tuning: 3.214825

2.81926
With tuning params 1: 2.81926

2.579911
With tuning params 2: 2.579911
