# Hyperopt guide

- [Link to source](https://www.kaggle.com/code/prashant111/a-guide-on-xgboost-hyperparameters-tuning#3.-Basic-Setup-)
---

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import os

# Walk the Kaggle input directory recursively and print the full path of
# every file found, so the dataset location can be read off the output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
# Path of the CSV inside the Kaggle input directory.
data = '/kaggle/input/wholesale-customers-data-set/Wholesale customers data.csv'

# Read the CSV into a DataFrame; later cells take features and target from it.
df = pd.read_csv(data)

In [None]:
# Target is the 'Channel' column; every remaining column is a feature.
y = df['Channel']

X = df.drop(columns='Channel')

In [None]:
# Recode the target to {0, 1}: label 2 becomes 0, label 1 is left as it is.
# A new Series is built instead of assigning through a boolean mask, because
# `y` was taken from `df` and masked in-place assignment can either raise
# SettingWithCopyWarning or silently write through to `df`.
# The cell is idempotent: re-running it finds no 2s left to replace.
y = y.replace({2: 0})

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

In [None]:
# Hyperopt search space. Entries built with hp.* are sampled anew for every
# trial; plain values ('n_estimators', 'seed') are passed through unchanged.
space = {
    'max_depth': hp.quniform('max_depth', 3, 18, 1),
    'gamma': hp.uniform('gamma', 1, 9),
    'reg_alpha': hp.quniform('reg_alpha', 40, 180, 1),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
    'n_estimators': 180,
    'seed': 0,
}

Hyperopt provides the following expressions (distributions) for defining a search space:

- hp.choice(label, options) — Returns one of the options, which should be a list or tuple.
- hp.randint(label, upper) — Returns a random integer in the range [0, upper).

- hp.uniform(label, low, high) — Returns a value uniformly between low and high.

- hp.quniform(label, low, high, q) — Returns a value round(uniform(low, high) / q) * q, i.e. a uniform sample rounded to the nearest multiple of q. With q = 1 the result is a whole number, but it is still returned as a float.

- hp.normal(label, mu, sigma) — Returns a real value that’s normally distributed with mean mu and standard deviation sigma.

In [None]:
def objective(space):
    """Train an XGBoost classifier for one sampled configuration and score it.

    Parameters
    ----------
    space : dict
        One point drawn from the hyperopt search space. The keys read are
        'n_estimators', 'max_depth', 'gamma', 'reg_alpha', 'reg_lambda',
        'min_child_weight', 'colsample_bytree' and 'seed'.

    Returns
    -------
    dict
        ``{'loss': -accuracy, 'status': STATUS_OK}``. The accuracy is negated
        because hyperopt's ``fmin`` minimises the reported loss.

    Notes
    -----
    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. ``X_test`` is used both as an early-stopping evaluation set
    and for the reported accuracy, so the score is not an unbiased estimate
    of generalisation performance.
    """
    clf = xgb.XGBClassifier(
        n_estimators=space['n_estimators'],
        # hp.quniform yields floats; these parameters are cast to int.
        max_depth=int(space['max_depth']),
        gamma=space['gamma'],
        reg_alpha=int(space['reg_alpha']),
        # reg_lambda is part of the search space, so it must reach the model.
        reg_lambda=space['reg_lambda'],
        min_child_weight=int(space['min_child_weight']),
        # Must stay a float: int() would truncate every sample in [0.5, 1)
        # to 0, so the tuned fraction would never be used.
        colsample_bytree=space['colsample_bytree'],
        random_state=space['seed'],
        # Given to the constructor rather than fit(): recent xgboost
        # releases no longer accept these two arguments in fit().
        eval_metric="auc",
        early_stopping_rounds=10)

    evaluation = [(X_train, y_train), (X_test, y_test)]

    clf.fit(
        X_train,
        y_train,
        eval_set=evaluation,
        verbose=False)

    # predict() already returns class labels, so no thresholding is needed.
    pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, pred)
    print("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK}

In [None]:
# Trials records the result of every evaluation made during the search.
trials = Trials()

# Run 100 TPE-guided evaluations of `objective` over `space`.
# rstate seeds the sampler so the search is reproducible across runs.
# NOTE(review): hyperopt >= 0.2.7 expects a numpy Generator here; older
# releases expect np.random.RandomState(0) instead — confirm the installed
# version.
best_hyperparams = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
    rstate=np.random.default_rng(0))

In [None]:
# Print a heading followed by the value returned by fmin.
print("The best hyperparameters are : ","\n")
print(best_hyperparams)