# Hyperparameter Optimization - HyperOpt
### Table of Content
I. [**Data Preparation**](#i)<br>
II. [**HyperOpt**](#ii)<br>
<a id = 'top'>

Reference: <a href='https://medium.com/analytics-vidhya/hyperparameter-tuning-hyperopt-bayesian-optimization-for-xgboost-and-neural-network-8aedf278a1c9'>HyperParameter Tuning — Hyperopt Bayesian Optimization</a>

In [15]:
#!pip install pyspark
#!pip install mlflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from hyperopt import fmin, tpe, hp, SparkTrials, Trials, STATUS_OK
from hyperopt.pyll import scope
import xgboost as xgb
import pyspark
import warnings
warnings.filterwarnings('ignore')

<br><a id="i"></a><div align = "right"><a href = '#top'>Back to Top</a></div>

## I. Data Preparation
***

In [3]:
df_train = pd.read_csv('../data/Preprocessed_data_with_date/airplane_train_processed_date.csv')
df_val = pd.read_csv('../data/Preprocessed_data_with_date/airplane_test_processed_date.csv')

In [4]:
features = ['Gender_Female', 'Customer Type_Loyal Customer',
       'Type of Travel_Business travel',
       'Type of Travel_Personal Travel', 'Class_Business',
       'Class_Eco', 'Age',
       'Flight Distance', 'Departure Delay in Minutes',
       'Arrival Delay in Minutes', 'Inflight wifi service',
       'Departure/Arrival time convenient',
       'Ease of Online booking', 'Gate location',
       'Food and drink', 'Online boarding',
       'Seat comfort', 'Inflight entertainment',
       'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service',
       'Inflight service', 'Cleanliness']

le = LabelEncoder()
X_train = df_train[features]
y_train = df_train['satisfaction']
y_train = le.fit_transform(y_train)

X_val = df_val[features]
y_val = df_val['satisfaction']
y_val = le.fit_transform(y_val)

X_train.shape, X_val.shape, y_train.shape, y_val.shape

((102825, 24), (25976, 24), (102825,), (25976,))

<br><a id="ii"></a><div align = "right"><a href = '#top'>Back to Top</a></div>

## II. HyperOpt
***

In [45]:
search_space = {
    'reg_lambda': hp.loguniform('reg_lambda', 1e-3, 5.0),
    'alpha': hp.loguniform('alpha', 1e-3, 5.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'learning_rate': hp.loguniform('learning_rate', -1, 0.05),
    'n_estimators': 1000,
    'max_depth': scope.int(hp.quniform('max_depth', 5, 40, 5)),
    'random_state': 15,
    'min_child_weight': hp.loguniform('min_child_weight', -1, 30),
     'objective': 'binary:logistic'
}    
    

In [46]:
def hyperopt_tune(search_space=search_space, X_train = X_train, y_train = y_train):
    train_x, test_x, train_y, test_y = train_test_split(X_train, y_train, test_size=0.3,random_state=15)
    
    model = xgb.XGBClassifier(**search_space)
    evaluation = [( train_x, train_y), ( test_x, test_y)]
    
    model.fit(train_x, train_y,
            eval_set=evaluation, eval_metric="error",
            early_stopping_rounds=100,verbose=False)

    pred_y = model.predict(test_x)
    accuracy = accuracy_score(test_y, pred_y)
    
    return {'loss': -accuracy, 'status': STATUS_OK, 'model': model}

In [47]:
trials = Trials()
best = fmin(fn=hyperopt_tune,
            space=search_space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print (best)

100%|██████| 100/100 [26:03<00:00, 15.63s/trial, best loss: -0.9599974066390041]
{'alpha': 2.1473865088650417, 'colsample_bytree': 0.8446206452199106, 'learning_rate': 0.36844521941706876, 'max_depth': 10.0, 'min_child_weight': 0.3870279128386974, 'reg_lambda': 1.575148448345619, 'subsample': 0.871280112643366}


{'alpha': 2.1473865088650417, 'colsample_bytree': 0.8446206452199106, 'learning_rate': 0.36844521941706876, 'max_depth': 10.0, 'min_child_weight': 0.3870279128386974, 'reg_lambda': 1.575148448345619, 'subsample': 0.871280112643366}

In [50]:
final_model = xgb.XGBClassifier(**{'alpha': 2.1473865088650417, 'colsample_bytree': 0.8446206452199106, 'learning_rate': 0.36844521941706876, 'max_depth': 10, 'min_child_weight': 0.3870279128386974, 'reg_lambda': 1.575148448345619, 'subsample': 0.871280112643366})
final_model.fit(X_train, y_train)
y_val_pred = final_model.predict(X_val)
accuracy_score(y_val, y_val_pred)



0.9591161071758546