# Hyperparameter Optimization 
This notebook deals with the optimization of the hyperparameters of the forecasting algorithms. This is done with the hyperopt and hpsklearn packages.

## Import Statements

In [1]:
# import statements 
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# pre processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

# hyperopt library
from hpsklearn import HyperoptEstimator, gradient_boosting_regressor, mlp_regressor 
from hpsklearn import linear_regression, xgboost_regression, decision_tree_regressor
from hyperopt import tpe

## Data Preprocessing
The data is preprocessed in the same way as in the Pipeline.

In [2]:
# Columns that are one-hot encoded, and columns that are rescaled with MinMaxScaler.
categorical_columns = ['ship_mode', 'segment', 'region', 'state', 'city', 'postal_code', 'category', 'sub_category']
scaled_columns = ['discount', 'sales', 'quantity']

# Load the CSV, discard the columns that are not used as features,
# and one-hot encode the categorical columns.
data = (
    pd.read_csv("data.csv")
    .drop(columns=['id', 'country'])
    .pipe(pd.get_dummies, columns=categorical_columns)
)

# 'profit' is the target; every remaining column is a feature.
y = data['profit']
X = data.drop(columns='profit')

# Hold out 20% of the rows for testing (fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same
# transformation to the test rows.
scaler = MinMaxScaler()
X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

## Hyperparameters of the gradient boosting regressor

In [3]:
# Search the gradient boosting regressor's hyperparameter space with TPE:
# at most 50 trials, each trial capped by trial_timeout=120.
# The fixed seed makes the search's random sampling repeatable, so the score
# and best model below do not change arbitrarily between notebook runs.
estim = HyperoptEstimator(
    regressor=gradient_boosting_regressor('myGBR'),
    algo=tpe.suggest,
    max_evals=50,
    trial_timeout=120,
    seed=42,
)
estim.fit(X_train, y_train)

# Report the score on the held-out test split, then the best model found.
print(estim.score(X_test, y_test))
print(estim.best_model())



100%|██████████| 1/1 [00:00<00:00,  1.29trial/s, best loss: 0.9733515736051027]
100%|██████████| 2/2 [00:00<00:00,  1.64trial/s, best loss: 0.9569086082668753]
100%|██████████| 3/3 [00:00<00:00,  1.33trial/s, best loss: 0.9569086082668753]
100%|██████████| 4/4 [00:02<00:00,  2.44s/trial, best loss: 0.9511208572013606]
100%|██████████| 5/5 [00:00<00:00,  1.26trial/s, best loss: 0.2753573255932402]
100%|██████████| 6/6 [00:10<00:00, 10.08s/trial, best loss: 0.2753573255932402]
100%|██████████| 7/7 [00:00<00:00,  1.36trial/s, best loss: 0.2753573255932402]
100%|██████████| 8/8 [00:19<00:00, 19.93s/trial, best loss: 0.2753573255932402]
100%|██████████| 9/9 [00:00<00:00,  1.29trial/s, best loss: 0.2753573255932402]
100%|██████████| 10/10 [00:00<00:00,  1.58trial/s, best loss: 0.2753573255932402]
100%|██████████| 11/11 [00:25<00:00, 25.51s/trial, best loss: 0.2753573255932402]
100%|██████████| 12/12 [00:03<00:00,  3.02s/trial, best loss: 0.2753573255932402]
100%|██████████| 13/13 [00:10<00:0



## Hyperparameters of linear regression

In [34]:
# Search the linear regression hyperparameter space with TPE:
# at most 50 trials, each trial capped by trial_timeout=120.
# The fixed seed makes the search's random sampling repeatable, so the score
# and best model below do not change arbitrarily between notebook runs.
estim = HyperoptEstimator(
    regressor=linear_regression('linear'),
    algo=tpe.suggest,
    max_evals=50,
    trial_timeout=120,
    seed=42,
)
estim.fit(X_train, y_train)

# Report the score on the held-out test split, then the best model found.
print(estim.score(X_test, y_test))
print(estim.best_model())

100%|██████████| 1/1 [00:00<00:00,  1.16trial/s, best loss: 1.37424546058474e+22]
100%|██████████| 2/2 [00:00<00:00,  1.05trial/s, best loss: 1.37424546058474e+22]
100%|██████████| 3/3 [00:00<00:00,  1.12trial/s, best loss: 1.37424546058474e+22]
100%|██████████| 4/4 [00:00<00:00,  1.51trial/s, best loss: 0.9401900300847753]
100%|██████████| 5/5 [00:00<00:00,  1.15trial/s, best loss: 0.9401900300847753]
100%|██████████| 6/6 [00:00<00:00,  1.17trial/s, best loss: 0.9401900300847753]
100%|██████████| 7/7 [00:00<00:00,  1.16trial/s, best loss: 0.9401900300847753]
100%|██████████| 8/8 [00:00<00:00,  1.32trial/s, best loss: 0.919073013318271]
100%|██████████| 9/9 [00:00<00:00,  1.20trial/s, best loss: 0.919073013318271]
100%|██████████| 10/10 [00:00<00:00,  1.20trial/s, best loss: 0.919073013318271]
100%|██████████| 11/11 [00:00<00:00,  1.22trial/s, best loss: 0.919073013318271]
100%|██████████| 12/12 [00:00<00:00,  1.47trial/s, best loss: 0.919073013318271]
100%|██████████| 13/13 [00:00<00:



## Hyperparameters of xgboost

In [4]:
# Search the xgboost regressor's hyperparameter space with TPE:
# at most 50 trials, each trial capped by trial_timeout=120.
# The fixed seed makes the search's random sampling repeatable, so the score
# and best model below do not change arbitrarily between notebook runs.
estim = HyperoptEstimator(
    regressor=xgboost_regression('xgboost'),
    algo=tpe.suggest,
    max_evals=50,
    trial_timeout=120,
    seed=42,
)
estim.fit(X_train, y_train)

# Report the score on the held-out test split, then the best model found.
print(estim.score(X_test, y_test))
print(estim.best_model())

100%|██████████| 1/1 [00:19<00:00, 19.81s/trial, best loss: 0.1696678403711025]
100%|██████████| 2/2 [00:31<00:00, 31.56s/trial, best loss: 0.1696678403711025]
100%|██████████| 3/3 [00:06<00:00,  6.85s/trial, best loss: 0.1696678403711025]
100%|██████████| 4/4 [00:27<00:00, 27.05s/trial, best loss: 0.1696678403711025]
100%|██████████| 5/5 [02:00<00:00, 120.68s/trial, best loss: 0.1696678403711025]
100%|██████████| 6/6 [00:05<00:00,  5.08s/trial, best loss: 0.1696678403711025]
100%|██████████| 7/7 [00:59<00:00, 59.25s/trial, best loss: 0.1696678403711025]
100%|██████████| 8/8 [02:00<00:00, 120.68s/trial, best loss: 0.1696678403711025]
100%|██████████| 9/9 [00:08<00:00,  8.77s/trial, best loss: 0.1696678403711025]
100%|██████████| 10/10 [00:13<00:00, 13.92s/trial, best loss: 0.1696678403711025]
100%|██████████| 11/11 [02:00<00:00, 120.50s/trial, best loss: 0.1696678403711025]
100%|██████████| 12/12 [01:07<00:00, 67.78s/trial, best loss: 0.1696678403711025]
100%|██████████| 13/13 [01:10<0



## Hyperparameters of the decision tree regressor

In [8]:
# Search the decision tree regressor's hyperparameter space with TPE:
# at most 50 trials, each trial capped by trial_timeout=120.
# The fixed seed makes the search's random sampling repeatable, so the score
# and best model below do not change arbitrarily between notebook runs.
estim = HyperoptEstimator(
    regressor=decision_tree_regressor('dec_tree'),
    algo=tpe.suggest,
    max_evals=50,
    trial_timeout=120,
    seed=42,
)
estim.fit(X_train, y_train)

# Report the score on the held-out test split, then the best model found.
print(estim.score(X_test, y_test))
print(estim.best_model())

100%|██████████| 1/1 [00:00<00:00,  1.31trial/s, best loss: 1.3069011411425981]
100%|██████████| 2/2 [00:00<00:00,  1.36trial/s, best loss: 0.9295062070055056]
100%|██████████| 3/3 [00:00<00:00,  1.47trial/s, best loss: 0.9295062070055056]
100%|██████████| 4/4 [00:00<00:00,  1.61trial/s, best loss: 0.48217238816337604]
100%|██████████| 5/5 [00:00<00:00,  1.51trial/s, best loss: 0.26133802543014173]
100%|██████████| 6/6 [00:00<00:00,  1.65trial/s, best loss: 0.26133802543014173]
100%|██████████| 7/7 [00:01<00:00,  1.25s/trial, best loss: 0.26133802543014173]
100%|██████████| 8/8 [00:06<00:00,  6.20s/trial, best loss: 0.26133802543014173]
100%|██████████| 9/9 [00:00<00:00,  1.19trial/s, best loss: 0.26133802543014173]
100%|██████████| 10/10 [00:02<00:00,  2.29s/trial, best loss: 0.26133802543014173]
100%|██████████| 11/11 [00:00<00:00,  1.63trial/s, best loss: 0.26133802543014173]
100%|██████████| 12/12 [00:00<00:00,  1.67trial/s, best loss: 0.26133802543014173]
100%|██████████| 13/13 [0

