# Hyperparameter optmization library

Most of the time we can see this type of code

In [12]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split

df = pd.read_csv('Churn_Modelling.csv', sep=';')
X = df[['CreditScore', 'Age', 'Tenure', 'Balance','NumOfProducts']]
y = df['Exited']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=1234)

train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

params={'objective':'binary',
        'metric':'auc',
        'learning_rate':0.4,
        'max_depth' : 15,
        'num_leaves' : 20,
        'feature_fraction' : 0.8,
        'subsample' : 0.2}
model = lgb.train(params, train_data,
                 num_boost_round=300,
                 early_stopping_rounds=30,
                 valid_sets=[test_data],
                 valid_names=['test'])
score = model.best_score['test']['auc']
print('validation AUC: ', score)

[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	test's auc: 0.822028
[8]	test's auc: 0.819399
[9]	test's auc: 0.822647
[10]	test's auc: 0.82188
[11]	test's auc: 0.820608
[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 

## Step 1: Decouple search parameters from code 

In [13]:
#Put parameters at the top of code
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split

SEARCH_PARAMS={'learning_rate':0.4,
        'max_depth' : 15,
        'num_leaves' : 20,
        'feature_fraction' : 0.8,
        'subsample' : 0.2}

df = pd.read_csv('Churn_Modelling.csv', sep=';')
X = df[['CreditScore', 'Age', 'Tenure', 'Balance','NumOfProducts']]
y = df['Exited']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=1234)

train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

params={'objective':'binary',
        'metric':'auc',
        **SEARCH_PARAMS}
model = lgb.train(params, train_data,
                 num_boost_round=300,
                 early_stopping_rounds=30,
                 valid_sets=[test_data],
                 valid_names=['test'])
score = model.best_score['test']['auc']
print('validation AUC: ', score)

[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	test's auc: 0.822028
[8]	test's auc: 0.819399
[9]	test's auc: 0.822647
[10]	test's auc: 0.82188
[11]	test's auc: 0.820608
[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 

## STEP2 : Wrap training and evaluation into a function

In [15]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split

SEARCH_PARAMS = {'learning_rate': 0.4,
                 'max_depth': 15,
                 'num_leaves': 20,
                 'feature_fraction': 0.8,
                 'subsample': 0.2}


def train_evaluate(search_params):
    data = pd.read_csv('../data/train.csv', nrows=10000)
    X = df[['CreditScore', 'Age', 'Tenure', 'Balance','NumOfProducts']]
    y = df['Exited']
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=1234)

    train_data = lgb.Dataset(X_train, label=y_train)
    test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

    params={'objective':'binary',
            'metric':'auc',
            **SEARCH_PARAMS}
    model = lgb.train(params, train_data,
                     num_boost_round=300,
                     early_stopping_rounds=30,
                     valid_sets=[test_data],
                     valid_names=['test'])
    score = model.best_score['test']['auc']
    
    return score


if __name__ == '__main__':
    score = train_evaluate(SEARCH_PARAMS)
    print('validation AUC:', score)

[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	test's auc: 0.822028
[8]	test's auc: 0.819399
[9]	test's auc: 0.822647
[10]	test's auc: 0.82188
[11]	test's auc: 0.820608
[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 

## Step 3: Run Hypeparameter Tuning script

In [18]:
import skopt
# Import function train_evaluate


In [19]:
#Define the Search Space
SPACE =[
    skopt.space.Real(0.01, 0.5, name='learning_rate', prior='log_uniform'),
    skopt.space.Integer(1, 30, name='max_depth'),
    skopt.space.Integer(2, 100, name='num_leaves'),
    skopt.space.Real(0.01, 1.0, name='feature_fraction', prior='uniform'),
    skopt.space.Real(0.01, 1.0, name='subsample', prior='uniform')]

In [20]:
#Fonction pour évaluer en fonction de l'espace de recherche
@skopt.utils.use_named_args(SPACE)
def objective(**params):
    return -1.0 * train_evaluate(params)

In [24]:
#On enregistre les résultats 
results = skopt.forest_minimize(objective, SPACE, n_calls=30, n_random_starts=10)
best_auc = -1.0 * results.fun
best_params = results.x
print('best result: ', best_auc)
print('best paramters: ', best_params)

[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	test's auc: 0.822028
[8]	test's auc: 0.819399
[9]	test's auc: 0.822647
[10]	test's auc: 0.82188
[11]	test's auc: 0.820608
[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 

[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 0.808507
[38]	test's auc: 0.806826
[39]	test's auc: 0.805477
Early stopping, best iteration is:
[9]	test's auc: 0.822647
[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	t

[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	test's auc: 0.822028
[8]	test's auc: 0.819399
[9]	test's auc: 0.822647
[10]	test's auc: 0.82188
[11]	test's auc: 0.820608
[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 

[1]	test's auc: 0.733364
Training until validation scores don't improve for 30 rounds
[2]	test's auc: 0.816237
[3]	test's auc: 0.814695
[4]	test's auc: 0.812469
[5]	test's auc: 0.811224
[6]	test's auc: 0.820794
[7]	test's auc: 0.822028
[8]	test's auc: 0.819399
[9]	test's auc: 0.822647
[10]	test's auc: 0.82188
[11]	test's auc: 0.820608
[12]	test's auc: 0.820398
[13]	test's auc: 0.82055
[14]	test's auc: 0.821607
[15]	test's auc: 0.821256
[16]	test's auc: 0.820178
[17]	test's auc: 0.819268
[18]	test's auc: 0.820213
[19]	test's auc: 0.820616
[20]	test's auc: 0.819415
[21]	test's auc: 0.818184
[22]	test's auc: 0.81951
[23]	test's auc: 0.81866
[24]	test's auc: 0.818284
[25]	test's auc: 0.817409
[26]	test's auc: 0.816728
[27]	test's auc: 0.816026
[28]	test's auc: 0.814758
[29]	test's auc: 0.813723
[30]	test's auc: 0.812126
[31]	test's auc: 0.810655
[32]	test's auc: 0.808349
[33]	test's auc: 0.807348
[34]	test's auc: 0.807702
[35]	test's auc: 0.80844
[36]	test's auc: 0.809444
[37]	test's auc: 

# Visualize training with Neptune


In [28]:
import neptune
import neptunecontrib.monitoring.skopt as sk_utils
import skopt

In [37]:
#neptune.init('RomainLeclair/Test')
#neptune.create_experiment(name='Test', upload_source_files=['*.py'])
#We need to create an account 

MissingApiToken: Missing API token. Use "NEPTUNE_API_TOKEN" environment variable or pass it as an argument