# LightGBM parameter tuning using Bayesian optimization

In [None]:
from fastai import *
import numpy as np
import pandas as pd
import pandas_profiling
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
from skopt.space import Real, Integer
from skopt.utils import use_named_args
import itertools
from sklearn.metrics import roc_auc_score
from skopt import gp_minimize
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

In [None]:
# Locations of the input CSV files.
TRAIN = 'data/train.csv'
TEST = 'data/test.csv'
SAMPLE = 'data/sample_submission.csv'

train = pd.read_csv(TRAIN)
test = pd.read_csv(TEST)

# Features are every column except the row identifier and the label.
X = train.drop(['ID_code', 'target'], axis=1)
y = train.target

# Seed the hold-out split: the searches below are seeded (random_state=0),
# but an unseeded split would still score them against different rows on
# every execution, making results impossible to compare or reproduce.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
# IPython-only introspection (not valid plain Python): shows the help/source
# of gp_minimize in the notebook.
??gp_minimize

In [None]:
# Search space for the LightGBM objective.
# Order matters: lgb_optim reads the sampled point by position, so the four
# integer dimensions must stay first, followed by the four real-valued ones.
baysian_space = [
    *(
        Integer(low, high, name=label)
        for label, low, high in (
            ('num_leaves', 5, 20),
            ('min_child_samples', 50, 200),
            ('min_data_in_leaf', 20, 50),
            ('bagging_freq', 1, 5),
        )
    ),
    *(
        Real(low, high, name=label)
        for label, low, high in (
            ('subsample', 0.6, 0.9),
            ('feature_fraction', 0.01, 0.1),
            ('learning_rate', 0.001, 0.01),
            ('bagging_fraction', 0.1, 0.5),
        )
    ),
]

In [None]:
# Wrap the training and hold-out splits in LightGBM Dataset objects.
trn_data, val_data = (
    lgb.Dataset(features, label=labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

In [None]:
def lgb_optim(values):
    """Objective for gp_minimize: train LightGBM and score it on the hold-out split.

    Args:
        values: One sampled point from ``baysian_space``, read by position:
            num_leaves, min_child_samples, min_data_in_leaf, bagging_freq,
            subsample, feature_fraction, learning_rate, bagging_fraction.

    Returns:
        The negated ROC AUC on ``X_test``/``y_test``. It is negated because
        gp_minimize minimizes its objective, while a higher AUC is better.
    """
    # NOTE(review): LightGBM documents min_child_samples/min_data_in_leaf and
    # subsample/bagging_fraction as aliases of each other, so only one of each
    # pair presumably takes effect -- confirm which one is intended.
    params = {
        'num_leaves': values[0],
        'min_child_samples': values[1],
        'min_data_in_leaf': values[2],
        'bagging_freq': values[3],
        'subsample': values[4],
        'feature_fraction': values[5],
        'learning_rate': values[6],
        'bagging_fraction': values[7],
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'max_depth': -1,
        'metric': 'auc',
        'boost_from_average': 'false',
        'verbosity': -1,
        'tree_learner': 'serial',
        'gpu_platform_id': -1,
    }
    print('\nNext set of params.....', params)

    # Build fresh Dataset objects for this evaluation instead of reusing the
    # module-level ones.
    trn_data = lgb.Dataset(X_train, label=y_train)
    val_data = lgb.Dataset(X_test, label=y_test)
    model_lgb = lgb.train(
        params,
        trn_data,
        1000000,
        valid_sets=[trn_data, val_data],
        verbose_eval=5000,
        early_stopping_rounds=4000,
    )

    # Score on the hold-out split, not on the rows the model was fitted to:
    # optimising the training AUC would simply reward overfitting.
    val_pred = model_lgb.predict(X_test)
    return -roc_auc_score(y_test, val_pred)

In [None]:
# Search the LightGBM space: 20 objective evaluations in total, of which
# n_random_starts=10 are random starting points; seeded for repeatability.
res_gp = gp_minimize(
    func=lgb_optim,
    dimensions=baysian_space,
    n_calls=20,
    n_random_starts=10,
    random_state=0,
)

In [None]:
# Build `param` from the search result: pair each dimension name in
# baysian_space with the matching entry of the best point res_gp.x, then add
# the fixed settings that lgb_optim uses for every evaluation.
param = {dim.name: best for dim, best in zip(baysian_space, res_gp.x)}
param.update({
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'max_depth': -1,
    'metric': 'auc',
    'boost_from_average': 'false',
    'verbosity': -1,
    'tree_learner': 'serial',
    'gpu_platform_id': -1,
})

# Retrain with the best parameters, early-stopping on the hold-out set.
clf = lgb.train(param, trn_data, 1000000, valid_sets = [trn_data, val_data], verbose_eval=5000, early_stopping_rounds = 4000)

# RandomForest parameter tuning using Bayesian optimization

In [None]:
# Search space for the random-forest objective.
# Order matters: rf_optim reads the sampled point by position
# (min_samples_leaf, n_estimators, max_features).
rf_baysian_space = [
    *(
        Integer(low, high, name=label)
        for label, low, high in (
            ('min_samples_leaf', 15, 200),
            ('n_estimators', 100, 250),
        )
    ),
    Real(0.1, 0.7, name='max_features'),
]

In [None]:
def rf_optim(values):
    """Objective for gp_minimize: fit a random forest and score it on the hold-out split.

    Args:
        values: One sampled point from ``rf_baysian_space``, read by position:
            min_samples_leaf, n_estimators, max_features.

    Returns:
        The negated ROC AUC on ``X_test``/``y_test``. It is negated because
        gp_minimize minimizes its objective, while a higher AUC is better.
    """
    params = {
        'min_samples_leaf': values[0],
        'n_estimators': values[1],
        'max_features': values[2],
        'n_jobs': -1,
        'random_state': 42,
    }
    print('\nNext set of params.....', params)

    rf = RandomForestClassifier(**params)
    rf.fit(X_train, y_train)

    # ROC AUC ranks continuous scores, so use class probabilities rather than
    # hard predict() labels, and evaluate on held-out rows rather than on the
    # rows the forest was fitted to.
    # Assumes a binary target whose positive class is the second entry of
    # rf.classes_ -- TODO confirm.
    val_scores = rf.predict_proba(X_test)[:, 1]
    return -roc_auc_score(y_test, val_scores)

In [None]:
# Search the random-forest space; seeded for repeatability.
# NOTE(review): n_calls equals n_random_starts, so every evaluation appears
# to be a random starting point -- confirm this is intended.
res_gp = gp_minimize(
    func=rf_optim,
    dimensions=rf_baysian_space,
    n_calls=10,
    n_random_starts=10,
    random_state=0,
)

# KFold

In [None]:
from sklearn.model_selection import StratifiedKFold

In [None]:
def k_fold_lgb_optim(values):
    """Objective for gp_minimize: cross-validated LightGBM score.

    Trains one model per fold of a 10-fold stratified split of ``X``/``y``,
    early-stopping each model on its own validation fold.

    Args:
        values: One sampled point from ``baysian_space``, read by position:
            num_leaves, min_child_samples, min_data_in_leaf, bagging_freq,
            subsample, feature_fraction, learning_rate, bagging_fraction.

    Returns:
        The negated mean ROC AUC over the validation folds. It is negated
        because gp_minimize minimizes its objective.
    """
    # NOTE(review): LightGBM documents min_child_samples/min_data_in_leaf and
    # subsample/bagging_fraction as aliases of each other, so only one of each
    # pair presumably takes effect -- confirm which one is intended.
    params = {
        'num_leaves': values[0],
        'min_child_samples': values[1],
        'min_data_in_leaf': values[2],
        'bagging_freq': values[3],
        'subsample': values[4],
        'feature_fraction': values[5],
        'learning_rate': values[6],
        'bagging_fraction': values[7],
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'max_depth': -1,
        'metric': 'auc',
        'boost_from_average': 'false',
        'verbosity': -1,
        'tree_learner': 'serial',
        'gpu_platform_id': -1,
    }
    print('\nNext set of params.....', params)

    skf = StratifiedKFold(n_splits=10)
    fold_aucs = []
    for train_idx, test_idx in skf.split(X, y):
        # split() yields positional row indices; X is a DataFrame and y a
        # Series, so rows must be selected with .iloc (plain [] on a
        # DataFrame would look the indices up as column labels).
        X_fold_trn, y_fold_trn = X.iloc[train_idx], y.iloc[train_idx]
        X_fold_val, y_fold_val = X.iloc[test_idx], y.iloc[test_idx]

        trn_data = lgb.Dataset(X_fold_trn, label=y_fold_trn)
        test_data = lgb.Dataset(X_fold_val, label=y_fold_val)
        # Early-stop on this fold's validation data, not on the module-level
        # hold-out set, which overlaps the fold's training rows.
        model_lgb = lgb.train(
            params,
            trn_data,
            1000000,
            valid_sets=[trn_data, test_data],
            verbose_eval=5000,
            early_stopping_rounds=4000,
        )
        fold_aucs.append(
            roc_auc_score(y_fold_val, model_lgb.predict(X_fold_val))
        )

    # Aggregate after all folds have run; returning inside the loop would
    # score the first fold only.
    return -float(np.mean(fold_aucs))

In [None]:
# Optimise the cross-validated objective defined in this section; passing
# lgb_optim here would just repeat the single hold-out search from earlier.
# NOTE(review): n_calls equals n_random_starts, so every evaluation appears
# to be a random starting point -- confirm this is intended.
res_gp = gp_minimize(k_fold_lgb_optim, baysian_space, n_calls=10,
                     random_state=0, n_random_starts=10)