In [1]:
import pandas as pd
import numpy as np
import random
import os

import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import lightgbm as lgb
import xgboost as xgb
import catboost as cat
from bayes_opt import BayesianOptimization
import sys
sys.path.append("..")
from utils import *
from preprocessing_utils import *
import warnings
warnings.filterwarnings("ignore")

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42)

In [2]:
train, valid = load_train_valid()
train_x, train_y, valid_x, valid_y = preprocessing(train, valid, is_test=False)

lec = LabelEncoder()
train_y = lec.fit_transform(train_y)
valid_y = lec.transform(valid_y)

In [3]:
bayesian_params = {
    'max_depth':(6, 16), #트리 최대 깊이
    'num_leaves':(24,64), #트리 하나에 최대 잎 개수
    'min_child_samples': (10, 200), #하나의 잎에 최소 데이터 개수 (오버피팅 대응)
    'min_child_weight': (1, 50), #하나의 잎에 최소 sum hessian
    'subsample': (0.5, 1), #
    'colsample_bytree': (0.5, 1), #피처의 50% 를 트레이닝 전에 선택
    'max_bin': (10, 500), #bins 의 최대 개수
    'reg_lambda': (0.001, 10), #L2 regularization
    'reg_alpha': (0.01, 50) #L1 regularization 
}

In [10]:
def lgb_logloss_eval(max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree, max_bin, reg_lambda, reg_alpha):
  params = {
      "n_estimators" : 500, "learning_rate": 0.02,
      'max_depth': int(round(max_depth)),
      'num_leaves': int(round(num_leaves)),
      'min_child_samples': int(round(min_child_samples)),
      'min_child_weight': int(round(min_child_weight)),
      'subsample': max(min(subsample, 1), 0), 
      'colsample_bytree': max(min(colsample_bytree, 1), 0),
      'max_bin':  max(int(round(max_bin)),10),
      'reg_lambda': max(reg_lambda,0),
      'reg_alpha': max(reg_alpha, 0)

  }
  lgb_model = lgb.LGBMClassifier(**params)
  lgb_model.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'logloss', verbose= 100, 
                early_stopping_rounds= 100)
  valid_proba = lgb_model.predict_proba(valid_x)
  logLoss = log_loss(valid_y, valid_proba)

  return logLoss

In [12]:
lgbBO = BayesianOptimization(f= lgb_logloss_eval, pbounds=bayesian_params, random_state = 42)
lgbBO.minimize(init_points = 5, n_iter = 25)

AttributeError: 'BayesianOptimization' object has no attribute 'minimize'

In [None]:
lgbBO.

In [None]:
# lightGBM
lgbm = lgb.LGBMClassifier()
lgbm.fit(train_x, train_y, eval_metric='logloss')
prediction_lgbm = lgbm.predict_proba(valid_x)