In [1]:
from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction

In [2]:
def dummy_func(x, y):
    
    return pow(x, 2) - pow(y - 1, 2) + 1

In [3]:
optimizer = BayesianOptimization(
                    f       = None,
                    pbounds = {'x' : (-2, 2), 'y' : (-3, 3)},
                    verbose = 2, random_state = 1
                )

utility    = UtilityFunction(kind = 'ucb', kappa = 2.5, xi = 0.0)
next_point = optimizer.suggest(utility)

print(f'next point to probe is       : {next_point}')
target = dummy_func(**next_point)

print(f'found the target value to be : {target}')

next point to probe is       : {'x': -0.331911981189704, 'y': 1.3219469606529488}
found the target value to be : 1.006515717783603


In [4]:
optimizer.register(params = next_point, target= target)

for _ in range(5):
    
    next_point = optimizer.suggest(utility)
    target     = dummy_func(**next_point)
    optimizer.register(params = next_point, target = target)
    
    print(f'target     : {target:.2f}')
    print(f'next point : {next_point}\n')
    
print(optimizer.max)

target     : -11.38
next point : {'x': 1.8861546000771092, 'y': -2.9917780942581977}

target     : 0.99
next point : {'x': -0.31764604716962586, 'y': 1.3285597809731806}

target     : 1.00
next point : {'x': -2.0, 'y': 3.0}

target     : -0.18
next point : {'x': -2.0, 'y': -1.2761469781507055}

target     : 1.00
next point : {'x': 2.0, 'y': 3.0}

{'target': 1.006515717783603, 'params': {'x': -0.331911981189704, 'y': 1.3219469606529488}}


In [5]:
def func_with_disc_params(x, y, d):
    
    assert type(d) == int
    return ((x + y + d) // (1 + d)) / (1 +(x + y)**2)


def function_optimize(x, y, w):
    
    return func_with_disc_params(x, y, int(w))

In [6]:
optimizer = BayesianOptimization(
                    f       = function_optimize,
                    pbounds = {'x' : (-10, 10), 'y' : (-10, 10), 'w' :(0, 5)},
                    verbose = 2, random_state = 1
                )

optimizer.set_gp_params(alpha = 1e-3)
optimizer.maximize()

|   iter    |  target   |     w     |     x     |     y     |
-------------------------------------------------------------
| [0m1        [0m | [0m-0.06199 [0m | [0m2.085    [0m | [0m4.406    [0m | [0m-9.998   [0m |
| [95m2        [0m | [95m-0.0344  [0m | [95m1.512    [0m | [95m-7.065   [0m | [95m-8.153   [0m |
| [0m3        [0m | [0m-0.2177  [0m | [0m0.9313   [0m | [0m-3.089   [0m | [0m-2.065   [0m |
| [95m4        [0m | [95m0.1865   [0m | [95m2.694    [0m | [95m-1.616   [0m | [95m3.704    [0m |
| [0m5        [0m | [0m-0.2187  [0m | [0m1.022    [0m | [0m7.562    [0m | [0m-9.452   [0m |
| [95m6        [0m | [95m0.1868   [0m | [95m2.533    [0m | [95m-1.728   [0m | [95m3.815    [0m |
| [0m7        [0m | [0m0.05119  [0m | [0m3.957    [0m | [0m-0.6151  [0m | [0m6.785    [0m |
| [0m8        [0m | [0m0.1761   [0m | [0m0.5799   [0m | [0m1.181    [0m | [0m4.054    [0m |
| [0m9        [0m | [0m0.04045  [0m | [0

In [7]:
import os

import pandas as pd
import numpy as np

In [8]:
ROOT_PATH = '/'.join(os.getcwd().split(os.path.sep)[:-2])
DATA_PATH = f'{ROOT_PATH}/dataset/psychology'

In [9]:
train_df   = pd.read_csv(f'{DATA_PATH}/train.csv', index_col = 0)
test_df    = pd.read_csv(f'{DATA_PATH}/test_x.csv', index_col = 0)
submission = pd.read_csv(f'{DATA_PATH}/sample_submission.csv', index_col = 0) 

In [10]:
feats      = train_df.drop('voted', axis = 1)
target     = train_df['voted']

In [11]:
feats.race.value_counts()

White                    31248
Asian                     6834
Other                     4330
Black                     2168
Native American            548
Arab                       351
Indigenous Australian       53
Name: race, dtype: int64

In [12]:
feats.age_group.value_counts()

10s     14215
20s     14112
30s      7836
40s      5051
50s      2889
60s      1194
+70s      235
Name: age_group, dtype: int64

In [13]:
feats.gender.value_counts()

Male      24217
Female    21315
Name: gender, dtype: int64

In [14]:
feats.religion.value_counts()

Atheist                 10192
Agnostic                 9624
Christian_Catholic       6431
Christian_Other          5137
Christian_Protestant     4875
Other                    4770
Hindu                    1429
Muslim                   1192
Buddhist                  850
Jewish                    487
Christian_Mormon          428
Sikh                      117
Name: religion, dtype: int64

In [15]:
print(f'원본 데이터 칼럼 : {list(feats.columns)}\n')
feats_dummies = pd.get_dummies(feats)
print(f'더미화 된 데이터 칼럼 : {list(feats_dummies.columns)}')

원본 데이터 칼럼 : ['QaA', 'QaE', 'QbA', 'QbE', 'QcA', 'QcE', 'QdA', 'QdE', 'QeA', 'QeE', 'QfA', 'QfE', 'QgA', 'QgE', 'QhA', 'QhE', 'QiA', 'QiE', 'QjA', 'QjE', 'QkA', 'QkE', 'QlA', 'QlE', 'QmA', 'QmE', 'QnA', 'QnE', 'QoA', 'QoE', 'QpA', 'QpE', 'QqA', 'QqE', 'QrA', 'QrE', 'QsA', 'QsE', 'QtA', 'QtE', 'age_group', 'education', 'engnat', 'familysize', 'gender', 'hand', 'married', 'race', 'religion', 'tp01', 'tp02', 'tp03', 'tp04', 'tp05', 'tp06', 'tp07', 'tp08', 'tp09', 'tp10', 'urban', 'wf_01', 'wf_02', 'wf_03', 'wr_01', 'wr_02', 'wr_03', 'wr_04', 'wr_05', 'wr_06', 'wr_07', 'wr_08', 'wr_09', 'wr_10', 'wr_11', 'wr_12', 'wr_13']

더미화 된 데이터 칼럼 : ['QaA', 'QaE', 'QbA', 'QbE', 'QcA', 'QcE', 'QdA', 'QdE', 'QeA', 'QeE', 'QfA', 'QfE', 'QgA', 'QgE', 'QhA', 'QhE', 'QiA', 'QiE', 'QjA', 'QjE', 'QkA', 'QkE', 'QlA', 'QlE', 'QmA', 'QmE', 'QnA', 'QnE', 'QoA', 'QoE', 'QpA', 'QpE', 'QqA', 'QqE', 'QrA', 'QrE', 'QsA', 'QsE', 'QtA', 'QtE', 'education', 'engnat', 'familysize', 'hand', 'married', 'tp01', 'tp02', 'tp03'

In [16]:
print(f'X shape : {feats.shape}, dummies shape : {feats_dummies.shape}')

X shape : (45532, 76), dummies shape : (45532, 100)


In [17]:
test_df = pd.get_dummies(test_df)
test_df.shape

(11383, 100)

In [18]:
feats_dummies = feats_dummies.fillna(feats_dummies.mean())
feats_dummies.drop_duplicates(keep = 'first', inplace = True)
feats_dummies.shape

(45532, 100)

In [19]:
from sklearn.metrics import roc_auc_score,make_scorer
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import MinMaxScaler
from bayes_opt import BayesianOptimization
import lightgbm as lgbm

In [20]:
scaler = MinMaxScaler()
scaler.fit(feats_dummies)

feats = scaler.transform(feats_dummies)
test  = scaler.transform(test_df)

In [21]:
def lgbm_cv(lr, num_leaves, max_depth, min_child_weight, colsample_bytree,
            feature_fraction, bagging_fraction, lambda_l1, lambda_l2):
    
    model = lgbm.LGBMClassifier(learning_rate = lr,
                                n_estimators  = 300,
                                num_leaves    = int(round(num_leaves)),
                                max_depth     = int(round(max_depth)),
                                min_child_weight = int(round(min_child_weight)),
                                colsample_bytree = colsample_bytree,
                                feature_fraction = max(min(feature_fraction, 1), 0),
                                bagging_fraction = max(min(bagging_fraction, 1), 0),
                                lambda_l1        = max(lambda_l1, 0),
                                lambda_l2        = max(lambda_l2, 0)
                            )
    
    scoring   = {'roc_auc_score' : make_scorer(roc_auc_score)}
    result    = cross_validate(model, feats, target, cv = 5, scoring = scoring)
    auc_score = result['test_roc_auc_score'].mean()
    
    return auc_score

In [22]:
pbounds = {
            'lr'            : (0.0001, 0.05),
            'num_leaves'    : (300, 600),
            'max_depth'     : (2, 25),
            'min_child_weight' : (30, 100),
            'colsample_bytree' : (0, 0.99),
            'feature_fraction' : (0.0001, 0.99),
            'bagging_fraction' : (0.0001, 0.99),
            'lambda_l1'        : (0, 0.99),
            'lambda_l2'        : (0, 0.99)
        }

In [23]:
lgbmBO = BayesianOptimization(f = lgbm_cv, pbounds = pbounds, verbose = 2, random_state = 99)
lgbmBO.maximize()

|   iter    |  target   | baggin... | colsam... | featur... | lambda_l1 | lambda_l2 |    lr     | max_depth | min_ch... | num_le... |
-------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m0.6999   [0m | [0m0.6656   [0m | [0m0.4832   [0m | [0m0.8173   [0m | [0m0.03113  [0m | [0m0.8      [0m | [0m0.02832  [0m | [0m8.845    [0m | [0m33.27    [0m | [0m597.2    [0m |
| [0m2        [0m | [0m0.6954   [0m | [0m0.006857 [0m | [0m0.7621   [0m | [0m0.7393   [0m | [0m0.3737   [0m | [0m0.4892   [0m | [0m0.04645  [0m | [0m11.1     [0m | [0m98.18    [0m | [0m457.3    [0m |
| [0m3        [0m | [0m0.6921   [0m | [0m0.09277  [0m | [0m0.8052   [0m | [0m0.2096   [0m | [0m0.5488   [0m | [0m0.2893   [0m | [0m0.04083  [0m | [0m21.04    [0m | [0m45.51    [0m | [0m493.5    [0m |
| [0m4        [0m | [0m0.6852   [0m | [0m0.09432  [0m | [0m