In [94]:
import os
import random
import numpy as np
import pandas as pd
import pickle
import gc

import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

import warnings
warnings.filterwarnings('ignore')

In [2]:
def seed_everything(seed=0):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
SEED = 2019
seed_everything(SEED)

In [3]:
DATASET_PATH = './dataset/'
actions = ['cClockwise', 'clockwise', 'left2right', 'right2left', 'up2down']

for action in actions:
    all_data = []
    
    for example in os.listdir(DATASET_PATH + action):
        
        # example == 1.txt
        # action == cClockwise
        
        with open(DATASET_PATH + action + '/' + example, 'rb') as f:
            data = pickle.load(f)
            all_data.append(np.array(data).flatten().reshape(1, -1))

    data_num = len(all_data)
    
    # data to dict
    data_dict = {}
    for i, data in enumerate(all_data):
        data_dict[i] = data

    # create empty dataframe
    columns = [i for i in range(102)]
    indices = [i for i in range(data_num)]
    df = pd.DataFrame(columns=columns, index=indices)

    for i in range(data_num):
        df.loc[i] = data_dict[i]

    df.to_csv('./dataset/{}_example.csv'.format(action), index=False)

In [4]:
df1 = pd.read_csv('./dataset/cClockwise_example.csv')
df2 = pd.read_csv('./dataset/clockwise_example.csv')
df3 = pd.read_csv('./dataset/left2right_example.csv')
df4 = pd.read_csv('./dataset/right2left_example.csv')
df5 = pd.read_csv('./dataset/up2down_example.csv')

In [5]:
df1['target'] = 4 # 반시계
df2['target'] = 3 # 시계
df3['target'] = 2 # 왼오
df4['target'] = 1 # 오왼
df5['target'] = 0 # 업다운

In [99]:
df = pd.concat([df1, df2], axis=0)
df = pd.concat([df, df3], axis=0)
df = pd.concat([df, df4], axis=0)
df = pd.concat([df, df5], axis=0)

In [100]:
df['target'].value_counts()

0    1012
3    1007
1    1006
2    1000
4    1000
Name: target, dtype: int64

In [101]:
df = df.sample(frac=1)

In [102]:
df = df.reset_index(drop=True)

In [103]:
train_num = int(len(df)*0.8)

In [104]:
train_df = df[:train_num]
test_df = df[train_num:]

In [105]:
train_df[target] = train_df[target].astype('category')
test_df[target] = test_df[target].astype('category')

In [106]:
bayesian_tr_idx, bayesian_val_idx = train_test_split(train_df, test_size = 0.2, random_state = 42, stratify = train_df['target'])
bayesian_tr_idx = bayesian_tr_idx.index
bayesian_val_idx = bayesian_val_idx.index

In [107]:
features = train_df.columns[:-1]
target = 'target'

In [134]:
#black box LGBM 
def LGB_bayesian(
    #learning_rate,
    num_leaves, 
    bagging_fraction,
    feature_fraction,
    min_child_weight, 
    min_data_in_leaf,
    max_depth,
    reg_alpha,
    reg_lambda
     ):
    
    # LightGBM expects next three parameters need to be integer. 
    num_leaves = int(num_leaves)
    min_data_in_leaf = int(min_data_in_leaf)
    max_depth = int(max_depth)

    assert type(num_leaves) == int
    assert type(min_data_in_leaf) == int
    assert type(max_depth) == int
    

    param = {
              'num_leaves': num_leaves, 
              'min_data_in_leaf': min_data_in_leaf,
              'min_child_weight': min_child_weight,
              'bagging_fraction' : bagging_fraction,
              'feature_fraction' : feature_fraction,
#               'learning_rate' : learning_rate,
              'max_depth': max_depth,
              'reg_alpha': reg_alpha,
              'reg_lambda': reg_lambda,
              'objective': 'multiclass',
#               'save_binary': True,
#               'seed': 1337,
#               'feature_fraction_seed': 1337,
#               'bagging_seed': 1337,
#               'drop_seed': 1337,
#               'data_random_seed': 1337,
              'boosting_type': 'gbdt',
              'verbose': 1,
              'is_unbalance': False,
              'boost_from_average': True,
              'metric':'multi_logloss',
              'num_class': 5}
    
    X, y = train_df[features], train_df[target]    

#     oof = np.zeros(len(bayesian_val_idx))
    tr_x, tr_y = X.iloc[bayesian_tr_idx,:], y.iloc[bayesian_tr_idx]
    vl_x, vl_y = X.iloc[bayesian_val_idx,:], y.iloc[bayesian_val_idx]
    
    trn_data = lgb.Dataset(tr_x, tr_y)
    val_data = lgb.Dataset(vl_x, vl_y)
    
    
    clf = lgb.train(param, trn_data,  num_boost_round=50, valid_sets = [trn_data, val_data], verbose_eval=0, early_stopping_rounds = 50)

    val_predict_result = clf.predict(X.iloc[bayesian_val_idx].values, num_iteration= clf.best_iteration)  

    val_predict_result = np.argmax(val_predict_result, axis=1)
    score = accuracy_score(y.iloc[bayesian_val_idx].values, val_predict_result)

    return score

In [130]:
bounds_LGB = {
    'num_leaves': (31, 500), 
    'min_data_in_leaf': (20, 200),
    'bagging_fraction' : (0.1, 0.9),
    'feature_fraction' : (0.1, 0.9),
#     'learning_rate': (0.01, 0.3),
    'min_child_weight': (0.00001, 0.01),   
    'reg_alpha': (1, 2), 
    'reg_lambda': (1, 2),
    'max_depth':(-1,50),
}

In [110]:
# !pip install bayesian-optimization


In [19]:
from bayes_opt import BayesianOptimization

LGB_BO = BayesianOptimization(LGB_bayesian, bounds_LGB, random_state=42)
print(LGB_BO.space.keys)

In [132]:
init_points = 10
n_iter = 15

In [136]:
print('-' * 130)

with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    LGB_BO.maximize(init_points=init_points, n_iter=n_iter, acq='ucb', xi=0.0, alpha=1e-6)

----------------------------------------------------------------------------------------------------------------------------------
|   iter    |  target   | baggin... | featur... | max_depth | min_ch... | min_da... | num_le... | reg_alpha | reg_la... |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.995   [0m | [0m 0.3996  [0m | [0m 0.8606  [0m | [0m 36.33   [0m | [0m 0.005991[0m | [0m 48.08   [0m | [0m 104.2   [0m | [0m 1.058   [0m | [0m 1.866   [0m |
| [0m 2       [0m | [0m 0.9925  [0m | [0m 0.5809  [0m | [0m 0.6665  [0m | [0m 0.04981 [0m | [0m 0.009699[0m | [0m 169.8   [0m | [0m 130.6   [0m | [0m 1.182   [0m | [0m 1.183   [0m |
| [0m 3       [0m | [0m 0.9925  [0m | [0m 0.3434  [0m | [0m 0.5198  [0m | [0m 21.03   [0m | [0m 0.002919[0m | [0m 130.1   [0m | [0m 96.42   [0m | [0m 1.292   [0m | [0m 1.366   [0m |
| [0m 4       [0m

In [137]:
LGB_BO.max['target']

0.996268656716418

In [138]:
LGB_BO.max['params']

{'bagging_fraction': 0.32474760774990463,
 'feature_fraction': 0.5341568665265988,
 'max_depth': 6.187135473712895,
 'min_child_weight': 0.008023947837732857,
 'min_data_in_leaf': 33.41911586235875,
 'num_leaves': 493.84997326564263,
 'reg_alpha': 1.7722447692966574,
 'reg_lambda': 1.1987156815341724}