### import modules

In [1]:
import lightgbm as lgb

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split, ParameterGrid
from sklearn.metrics import confusion_matrix, mean_squared_error, f1_score
from sklearn.datasets import load_iris, load_digits, load_boston

import warnings
warnings.filterwarnings('ignore')

rng = np.random.RandomState(950530)
lgb.__version__

'3.1.1'

In [70]:
iris = load_iris()
y = iris['target']
X = iris['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)

In [None]:
lgb_model = lgb.LGBMClassifier(n_jobs=1)
lgb_model.fit(X[train_index], y[train_index])
predictions = lgb_model.predict(X[test_index])
actuals = y[test_index]

In [14]:
lgb.LGBMClassifier(max_cat_group = 1)

LGBMClassifier(max_cat_group=1)

In [16]:
lgb_model = lgb.LGBMClassifier(max_cat_threshold = 1)
lgb_model.fit(X, y)
predictions = lgb_model.predict(X)
actuals = y
print(confusion_matrix(actuals, predictions))

[[50  0  0]
 [ 0 50  0]
 [ 0  0 50]]


### 파라미터 정리
**파라미터 참고문서**
- [lightGBM parameters](https://lightgbm.readthedocs.io/en/latest/Parameters.html#objective)
- [Laura++](https://sites.google.com/view/lauraepp/parameters)

In [100]:
#### 파라미터 ####

# 최대 깊이
max_depth = -1

# 하나의 Leaf가 가지고 있는 최소한의 관측치 수
min_data_in_leaf = 20

# column sampling ratio
feature_fraction = 1 # .7 (typically)

# row sampling ratio
bagging_fraction = 1 # .7 (typically)

# ratio of L1, L2 regularization
lambda_l1 = 0
lambda_l2 = 0

# 범주형 변수가 있는 경우, 범주의 최대 개수 설정
max_cat_threshold = 32

# 
early_stopping_round = 'NULL'




#### Core parameter ####
#### ############## ####

task = 'train'
#  'train'         , for training, 
#  'predict'       , for prediction, 
#  'convert_model' , for converting model file into if-else format, see more information in Convert Parameters
#  'refit'         , for refitting existing models with new data, aliases: refit_tree
#  'save_binary'   , load train (and validation) data then save dataset to binary file. Typical usage: save_binary first, then run multiple train tasks in parallel using the saved binary file
# Note: can be used only in CLI version; for language-specific packages you can use the correspondent functions

boosting = 'gdbt'
#  'gbdt' , traditional Gradient Boosting Decision Tree, aliases: gbrt
#  'rf'   , Random Forest, aliases: random_forest
#  'dart' , Dropouts meet Multiple Additive Regression Trees
#  'goss' , Gradient-based One-Side Sampling
# Note: internally, LightGBM uses gbdt mode for the first 1 / learning_rate iterations

application = 'regression'
# objective
# Go README.md

num_iterations = 100 
# typically n_iter >= 100
# number of boosting iterations

learning_rate = 0.1 
# typically 0.1, 0.001, 0.0003
# Go README.md

num_leaves = 31 
# max number of leaves in one tree



#### Metric parameters ####
#### ################# ####

metric = ''
# 'mean_absolute_error',     MAE
# 'mean_squared_error',      MSE
# 'root_mean_squared_error', RMSE
# 'binary_logloss'
# 'multi_logloss'
# For more detail, Go README.md


#### I/O parameters ####
#### ############# ####

max_bin = 32

categorical_feature = ''
# 범주형 변수의 index를 기입
# categorical_feature = 0,1,2
# categorical_feature = name: C0, C1, C2

ignore_column = ''
# 학습에서 무시할 변수 선택
# 위와 동일한 방법 사용



PARAMETERS = {
    'max_depth' : max_depth,
    'min_data_in_leaf' : min_data_in_leaf,
    'feature_fraction' : feature_fraction,
    'bagging_fraction' : bagging_fraction,
    'early_stopping_round' : early_stopping_round,
    'lambda_l1' : lambda_l1,
    'lambda_l2' : lambda_l2,
    'max_cat_threshold' : max_cat_threshold,
    
    'task' : task,
    'boosting' : boosting,
    'application' : application,
    'num_iterations' : num_iterations,
    'learning_rate' : learning_rate,
    
    'metric' : metric,
    
    'max_bin' : max_bin,
    'categorical_feature' : categorical_feature

}

In [99]:
for train_index, test_index in kf.split(X):
    
    lgb_model = lgb.LGBMClassifier(**parameters)
    lgb_model.fit(X[train_index], y[train_index])
    predictions = lgb_model.predict(X[train_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))

[[22  0  0]
 [ 6 20  0]
 [ 0  4 23]]
[[22  6  0]
 [ 0 20  4]
 [ 0  0 23]]
