# import modules

In [3]:
import lightgbm as lgb

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split, ParameterGrid
from sklearn.metrics import confusion_matrix, mean_squared_error, f1_score
from sklearn.datasets import load_iris, load_digits, load_boston

import warnings
warnings.filterwarnings('ignore')

rng = np.random.RandomState(950530)
lgb.__version__


'3.1.1'

# 1. binary classification

In [4]:
digits = load_digits(n_class = 2)

In [5]:
print('Shape of X : ', digits.data.shape)
print('Shape of y : ', digits.target.shape)

Shape of X :  (360, 64)
Shape of y :  (360,)


In [6]:
y = digits['target'] # label
X = digits['data']

# make K-fold cross validation instance + you can use 
kf = KFold(n_splits=2,       # there is 2 folds
           shuffle=True,     # 데이터를 분할하기 전에 섞어줘
           random_state=rng) # seed 고정

In [7]:
for train_index, test_index in kf.split(X):
    
    lgb_model = lgb.LGBMClassifier(n_jobs=1)
    lgb_model.fit(X[train_index], y[train_index])
    predictions = lgb_model.predict(X[test_index])
    actuals = y[test_index]
    
    print(confusion_matrix(actuals, predictions))


[[81  4]
 [ 0 95]]
[[93  0]
 [ 1 86]]


# 2. multi-class classification

In [8]:
iris = load_iris()
y = iris['target']
X = iris['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)

In [9]:
for train_index, test_index in kf.split(X):
    
    lgb_model = lgb.LGBMClassifier(n_jobs=1)
    lgb_model.fit(X[train_index], y[train_index])
    predictions = lgb_model.predict(X[test_index])
    actuals = y[test_index]
    
    print(confusion_matrix(actuals, predictions))

[[21  3  0]
 [ 0 22  4]
 [ 0  3 22]]
[[26  0  0]
 [ 0 23  1]
 [ 0  2 23]]


# 3. Regression

In [11]:
print("Boston Housing: regression")
boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)

Boston Housing: regression


In [12]:
for train_index, test_index in kf.split(X):
    
    lgb_model = lgb.LGBMRegressor(n_jobs=1)
    lgb_model.fit(X[train_index], y[train_index])
    predictions = lgb_model.predict(X[test_index])
    actuals = y[test_index]
    
    print(mean_squared_error(actuals, predictions))


10.91990583450736
19.843625053006033


# 4. Parameter Optimization 

In [13]:
print("Parameter optimization01 : ParameterGrid")

y = boston['target']
X = boston['data']

# 5-fold, Shuffle
kf = KFold(n_splits=5, shuffle=True, random_state=rng)

Parameter optimization01 : ParameterGrid


In [17]:
# 파라미터 그리드 설정
LGB_parameter_grid = ParameterGrid({"max_depth": np.arange(2, 5),
                                  "n_estimators": [200, 210, 250, 290, 300]})
  
# [1st Loop]
# Set parameter which we want test
for parameter in LGB_parameter_grid:

    best_score = 1000
    avr_score = 0
    _scores = []
    
    # [2nd Loop]
    # K-fold cross validation -> Mean Score is the 'Set of parameters's score
    for train_index, test_index in kf.split(X):

        model = lgb.LGBMRegressor(n_jobs =1, verbosity = -1, **parameter)
        model.fit(X[train_index], y[train_index],
                 verbose = 1)
        pred_Y = model.predict(X[test_index])
        score = mean_squared_error(pred_Y, y[test_index])
        
        _scores.append(score)

    avr_score = np.mean(_scores)

    if avr_score < best_score:
        best_score = avr_score
        best_parameter = parameter
    
print('Best Parameter : ', best_parameter)
print('Best Score(MSE):', best_score)

Best Parameter :  {'max_depth': 4, 'n_estimators': 300}
Best Score(MSE): 10.517548349515572
