## Import

In [None]:
# for reading data
import os
import numpy as np
import pandas as pd

# model
from lightgbm import LGBMClassifier     

# tuning
import optuna

# Out of Fold
from sklearn.metrics import log_loss  
from sklearn.model_selection import StratifiedKFold

# for saving
import joblib

## Read data

In [None]:
# Absolute location of the directory holding the prepared CSV inputs.
path = os.path.abspath("./input")

# Feature tables for train and test, read in that order with the cp949 codec.
X_train, X_test = (
    pd.read_csv(path + csv_name, encoding='cp949')
    for csv_name in ('/LGBM_train.csv', '/LGBM_test.csv')
)

# Only the `group` column of the label file is kept as the target.
y_train = pd.read_csv(path + '/y_train.csv', encoding='cp949')['group']

In [None]:
# Keep the customer IDs aside (the test IDs are needed for the submission
# file) and remove them from the feature tables so they are not used as
# model inputs.
train_ID = X_train['custid']
test_ID = X_test['custid']
del X_train['custid']
del X_test['custid']

### ▶ Shallow Tuning & Ensemble

In [None]:
# Model configuration: multiclass LightGBM classifier with a fixed seed so
# repeated runs are comparable.
lgbm_1 = LGBMClassifier(
    n_estimators=150,
    min_child_samples=400,
    learning_rate=0.03,
    objective='multiclass',
    metrics='multi_logloss',
    # NOTE(review): num_gpu presumably has no effect unless a GPU device type
    # is also configured — confirm against the LightGBM parameter docs.
    num_gpu=1,
    random_state=0,
)

### ▶ Out of Fold

In [None]:
# Select the estimator that the out-of-fold loop fits and evaluates; swap the
# right-hand side to try a different configured model.
model = lgbm_1

In [None]:
# Stratified 5-fold splitter; shuffling with a fixed seed keeps the fold
# assignment reproducible between runs.
SKF = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

In [None]:
# Out-of-fold evaluation with fold-averaged test predictions.
#
# For every stratified split the model is refit on the training part, scored
# with log loss on the held-out part, and then used to predict the test set.
# The test-set probabilities of all folds are averaged into `lgbm_pred`.

# Derive the fold and class counts from the splitter and the labels instead of
# hard-coding 5 and 8, so changing `SKF` or the target cannot silently produce
# a wrongly scaled average or a shape mismatch.
n_splits = SKF.get_n_splits()
n_classes = y_train.nunique()

lgbm_pred = np.zeros((X_test.shape[0], n_classes))
loss_list = []
for tr_idx, val_idx in SKF.split(X_train, y_train):
    tr_x, tr_y = X_train.iloc[tr_idx], y_train.iloc[tr_idx]
    val_x, val_y = X_train.iloc[val_idx], y_train.iloc[val_idx]

    model.fit(tr_x, tr_y)
    pred = model.predict_proba(val_x)
    # Pass the fitted class order explicitly so the probability columns are
    # matched to the right labels rather than inferred from this fold's
    # validation targets.
    loss = log_loss(val_y, pred, labels=model.classes_)
    loss_list.append(loss)

    # Each fold contributes 1/n_splits of the final test prediction.
    sub_pred = model.predict_proba(X_test) / n_splits
    lgbm_pred += sub_pred
print(f'{model.__class__.__name__}의 {n_splits}fold 평균 Log Loss는 {np.mean(loss_list)}')

In [None]:
# min_child_samples=50: 1.53695786862622
# min_child_samples=100: 1.5363410525558314
# min_child_samples=150: 1.5319978453349357
# min_child_samples=200: 1.5306841729108775
# n_estimators=120: 1.5251515293031905

# n_estimators=130,min_child_samples=250: 1.5222674774168437
# n_estimators=150, min_child_samples=300 : 1.51....

### ▶ Deploy Model & Submission Data

In [None]:
# Wrap the fold-averaged class probabilities in a labelled frame and put the
# test IDs in front as the "ID" column.
# NOTE(review): the column labels assume the model's class order matches this
# list — confirm against `model.classes_`.
pred = pd.DataFrame(
    lgbm_pred,
    columns=['F20', 'F30', 'F40', 'F50', 'M20', 'M30', 'M40', 'M50'],
)
submissions = pd.concat([pd.Series(test_ID, name="ID"), pred], axis=1)

In [None]:
# Write the submission CSV under ./submission.
sub_path = os.path.abspath("./submission")

# `to_csv` does not create missing directories; make sure the target exists so
# a finished training run is not lost to a failed write at the very end.
os.makedirs(sub_path, exist_ok=True)

fname = '/MLGBM_FLGBM.csv'  # naming scheme: Model_FeatureSet
submissions.to_csv(sub_path + fname, index=False)
print(f"'{fname}' is ready to submit.")