In [1]:
# 기본 package import
import pandas as pd
import numpy as np 
import warnings
warnings.filterwarnings("ignore")

# sklearn package import
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score, train_test_split
from sklearn.metrics import f1_score, make_scorer
from sklearn.externals import joblib

# Bayesian Optimizer
from skopt import BayesSearchCV

# Classifier package import
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier, cv, Pool

# stacking 
from vecstack import stacking

## Data structure
- data are consist of test and train set.
- train data are splitted into train and validation part. 
- So, final structure is train, validation, test dataset.

In [6]:
# Train and Test splitter
def DataSplit(data) :
    data_train = data.loc[lambda x : x["TARGET"].notna(),:]
    data_test = data.loc[lambda x : x["TARGET"].isna(),:]
    
    # 5:5 sampler 
    train_one = data_train.loc[lambda x : x["TARGET"]== 1,:]
    train_zero = data_train.loc[lambda x : x["TARGET"]== 0,:]
    
    sample_train = pd.concat([train_one, train_zero.sample(len(train_one))])
    
    X_train = sample_train.loc[:,"NAME_CONTRACT_TYPE" : "external_sources_nanmedian"]
    y_train = sample_train.loc[:,"TARGET"]
    
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 0.2)
    
    X_test = data_test.loc[:,"NAME_CONTRACT_TYPE" : "external_sources_nanmedian"]
    y_test = data_test.loc[:,"TARGET"]
    
    return X_train, X_valid, X_test, y_train, y_valid, y_test


In [7]:
# Data import
df = pd.read_csv("tr_te15.csv")
del df["Unnamed: 0"]

# 범주형 변수 NA imputation
CATE = list(df.loc[:,df.dtypes == object].columns)
for column in CATE :
    df[column].fillna("XNA",inplace=True)
    

X_train, X_valid, X_test, y_train, y_valid, y_test = DataSplit(df)
my_scorer = make_scorer(f1_score, greater_is_better = True)

# Bayesian Optimization
### -hyper-parameter tuning
- Gaussian Process 를 이용하여 Hyper-parameter space에서 최적의 스코어를 찾아가는 알고리즘임. 
- Bayesian Optimization을 구현하는 코드는 매우 많음. Bayes_opt 라는 패키지도 존재함. 따라서 본인이 이론적인 베이스를 탄탄하게 구축하고 구현하고자 한다면 여러 방법이 있으니 잘 찾아보길 바람.
- 본인이 탐색한 방법은 Scikit-Optimize(skopt) 패키지를 이용한 BayesSearchCV 함수를 사용할 것임. 이 함수는 Scikit-Learn 패키지의 Wrapper 라고 생각하면 되는듯. 따라서 Scikit-learn과 관련된 boosting 모델 등만 사용가능함.
- 이 함수는 임의의 시작점에서 출발해 최적의 스코어 지점을 탐색하는 iteration을 반복함.
- 여기서, iteration 횟수에 대한 적정성에 대해서는 주관적 판단이 필요함. 다만 Bayesian Optimization을 사용하면 Best로는 아니지만 최선으로 보이는 지점으로는 매우 빠르게 수렴하는 편임(보통 어느 정도 수준의 score에 대해서는 5번 안에 찾아가는 것으로 보임)
- 미미한 증가라도 원하는 경우 iteraion을 매우 크게 늘리면 찾아가기는 함. 다만 overfit이 발생하거나 지나치게 깊은 모델을 학습하게 되어 모델 하나에 엄청난 시간을 소요하는 경우도 생김(경험 상 하나의 hyper parameter 하에 학습시 20시간 걸리는 것도 봤음)
- 따라서 어느 정도 적정선만 찾으려고 하면 10번 안에 찾아가는 매우 빠른 속도를 보이는 장점을 가지고 있음.
- 많은 경험을 한 것은 아니지만, BO를 이용한다면 LGBM > XGB > CB 순으로 속도가 빠른것 같음. LGBM 짱짱
- hyper parameter의 범위에 대해서는 각 모델의 documetation을 참고해서 parameter 특성에 맞춰서 설정해주면 됨. 

## Light GBM Bayesian Optimization By Scikit-Optimize
- For 14-dimensional Hyper-Parameter Space
- Takes 30 iteration for optimization

In [9]:
warnings.filterwarnings("ignore")

# Classifier
bayes_cv_tuner = BayesSearchCV(
    estimator = LGBMClassifier(
        objective='binary',
        metric='F1',
        n_jobs=8,
        verbose=2
    ),
    search_spaces = {
        'learning_rate': (0.01, 0.5, 'log-uniform'),
        'num_leaves': (10, 80),      
        'max_depth': (4, 30),
        'min_child_samples': (0, 50),
        'max_bin': (100, 1000),
        'subsample': (0.01, 1.0, 'uniform'),
        'subsample_freq': (0, 10),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'min_child_weight': (0, 10),
        'subsample_for_bin': (100000, 500000),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'scale_pos_weight': (1e-6, 500, 'log-uniform'),
        'n_estimators': (50, 150),
    },    
    scoring = my_scorer,
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 8,
    n_iter = 30,
    verbose = 0,
    refit = True,
    random_state = 42
)

In [8]:
def status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest F1-score: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner.best_score_, 4),
        bayes_cv_tuner.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")

In [10]:
result = bayes_cv_tuner.fit(X_train, y_train, callback=status_print)

Model #1
Best F1-score: 0.1294
Best params: {'colsample_bytree': 0.4160029192647807, 'learning_rate': 0.17233925413725915, 'max_bin': 940, 'max_depth': 12, 'min_child_samples': 34, 'min_child_weight': 4, 'n_estimators': 85, 'num_leaves': 62, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216, 'subsample_for_bin': 171234, 'subsample_freq': 6}

Model #2
Best F1-score: 0.5005
Best params: {'colsample_bytree': 0.8390144719977516, 'learning_rate': 0.3167569558914482, 'max_bin': 373, 'max_depth': 29, 'min_child_samples': 43, 'min_child_weight': 1, 'n_estimators': 64, 'num_leaves': 35, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134, 'subsample_for_bin': 406716, 'subsample_freq': 4}

Model #3
Best F1-score: 0.7173
Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.36381617755817935, 'ma

Model #20
Best F1-score: 0.7217
Best params: {'colsample_bytree': 0.393013738373805, 'learning_rate': 0.418239119517446, 'max_bin': 172, 'max_depth': 14, 'min_child_samples': 5, 'min_child_weight': 5, 'n_estimators': 65, 'num_leaves': 62, 'reg_alpha': 0.0001905489877131977, 'reg_lambda': 1000.0, 'scale_pos_weight': 1.7379136009802567, 'subsample': 0.31567283020694265, 'subsample_for_bin': 183714, 'subsample_freq': 5}

Model #21
Best F1-score: 0.7217
Best params: {'colsample_bytree': 0.393013738373805, 'learning_rate': 0.418239119517446, 'max_bin': 172, 'max_depth': 14, 'min_child_samples': 5, 'min_child_weight': 5, 'n_estimators': 65, 'num_leaves': 62, 'reg_alpha': 0.0001905489877131977, 'reg_lambda': 1000.0, 'scale_pos_weight': 1.7379136009802567, 'subsample': 0.31567283020694265, 'subsample_for_bin': 183714, 'subsample_freq': 5}

Model #22
Best F1-score: 0.7217
Best params: {'colsample_bytree': 0.393013738373805, 'learning_rate': 0.418239119517446, 'max_bin': 172, 'max_depth': 14, 'm

In [11]:
Best_LGBM_BO = result.best_estimator_

In [28]:
joblib.dump(Best_LGBM_BO, "model/LGBM_BO.pkl")

['model/LGBM_BO.pkl']

In [4]:
Best_LGBM_BO = joblib.load("model/LGBM_BO.pkl")

In [5]:
Best_LGBM_BO.fit(X_train, y_train)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        importance_type='split', learning_rate=0.48877929548775684,
        max_bin=539, max_depth=24, metric='F1', min_child_samples=36,
        min_child_weight=2, min_split_gain=0.0, n_estimators=105, n_jobs=8,
        num_leaves=63, objective='binary', random_state=None,
        reg_alpha=1.5680470900438596e-09, reg_lambda=1000.0,
        scale_pos_weight=1.917631826991509, silent=True,
        subsample=0.040575370798758226, subsample_for_bin=172898,
        subsample_freq=0, verbose=2)

In [6]:
pred = Best_LGBM_BO.predict(X_valid)
f1_score(y_valid, pred)

0.7223901831762404

## XGBoost Bayesian Optimization by Scikit-Optimize
- For 13-dimensional Hyper-Parameter Space
- Takes 30 iteration for optimization

In [7]:
warnings.filterwarnings("ignore")

bayes_cv_tuner_xgb = BayesSearchCV(
    estimator = XGBClassifier(
        n_jobs = 3,
        objective = 'binary:logistic',
        metric = f1_score,
        silent=1,
        tree_method='approx'
    ),
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'min_child_weight': (0, 10),
        'max_depth': (0, 50),
        'max_delta_step': (0, 20),
        'subsample': (0.01, 1.0, 'uniform'),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'colsample_bylevel': (0.01, 1.0, 'uniform'),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'gamma': (1e-9, 0.5, 'log-uniform'),
        'min_child_weight': (0, 5),
        'n_estimators': (50, 150),
        'scale_pos_weight': (1e-6, 500, 'log-uniform')
    },    
    scoring = my_scorer,
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 3,
    n_iter = 30,   
    verbose = 0,
    refit = True,
    random_state = 42
)

In [8]:
def status_print_xgb(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner_xgb.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner_xgb.best_params_)
    print('Model #{}\nBest F1-score: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner_xgb.best_score_, 4),
        bayes_cv_tuner_xgb.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner_xgb.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")

In [9]:
result_xgb = bayes_cv_tuner_xgb.fit(X_train, y_train, callback=status_print_xgb)

Model #1
Best F1-score: 0.0786
Best params: {'colsample_bylevel': 0.4160029192647807, 'colsample_bytree': 0.7304484857455519, 'gamma': 0.13031389926541354, 'learning_rate': 0.042815319280763466, 'max_delta_step': 13, 'max_depth': 21, 'min_child_weight': 2, 'n_estimators': 124, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216}

Model #2
Best F1-score: 0.4899
Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 85, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}

Model #3
Best F1-score: 0.7137
Best params: {'colsample_bylevel': 0.4503841871781403, 'colsample_bytree': 0.9195352964526833, 'gamma': 8.168958221061441e-0

Model #21
Best F1-score: 0.725
Best params: {'colsample_bylevel': 0.5156333223271519, 'colsample_bytree': 0.9087278081792709, 'gamma': 8.563997027215662e-09, 'learning_rate': 0.07560544236324758, 'max_delta_step': 3, 'max_depth': 24, 'min_child_weight': 1, 'n_estimators': 120, 'reg_alpha': 0.00011145103489529869, 'reg_lambda': 286.5510997980129, 'scale_pos_weight': 1.9809276025249891, 'subsample': 0.43414241282433724}

Model #22
Best F1-score: 0.725
Best params: {'colsample_bylevel': 0.5156333223271519, 'colsample_bytree': 0.9087278081792709, 'gamma': 8.563997027215662e-09, 'learning_rate': 0.07560544236324758, 'max_delta_step': 3, 'max_depth': 24, 'min_child_weight': 1, 'n_estimators': 120, 'reg_alpha': 0.00011145103489529869, 'reg_lambda': 286.5510997980129, 'scale_pos_weight': 1.9809276025249891, 'subsample': 0.43414241282433724}

Model #23
Best F1-score: 0.725
Best params: {'colsample_bylevel': 0.5156333223271519, 'colsample_bytree': 0.9087278081792709, 'gamma': 8.563997027215662e-

In [10]:
Best_XGB_BO = result_xgb.best_estimator_
joblib.dump(Best_XGB_BO, "model/XGB_BO.pkl")

['model/XGB_BO.pkl']

In [11]:
Best_XGB_BO = joblib.load("model/XGB_BO.pkl")

In [12]:
pred = Best_XGB_BO.predict(X_valid)
f1_score(y_valid, pred)

0.7329593744688084

## CATBoost Bayesian Optimization by Scikit-Optimize

- Also, 8-dimensional hyper-parameter space
- Takes 20 iterations for optimization
- CB의 경우는 너무 오래걸리기도 하고, hyper-parameter를 잘 모르겠음 그래서 para 8개만 함. iter도 20개.

In [18]:
warnings.filterwarnings("ignore")

# Classifier
bayes_cv_tuner_cb= BayesSearchCV(
    estimator = CatBoostClassifier(
        eval_metric='F1',
        bootstrap_type='Bernoulli',
        thread_count=3,
        logging_level="Silent"
    ),
    search_spaces = {
        'learning_rate': (0.01, 0.5, 'log-uniform'),
        'scale_pos_weight' : (1.0,150.0),
        'depth': (4, 10),
        'rsm' : (0.01, 1.0, 'uniform'),
        'reg_lambda': (1, 50, 'log-uniform'), # CatBoost 는 L1 regularization 지원 안함.
        'n_estimators': (50, 500),
        'random_strength' : (0.001, 10, 'log-uniform'),
        'leaf_estimation_iterations' : (1, 30)
    },
    scoring = my_scorer,
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 1,
    n_iter = 20,
    verbose = 0,
    refit = True,
    random_state = 42
)

In [19]:
def status_print_cb(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner_cb.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner_cb.best_params_)
    print('Model #{}\nBest F1-score: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner_cb.best_score_, 4),
        bayes_cv_tuner_cb.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner_cb.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")

In [20]:
result_cb = bayes_cv_tuner_cb.fit(X_train, y_train, callback = status_print_cb)

Model #1
Best F1-score: 0.6717
Best params: {'depth': 6, 'leaf_estimation_iterations': 22, 'learning_rate': 0.38451629413594224, 'n_estimators': 192, 'random_strength': 0.47928274405969296, 'reg_lambda': 5.053300616712746, 'rsm': 0.35742202155015257, 'scale_pos_weight': 111.18613089437267}

Model #2
Best F1-score: 0.6717
Best params: {'depth': 6, 'leaf_estimation_iterations': 22, 'learning_rate': 0.38451629413594224, 'n_estimators': 192, 'random_strength': 0.47928274405969296, 'reg_lambda': 5.053300616712746, 'rsm': 0.35742202155015257, 'scale_pos_weight': 111.18613089437267}

Model #3
Best F1-score: 0.6717
Best params: {'depth': 6, 'leaf_estimation_iterations': 22, 'learning_rate': 0.38451629413594224, 'n_estimators': 192, 'random_strength': 0.47928274405969296, 'reg_lambda': 5.053300616712746, 'rsm': 0.35742202155015257, 'scale_pos_weight': 111.18613089437267}

Model #4
Best F1-score: 0.6717
Best params: {'depth': 6, 'leaf_estimation_iterations': 22, 'learning_rate': 0.38451629413594

In [21]:
Best_CB_BO = result_cb.best_estimator_
joblib.dump(Best_CB_BO, "model/CB_BO.pkl")

['model/CB_BO.pkl']

In [22]:
Best_CB_BO = joblib.load("model/CB_BO.pkl")

In [23]:
pred = Best_CB_BO.predict(X_valid)
f1_score(y_valid, pred)

0.7165413533834587

## Stacking
- vecstack 패키지 이용.

In [8]:
Best_LGBM_BO = joblib.load("model/LGBM_BO.pkl")
Best_XGB_BO = joblib.load("model/XGB_BO.pkl")
Best_CB_BO = joblib.load("model/CB_BO.pkl")

In [9]:
models = [Best_CB_BO, Best_LGBM_BO, Best_XGB_BO]
warnings.filterwarnings("ignore")
# 넣고 싶은 모델들. 
S_train, S_test = stacking(models, X_train, y_train, X_test, regression = False, 
                           metric = f1_score, verbose = 2, shuffle=True, n_folds=3)

# 만약 S_train, S_test 에 대해서 확률을 원하는 경우는 needs_proba=True 설정하면 됨.
# S_train, S_test = stacking(models, X_train, y_train, X_test, 
#                            regression = False, metric = f1_score, verbose = 2, 
#                            shuffle=True, n_folds=3,needs_proba=True)

task:         [classification]
n_classes:    [2]
metric:       [f1_score]
mode:         [oof_pred_bag]
n_models:     [3]

model  0:     [CatBoostClassifier]
    fold  0:  [0.69962740]
    fold  1:  [0.70710272]
    fold  2:  [0.71703452]
    ----
    MEAN:     [0.70792155] + [0.00712998]
    FULL:     [0.70796207]

model  1:     [LGBMClassifier]
    fold  0:  [0.71207430]
    fold  1:  [0.71496405]
    fold  2:  [0.73311126]
    ----
    MEAN:     [0.72004987] + [0.00931084]
    FULL:     [0.72010760]

model  2:     [XGBClassifier]
    fold  0:  [0.71614819]
    fold  1:  [0.71596032]
    fold  2:  [0.73717785]
    ----
    MEAN:     [0.72309545] + [0.00995805]
    FULL:     [0.72314545]



In [23]:
S_train.view()

array([[0, 1, 1],
       [0, 0, 0],
       [1, 1, 1],
       ...,
       [1, 1, 1],
       [1, 1, 1],
       [0, 1, 1]])

- Stacking 이 끝난 데이터들은 위의 형태로 저장됨. (확률로 설정하면 확률로 나옵니당)
- 여기서 이 stacking 데이터를 이용하여 추가 모델(meta model이라고 보통 부름)로 최종 S_test를 이용해 predict 하면됨.
- 여기서는 대부분 가중치를 찾는 것이기 때문에 Neural Net을 사용해도 되고, Boosting 모델을 똑같이 적용해도 됨. 
- 만약 부스팅 모델을 쓰면 BO 한번 더 이용해도 됨

- 예측력을 높이기 위해서 Stacking 데이터에 feature_importance가 높았던 변수 몇개를 추가해서 데이터셋을 구성하여 meta model을 학습시킬 수도 있다고 함. 본인 선택.
- 아래는 간단한 예

In [12]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils 
from keras import backend as K

Using TensorFlow backend.


In [13]:
def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


In [14]:
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=3, activation='relu'))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='Adagrad', metrics=[f1])
    return model

In [15]:
meta_model = KerasClassifier(build_fn=baseline_model, epochs=25)
meta_model.fit(S_train, y_train)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x27b46592a90>

In [16]:
meta_model = XGBClassifier(metrics=f1_score)
meta_model.fit(S_train,y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, metrics=<function f1_score at 0x0000027B5BD90730>,
       min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
       nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [18]:
meta_model.predict(S_test)

array([1., 0., 1., ..., 0., 0., 1.])

이러면 예측이 끝나셨습니다. submission...