# 라이브러리 호출

In [2]:
import pandas as pd
import random
import os
import numpy as np
import sklearn as sk
import sklearn.ensemble as sken
from sklearn.model_selection import StratifiedKFold
import xgboost as xgb
from tqdm import tqdm
import lightgbm as lgb 

In [3]:
# Central run configuration: reproducibility seed, number of CV folds, and the
# CSV paths read from / written to by the cells below.
CONFIG = dict(
    seed=21,
    n_splits=5,
    sample_submission='./data/sample_submission.csv',
    submission='./data/submission.csv',
    train='./data/train.csv',
    test='./data/test.csv',
)

In [4]:
def seed_everything(seed):
    """Seed the global random number generators used by this notebook.

    Exports ``PYTHONHASHSEED`` into the process environment and seeds both
    Python's ``random`` module and NumPy's legacy global RNG.

    Args:
        seed: Integer seed applied to every generator.

    Returns:
        None.
    """
    # Stored as a string because environment variables are text.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Same seeding order as before: stdlib RNG first, then NumPy.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# Fix the seed from the shared config so it cannot drift from the value used
# for the cross-validation split.
seed_everything(CONFIG['seed'])

In [5]:
# Read the raw training and test tables from the paths registered in CONFIG
# (train first, then test).
train, test = (pd.read_csv(CONFIG[split]) for split in ('train', 'test'))

In [6]:
# Names of the input columns fed to the model, in the order used to build the
# feature matrix.
feature = [
    '가입일',          # subscription date
    '음성사서함이용',    # voicemail usage
    # daytime calls: duration, count, charge
    '주간통화시간', '주간통화횟수', '주간통화요금',
    # evening calls: duration, count, charge
    '저녁통화시간', '저녁통화횟수', '저녁통화요금',
    # night calls: duration, count, charge
    '밤통화시간', '밤통화횟수', '밤통화요금',
    '상담전화건수',     # number of customer-service calls
]

In [7]:
# Build the NumPy inputs for training and inference.
# Features are selected by name for BOTH tables so the column order seen at
# prediction time always matches the one used for fitting, regardless of how
# the test CSV happens to be laid out (a missing column now raises KeyError
# instead of silently misaligning).
x_train = train[feature].values
y_train = train['전화해지여부'].values  # target column (churn label)
# `test` is rebound from a DataFrame to an ndarray because later cells call
# model.predict(test). The isinstance guard keeps this cell re-runnable: on a
# second execution `test` is already an ndarray and is left untouched.
if isinstance(test, pd.DataFrame):
    test = test[feature].values

In [13]:
# Train one XGBoost classifier per stratified fold, then collect every fold
# model's hard-label predictions on the test matrix.
kf = StratifiedKFold(n_splits=CONFIG['n_splits'], shuffle=True, random_state=CONFIG['seed'])
models = []
n_estimators = 555  # only referenced by the commented-out LightGBM alternative below
for train_index, valid_index in kf.split(x_train, y_train):
    # Fold-local names that cannot be confused with the global x_train/y_train
    # or with the real test set.
    X_fold_train, X_fold_valid = x_train[train_index], x_train[valid_index]
    Y_fold_train, Y_fold_valid = y_train[train_index], y_train[valid_index]
    # model = lgb.LGBMClassifier(n_estimators = n_estimators, random_state=CONFIG['seed'])
    # Seed the model from CONFIG, like the LightGBM alternative above, so the
    # run stays reproducible if stochastic options (e.g. subsampling) are enabled.
    model = xgb.XGBClassifier(
        eval_metric="auc",
        early_stopping_rounds=5,
        random_state=CONFIG['seed'],
    )
    # Early stopping monitors AUC on this fold's validation split.
    model.fit(X_fold_train, Y_fold_train, eval_set=[(X_fold_valid, Y_fold_valid)])
    # .score() is the scikit-learn classifier score (mean accuracy), not AUC.
    print(f"Score : {model.score(X_fold_valid, Y_fold_valid)}")
    models.append(model)
# One list of predicted labels per fold model: shape (n_models, n_test_rows).
preds = [model.predict(test).tolist() for model in models]

[0]	validation_0-auc:0.69186
[1]	validation_0-auc:0.69846
[2]	validation_0-auc:0.71599
[3]	validation_0-auc:0.72647
[4]	validation_0-auc:0.73227
[5]	validation_0-auc:0.74071
[6]	validation_0-auc:0.75027
[7]	validation_0-auc:0.75905
[8]	validation_0-auc:0.76708
[9]	validation_0-auc:0.77189
[10]	validation_0-auc:0.77117
[11]	validation_0-auc:0.77438
[12]	validation_0-auc:0.78872
[13]	validation_0-auc:0.79242
[14]	validation_0-auc:0.80233
[15]	validation_0-auc:0.80624
[16]	validation_0-auc:0.80693
[17]	validation_0-auc:0.80913
[18]	validation_0-auc:0.81408
[19]	validation_0-auc:0.81638
[20]	validation_0-auc:0.82093
[21]	validation_0-auc:0.82180
[22]	validation_0-auc:0.82428
[23]	validation_0-auc:0.82581
[24]	validation_0-auc:0.83036
[25]	validation_0-auc:0.83230
[26]	validation_0-auc:0.83663
[27]	validation_0-auc:0.83674
[28]	validation_0-auc:0.83756
[29]	validation_0-auc:0.84248
[30]	validation_0-auc:0.84300
[31]	validation_0-auc:0.84383
[32]	validation_0-auc:0.84437
[33]	validation_0-au

In [14]:
# Majority vote across the fold models for each test row.
pred_vec = np.array(preds)  # rows = models, columns = test samples
if pred_vec.ndim != 2:
    # Happens when no model was trained or the prediction lists are ragged.
    raise ValueError(f"expected a 2-D (n_models, n_samples) prediction array, got shape {pred_vec.shape}")
# Count the votes for each label column-wise instead of looping in Python.
votes_for_0 = (pred_vec == 0).sum(axis=0)
votes_for_1 = (pred_vec == 1).sum(axis=0)
# Label 1 wins only with strictly more votes; ties resolve to 0, matching
# the previous first-index-of-max behaviour.
ensemble = (votes_for_1 > votes_for_0).astype(int).tolist()

In [15]:
# Fill the sample-submission template with the ensembled labels and save it.
submit = pd.read_csv(CONFIG['sample_submission'])
submit['전화해지여부'] = ensemble
# Use the configured output path rather than a duplicated hard-coded literal.
submit.to_csv(CONFIG['submission'], index=False)
# Last expression of the cell so the preview is actually rendered.
submit.head()