# 라이브러리 호출

In [1]:
import pandas as pd
import random
import os
import numpy as np
import sklearn as sk
import sklearn.ensemble as sken
from sklearn.model_selection import StratifiedKFold
import xgboost as xgb
from tqdm import tqdm
import lightgbm as lgb 

In [2]:
# Central settings for the notebook: RNG seed, CV fold count and data file paths.
CONFIG = {
    # Seed passed as random_state to the CV splitter and the models.
    'seed': 21,
    # Number of stratified cross-validation folds.
    'n_splits': 5,
    # Template file whose layout the submission reuses.
    'sample_submission': './data/sample_submission.csv',
    # Destination path for the finished submission file.
    'submission': './data/submission.csv',
    # Input tables.
    'train': './data/train.csv',
    'test': './data/test.csv',
}

In [3]:
def seed_everything(seed):
    """Seed the global random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator,
    and stores the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed; its string form is written to the environment.

    Returns:
        None.

    Note:
        The running interpreter chose its hash seed at startup, so the
        environment variable only matters for processes launched afterwards.
    """
    seed_text = str(seed)
    os.environ['PYTHONHASHSEED'] = seed_text
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# Fix the seed. Read it from CONFIG so the global RNGs and every random_state
# in the notebook stay in sync when the configured seed is changed.
seed_everything(CONFIG['seed'])

In [4]:
# Load the train and test tables (in that order) from the paths registered in CONFIG.
train, test = (pd.read_csv(CONFIG[split]) for split in ('train', 'test'))

In [5]:
# Columns used as model inputs. English glosses of the Korean column names are
# given as comments; the strings themselves must match the CSV headers exactly.
feature = [
    '가입일',        # subscription date
    '음성사서함이용',  # voicemail usage
    '주간통화시간',   # daytime call duration
    '주간통화횟수',   # daytime call count
    '주간통화요금',   # daytime call charge
    '저녁통화시간',   # evening call duration
    '저녁통화횟수',   # evening call count
    '저녁통화요금',   # evening call charge
    '밤통화시간',     # night call duration
    '밤통화횟수',     # night call count
    '밤통화요금',     # night call charge
    '상담전화건수',   # number of customer-service calls
]

In [6]:
# Training matrix (columns listed in `feature`) and label vector as NumPy arrays.
x_train = train[feature].values
y_train = train['전화해지여부'].values

# Build the test matrix from the same named columns, in the same order, as
# x_train. Dropping only 'ID' relied on the test CSV's remaining columns
# happening to match `feature`; selecting by name raises a KeyError if a
# column is missing instead of silently feeding misaligned inputs to the model.
# The isinstance guard keeps the cell re-runnable: `test` is rebound to an
# ndarray here, and a second run would otherwise fail on the converted value.
if isinstance(test, pd.DataFrame):
    test = test[feature].values

In [None]:
# Compare candidate tree counts. `score` receives one mean cross-validated
# accuracy per candidate, in the same order as the candidate list. Previously
# the list was created but never filled, so the loop produced no tuning signal.
score = []
tune_cv = StratifiedKFold(n_splits=CONFIG['n_splits'], shuffle=True, random_state=CONFIG['seed'])
for est in [10, 20, 30]:
    model_for_tune = lgb.LGBMClassifier(n_estimators=est, random_state=CONFIG['seed'])
    # cross_val_score fits clones of the estimator on each training fold and
    # scores them on the held-out fold (the classifier's default score is accuracy).
    fold_scores = sk.model_selection.cross_val_score(model_for_tune, x_train, y_train, cv=tune_cv)
    score.append(fold_scores.mean())
    # Keep the original full-data fit so model_for_tune is left fitted afterwards.
    model_for_tune.fit(x_train, y_train)
score
    

In [14]:
# Train one LightGBM classifier per stratified fold and keep every fitted model.
kf = StratifiedKFold(
    n_splits=CONFIG['n_splits'],
    shuffle=True,
    random_state=CONFIG['seed'],
)
n_estimators = 10
models = []
for fit_idx, val_idx in kf.split(x_train, y_train):
    model = lgb.LGBMClassifier(n_estimators=n_estimators, random_state=CONFIG['seed'])
    model.fit(x_train[fit_idx], y_train[fit_idx])
    # Accuracy on the fold that was held out from fitting.
    print(f"Score : {model.score(x_train[val_idx], y_train[val_idx])}")
    models.append(model)

# One list of predicted labels per fold model, each computed on the full `test` array.
preds = []
for model in models:
    preds.append(model.predict(test).tolist())

Score : 0.8910596026490066
Score : 0.890728476821192
Score : 0.8912251655629139
Score : 0.8905629139072848
Score : 0.8908940397350993


In [15]:
# Majority vote across the fold models. Rows of pred_vec are models, columns
# are predicted samples. For every column, count the votes for label 0 and for
# label 1 and keep the label with more votes; a tie resolves to 0.
pred_vec = np.array(preds)
votes_for_0 = (pred_vec == 0).sum(axis=0)
votes_for_1 = (pred_vec == 1).sum(axis=0)
ensemble = (votes_for_1 > votes_for_0).astype(int).tolist()

In [16]:
# Fill the sample-submission template with the ensembled labels and save it.
submit = pd.read_csv(CONFIG['sample_submission'])
submit['전화해지여부'] = ensemble
# Write to the path registered in CONFIG rather than repeating the literal,
# so the output location has a single source of truth.
submit.to_csv(CONFIG['submission'], index=False)
# Last expression of the cell so the preview is actually displayed.
submit.head()