# 사용할 라이브러리 로딩

In [139]:
! pip install -U imbalanced-learn



In [137]:
import numpy as np # Numpy
import pandas as pd # Pandas
import matplotlib as mpl #Matplotlib 세팅용
import matplotlib.pyplot as plt # 시각화 도구
import seaborn as sns # 시각화 도구
from sklearn.model_selection import train_test_split # 데이터셋 분리
from sklearn.cluster import KMeans # 클러스터링
from sklearn.metrics import silhouette_score # 실루엣 점수
import xgboost as xgb # XGBoost
from xgboost import XGBClassifier  # XGBoostClassifier
from sklearn.model_selection import GridSearchCV # 그리드 서치
from sklearn.metrics import accuracy_score, precision_score # 평가 지표
from sklearn.metrics import recall_score, confusion_matrix, roc_auc_score, f1_score # 평가 지표
from imblearn.combine import *
from sklearn.ensemble import RandomForestClassifier # 랜덤 포레스트
from hyperopt import hp, STATUS_OK, fmin, tpe, Trials # 최적의 파람
from imblearn.combine import * # 복합 샘플링
from sklearn.model_selection import cross_val_score # 교차 스코어
import warnings # 경고문 제거용
from imblearn.combine import SMOTEENN, SMOTETomek # 복합샘플링
from sklearn.model_selection import KFold
from IPython.display import Image

from imblearn.over_sampling import *
%matplotlib inline
%config Inlinebackend.figure_format = 'retina'

# 한글 폰트 설정
mpl.rc('font', family='D2Coding')
# 유니코드에서 음수 부호 설정
mpl.rc('axes', unicode_minus = False)

warnings.filterwarnings('ignore')
sns.set(font="D2Coding", rc={"axes.unicode_minus":False}, style='darkgrid')
plt.rc('figure', figsize=(10,8))

# 필요함수 만들기

In [3]:
# 스케일링을 위한 함수 생성
col = []
def data_scaled(df, col):
    for i in col:
        data_mean = df[i].mean()
        data_std = df[i].std()
        scaled = (df[i]-data_mean)/data_std
        df[i]=scaled
    return df

In [4]:
# 평가를 위한 함수 생성
def  get_clf_eval(y_test, pred=None, pred_proba=None):
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
#     roc_auc = roc_auc_score(y_test, pred_proba)
    
    print('오차 행렬')
    print(confusion)
 
    print('정확도: {0:.4f}, 정밀도: {1:.4f}, \
    재현율: {2:.4f}, F1: {3:.4f}'.format(accuracy, precision, recall, f1))

In [5]:
# 평가를 위한 함수 생성
def  get_multi_clf_eval(y_test, pred=None, pred_proba=None):
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred, average='micro')
    recall = recall_score(y_test, pred, average='micro')
    f1 = f1_score(y_test, pred, average='micro')
#     roc_auc = roc_auc_score(y_test, pred_proba)
    
    print('오차 행렬')
    print(confusion)
 
    print('정확도: {0:.4f}, 정밀도: {1:.4f}, \
    재현율: {2:.4f}, F1: {3:.4f}'.format(accuracy, precision, recall, f1))

# 데이터 불러오기

- PassengerId는 'gggg_pp' 형태를 띔
- 같은 'gggg'는 그룹을 의미하며 pp는 그룹내에서의 번호를 의미함
- 이를 바탕으로 1차적으로 Cabin의 결측값을 채워줌
- 그후 Cabin을 문자/ 숫자/ 문자/ 문자+숫자 형태로 나눈 새로운 컬럼 4를 만들어 준 데이터 세트

- 개인
![nn](Individual.png)
<br>
- 그룹
![nn](Group1.png)
![nn](Group2.png)

In [6]:
spaceship = pd.read_excel('train_test_origin.xlsx')
print(spaceship.shape)

(12970, 18)


In [7]:
spaceship.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin1,Cabin2,Combi,Cabin3,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,0.0,B,0.0,B0,P,B/0/P,TRAPPIST-1e,39.0,0.0,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,0.0
1,0002_01,Earth,0.0,F,0.0,F0,S,F/0/S,TRAPPIST-1e,24.0,0.0,109.0,9.0,25.0,549.0,44.0,Juanna Vines,1.0
2,0003_01,Europa,0.0,A,0.0,A0,S,A/0/S,TRAPPIST-1e,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,0.0
3,0003_02,Europa,0.0,A,0.0,A0,S,A/0/S,TRAPPIST-1e,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,0.0
4,0004_01,Earth,0.0,F,1.0,F1,S,F/1/S,TRAPPIST-1e,16.0,0.0,303.0,70.0,151.0,565.0,2.0,Willy Santantines,1.0


# 데이터 탐색

## 데이터 정보 확인

In [8]:
spaceship.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12970 entries, 0 to 12969
Data columns (total 18 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   PassengerId   12970 non-null  object 
 1   HomePlanet    12691 non-null  object 
 2   CryoSleep     12660 non-null  float64
 3   Cabin1        12804 non-null  object 
 4   Cabin2        12804 non-null  float64
 5   Combi         12804 non-null  object 
 6   Cabin3        12804 non-null  object 
 7   Cabin         12804 non-null  object 
 8   Destination   12704 non-null  object 
 9   Age           12700 non-null  float64
 10  VIP           12674 non-null  float64
 11  RoomService   12776 non-null  float64
 12  FoodCourt     12752 non-null  float64
 13  ShoppingMall  12760 non-null  float64
 14  Spa           12754 non-null  float64
 15  VRDeck        12766 non-null  float64
 16  Name          12676 non-null  object 
 17  Transported   8693 non-null   float64
dtypes: float64(10), object(8)


## 결측값 확인

In [9]:
spaceship.isnull().sum()

PassengerId        0
HomePlanet       279
CryoSleep        310
Cabin1           166
Cabin2           166
Combi            166
Cabin3           166
Cabin            166
Destination      266
Age              270
VIP              296
RoomService      194
FoodCourt        218
ShoppingMall     210
Spa              216
VRDeck           204
Name             294
Transported     4277
dtype: int64

- 결측값 처리가 필요한 피처들 : 
<br> HomePlanet(279), CryoSleep(310), Destination(266), Age(270), VIP(296), RoomService(194), FoodCourt(218), ShoppingMall(210), Spa(216), VRDeck(204)

# 전처리

## 불필요한 피처 제거 : PassengerId, Name, Combi, Cabin, Transported

In [10]:
spaceship.drop(['PassengerId', 'Name','Combi','Cabin','Transported'], inplace=True, axis=1)

In [11]:
spaceship.info() # 제거 확인

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12970 entries, 0 to 12969
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    12691 non-null  object 
 1   CryoSleep     12660 non-null  float64
 2   Cabin1        12804 non-null  object 
 3   Cabin2        12804 non-null  float64
 4   Cabin3        12804 non-null  object 
 5   Destination   12704 non-null  object 
 6   Age           12700 non-null  float64
 7   VIP           12674 non-null  float64
 8   RoomService   12776 non-null  float64
 9   FoodCourt     12752 non-null  float64
 10  ShoppingMall  12760 non-null  float64
 11  Spa           12754 non-null  float64
 12  VRDeck        12766 non-null  float64
dtypes: float64(9), object(4)
memory usage: 1.3+ MB


## 피처(CryoSleep, Cabin3, VIP) Dtype 변경 : object ▶ boolean

In [12]:
# Cabin3의 P(좌현)를 False으로 S(우현)를 True로 변경
spaceship['Cabin3'].replace({'P': True,'S': False}, inplace=True)

# boolean으로 변환
spaceship['CryoSleep'] = spaceship['CryoSleep'].astype(bool)
spaceship['VIP'] = spaceship['VIP'].astype(bool)
spaceship['Cabin3'] = spaceship['Cabin3'].astype(bool)

# 변환 확인
spaceship.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12970 entries, 0 to 12969
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    12691 non-null  object 
 1   CryoSleep     12970 non-null  bool   
 2   Cabin1        12804 non-null  object 
 3   Cabin2        12804 non-null  float64
 4   Cabin3        12970 non-null  bool   
 5   Destination   12704 non-null  object 
 6   Age           12700 non-null  float64
 7   VIP           12970 non-null  bool   
 8   RoomService   12776 non-null  float64
 9   FoodCourt     12752 non-null  float64
 10  ShoppingMall  12760 non-null  float64
 11  Spa           12754 non-null  float64
 12  VRDeck        12766 non-null  float64
dtypes: bool(3), float64(7), object(3)
memory usage: 1.0+ MB


#  CryoSleep 결측값 처리

## 데이터 불러오기

In [13]:
cs_df = spaceship.iloc[:,:]
cs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12970 entries, 0 to 12969
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    12691 non-null  object 
 1   CryoSleep     12970 non-null  bool   
 2   Cabin1        12804 non-null  object 
 3   Cabin2        12804 non-null  float64
 4   Cabin3        12970 non-null  bool   
 5   Destination   12704 non-null  object 
 6   Age           12700 non-null  float64
 7   VIP           12970 non-null  bool   
 8   RoomService   12776 non-null  float64
 9   FoodCourt     12752 non-null  float64
 10  ShoppingMall  12760 non-null  float64
 11  Spa           12754 non-null  float64
 12  VRDeck        12766 non-null  float64
dtypes: bool(3), float64(7), object(3)
memory usage: 1.0+ MB


## 전처리

In [14]:
cs_df.dropna(axis=0,inplace=True)
cs_df.shape

(11076, 13)

In [15]:
# 원-핫 인코딩 (cabin1, destination)

## HomePlanet
encoding = pd.get_dummies(cs_df.HomePlanet)
cs_df = cs_df.drop('HomePlanet', axis =1) # 기존 삭제
cs_df = cs_df.join(encoding) # 적용
## Cabin1
encoding = pd.get_dummies(cs_df.Cabin1)
cs_df = cs_df.drop('Cabin1', axis =1) # 기존 삭제
cs_df = cs_df.join(encoding) # 적용
## Destination
encoding = pd.get_dummies(cs_df.Destination)
cs_df = cs_df.drop('Destination', axis =1) # 기존 삭제
cs_df = cs_df.join(encoding) # 적용

In [16]:
cs_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   CryoSleep      11076 non-null  bool   
 1   Cabin2         11076 non-null  float64
 2   Cabin3         11076 non-null  bool   
 3   Age            11076 non-null  float64
 4   VIP            11076 non-null  bool   
 5   RoomService    11076 non-null  float64
 6   FoodCourt      11076 non-null  float64
 7   ShoppingMall   11076 non-null  float64
 8   Spa            11076 non-null  float64
 9   VRDeck         11076 non-null  float64
 10  Earth          11076 non-null  uint8  
 11  Europa         11076 non-null  uint8  
 12  Mars           11076 non-null  uint8  
 13  A              11076 non-null  uint8  
 14  B              11076 non-null  uint8  
 15  C              11076 non-null  uint8  
 16  D              11076 non-null  uint8  
 17  E              11076 non-null  uint8  
 18  F     

In [17]:
# 스케일링

## 위에 정의된 스케일링 함수 호출
col = ['Cabin2', 'RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
data_scaled(cs_df, col)

Unnamed: 0,CryoSleep,Cabin2,Cabin3,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,False,-1.171058,True,39.0,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,1,0,0,0,0,0,0,0,0,1
1,False,-1.171058,False,24.0,False,-0.166861,-0.277057,-0.244125,0.217775,-0.220858,...,0,0,0,0,1,0,0,0,0,1
2,False,-1.171058,False,58.0,True,-0.268437,1.949128,-0.285975,5.709312,-0.216406,...,0,0,0,0,0,0,0,0,0,1
3,False,-1.171058,False,33.0,False,-0.334616,0.518053,0.335083,2.693687,-0.088194,...,0,0,0,0,0,0,0,0,0,1
4,False,-1.169112,False,16.0,False,0.131712,-0.238987,-0.033199,0.232025,-0.258253,...,0,0,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12962,True,1.737845,False,43.0,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,0,0,0,0,0,1,0,0,0,1
12963,False,-0.630138,False,43.0,False,-0.262281,-0.282674,6.160647,-0.271173,-0.260034,...,0,0,1,0,0,0,0,0,0,1
12964,False,2.323517,False,40.0,False,-0.334616,0.257177,-0.285975,-0.268502,-0.260034,...,0,0,0,0,1,0,0,0,0,1
12965,True,1.739790,False,34.0,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,0,0,0,0,0,1,0,0,0,1


## 데이터와 타겟값 분리

In [18]:
cs_data = cs_df.drop('CryoSleep',axis=1)
cs_label = cs_df.CryoSleep

In [19]:
cs_data.head()

Unnamed: 0,Cabin2,Cabin3,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Earth,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,-1.171058,True,39.0,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,0,...,1,0,0,0,0,0,0,0,0,1
1,-1.171058,False,24.0,False,-0.166861,-0.277057,-0.244125,0.217775,-0.220858,1,...,0,0,0,0,1,0,0,0,0,1
2,-1.171058,False,58.0,True,-0.268437,1.949128,-0.285975,5.709312,-0.216406,0,...,0,0,0,0,0,0,0,0,0,1
3,-1.171058,False,33.0,False,-0.334616,0.518053,0.335083,2.693687,-0.088194,0,...,0,0,0,0,0,0,0,0,0,1
4,-1.169112,False,16.0,False,0.131712,-0.238987,-0.033199,0.232025,-0.258253,1,...,0,0,0,0,1,0,0,0,0,1


## train, test 데이터세트 분리

In [20]:
X_train, X_test, y_train, y_test = train_test_split(cs_data,cs_label, random_state=109)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train)

## 모델링_XGBClassifier

### 그리드 서치

In [21]:
# 그리드 서치를 이용한 최적의 파라미터 찾기

xgbo = xgb.XGBClassifier()

params = {
    'max_depth':[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                 19, 20, 21, 22, 23, 24, 25, None],
    'learning_rate':[0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4],
    'gamma':[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5],
    'random_state':[109]
}

gs = GridSearchCV(xgbo, param_grid = params, cv = 3, refit = True,
                  n_jobs=-1)
gs.fit(X_train, y_train)

In [22]:
# 평가 결과
model = gs.best_estimator_
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))
print(model.score(X_val, y_val))

0.9457463884430176
0.939689418562658
0.9383726528647087


In [23]:
# 최적의 파라미터값
print(gs.best_params_)

{'gamma': 1.5, 'learning_rate': 0.3, 'max_depth': 4, 'random_state': 109}


In [24]:
# 그리드서치를 통해 얻어진 파라미터를 적용시킨 XGBClassifier 모델로 학습
xgbo_gs = xgb.XGBClassifier(gamma=5, learning_rate=0.4, max_depth=6, random_state=109)
xgbo_gs.fit(X_train, y_train)

In [25]:
# 앞서 만든 평가 함수(사용자 지정 함수)를 위해 필요한 변수들
train_pred = xgbo_gs.predict(X_train)
train_proba = xgbo_gs.predict_proba(X_train)

test_pred = xgbo_gs.predict(X_test)
test_proba = xgbo_gs.predict_proba(X_test)

val_pred = xgbo_gs.predict(X_val)
val_proba = xgbo_gs.predict_proba(X_val)

In [26]:
print('훈련셋 평가 지표\n')
get_clf_eval(y_train, train_pred, train_proba)
print('\n\n 테스트셋 평가 지표\n')
get_clf_eval(y_test, test_pred, test_proba)
print('\n\n 검증셋 평가 지표\n')
get_clf_eval(y_val, val_pred, val_proba)

훈련셋 평가 지표

오차 행렬
[[3566  209]
 [ 146 2309]]
정확도: 0.9430, 정밀도: 0.9170,     재현율: 0.9405, F1: 0.9286


 테스트셋 평가 지표

오차 행렬
[[1590   96]
 [  67 1016]]
정확도: 0.9411, 정밀도: 0.9137,     재현율: 0.9381, F1: 0.9257


 검증셋 평가 지표

오차 행렬
[[1193   65]
 [  55  764]]
정확도: 0.9422, 정밀도: 0.9216,     재현율: 0.9328, F1: 0.9272


### HyperOPT

In [27]:
xgb_search_space = {'max_depth': hp.quniform('max_depth', 5, 15, 1),
                   'min_child_weight': hp.quniform('min_child_weight', 1, 20, 1),
                   'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 0.95),
                   'learning_rate': hp.uniform('learning_rate', 0.01, 0.4),
                   'gamma': hp.uniform('gamma', 0, 4)}

In [28]:
# fmin()에서 호출 시 search_space 값으로 XGBClassifier 교차 검증 학습 후 -1 * roc_auc 평균 값을 반환
def bin_objective_func(search_space):
    xgb_clf = xgb.XGBClassifier(n_estimators=100, max_depth=int(search_space['max_depth']),
                            min_child_weight=int(search_space['min_child_weight']),
                            colsample_bytree=search_space['colsample_bytree'],
                            learning_rate=search_space['learning_rate'],
                            gamma=search_space['gamma'])
    
    # 3개 k-fold 방식으로 평가된 roc_auc 지표를 담는 list
    roc_auc_list = []
    
    # 3개 k-fold 방식 적용
    kf = KFold(n_splits=3)
    
    # X_train을 다시 학습과 검증용 데이터로 분리
    for tr_index, val_index in kf.split(X_train):
        # kf.split(X_train)으로 추출된 학습과 검증 index 값으로 학습과 검증 데이터 세트 분리
        X_tr, y_tr = X_train.iloc[tr_index], y_train.iloc[tr_index]
        X_val, y_val = X_train.iloc[val_index], y_train.iloc[val_index]
        
        # early stopping은 30회로 설정하고 추출된 학습과 검증 데이터로 XGBClassifier 학습 수행
        xgb_clf.fit(X_tr, y_tr, early_stopping_rounds=30, eval_metric="auc",
                   eval_set=[(X_tr, y_tr), (X_val, y_val)])
        
        # 1로 예측한 확률값 추출 후 roc auc 계산하고 평균 roc auc 계산을 위해 list에 결과값 담음.
        score = roc_auc_score(y_val, xgb_clf.predict_proba(X_val)[:,1])
        roc_auc_list.append(score)
        
    # 3개 k-fold로 계산된 roc_auc 값의 평균값을 반환하되,
    # HyperOPT는 목적함수의 최솟값을 위한 입력값을 찾으므로 -1을 곱한 뒤 반환
    return -1*np.mean(roc_auc_list)

In [29]:
# HyperOPT를 이용한 최적의 파라미터 찾기

trials = Trials()

# fmin() 함수를 호출. max_evals 지정된 횟수만큼 반복 후
# 목적함수의 최솟값을 가지는 최적 입력값 추출
best = fmin(fn=bin_objective_func,
           space=xgb_search_space,
           algo=tpe.suggest,
           max_evals=50, # 최대 반복 횟수를 지정
           trials=trials, rstate=np.random.default_rng(seed=109))

print('best:', best)

[0]	validation_0-auc:0.95288	validation_1-auc:0.94657                                                                  
[1]	validation_0-auc:0.95631	validation_1-auc:0.94681                                                                  
[2]	validation_0-auc:0.97439	validation_1-auc:0.97006                                                                  
[3]	validation_0-auc:0.97489	validation_1-auc:0.97030                                                                  
[4]	validation_0-auc:0.97944	validation_1-auc:0.97099                                                                  
[5]	validation_0-auc:0.98087	validation_1-auc:0.97191                                                                  
[6]	validation_0-auc:0.98332	validation_1-auc:0.97290                                                                  
[7]	validation_0-auc:0.98369	validation_1-auc:0.97347                                                                  
[8]	validation_0-auc:0.98470	validation_

[19]	validation_0-auc:0.98935	validation_1-auc:0.97469                                                                 
[20]	validation_0-auc:0.98965	validation_1-auc:0.97445                                                                 
[21]	validation_0-auc:0.98968	validation_1-auc:0.97442                                                                 
[22]	validation_0-auc:0.98968	validation_1-auc:0.97442                                                                 
[23]	validation_0-auc:0.98998	validation_1-auc:0.97417                                                                 
[24]	validation_0-auc:0.98998	validation_1-auc:0.97417                                                                 
[25]	validation_0-auc:0.98998	validation_1-auc:0.97417                                                                 
[26]	validation_0-auc:0.99005	validation_1-auc:0.97396                                                                 
[27]	validation_0-auc:0.99068	validation

[16]	validation_0-auc:0.97534	validation_1-auc:0.97354                                                                 
[17]	validation_0-auc:0.97666	validation_1-auc:0.97372                                                                 
[18]	validation_0-auc:0.97669	validation_1-auc:0.97394                                                                 
[19]	validation_0-auc:0.97663	validation_1-auc:0.97368                                                                 
[20]	validation_0-auc:0.97684	validation_1-auc:0.97400                                                                 
[21]	validation_0-auc:0.97794	validation_1-auc:0.97452                                                                 
[22]	validation_0-auc:0.97812	validation_1-auc:0.97408                                                                 
[23]	validation_0-auc:0.97871	validation_1-auc:0.97413                                                                 
[24]	validation_0-auc:0.97912	validation

[14]	validation_0-auc:0.97425	validation_1-auc:0.97361                                                                 
[15]	validation_0-auc:0.97500	validation_1-auc:0.97398                                                                 
[16]	validation_0-auc:0.97541	validation_1-auc:0.97410                                                                 
[17]	validation_0-auc:0.97682	validation_1-auc:0.97430                                                                 
[18]	validation_0-auc:0.97729	validation_1-auc:0.97437                                                                 
[19]	validation_0-auc:0.97720	validation_1-auc:0.97485                                                                 
[20]	validation_0-auc:0.97770	validation_1-auc:0.97468                                                                 
[21]	validation_0-auc:0.97842	validation_1-auc:0.97480                                                                 
[22]	validation_0-auc:0.97835	validation

[18]	validation_0-auc:0.97723	validation_1-auc:0.96891                                                                 
[19]	validation_0-auc:0.97765	validation_1-auc:0.96804                                                                 
[20]	validation_0-auc:0.97810	validation_1-auc:0.96812                                                                 
[21]	validation_0-auc:0.97806	validation_1-auc:0.96816                                                                 
[22]	validation_0-auc:0.97802	validation_1-auc:0.96796                                                                 
[23]	validation_0-auc:0.97884	validation_1-auc:0.96857                                                                 
[24]	validation_0-auc:0.97942	validation_1-auc:0.96802                                                                 
[25]	validation_0-auc:0.98014	validation_1-auc:0.96795                                                                 
[26]	validation_0-auc:0.98036	validation

[0]	validation_0-auc:0.95161	validation_1-auc:0.95610                                                                  
[1]	validation_0-auc:0.95454	validation_1-auc:0.95652                                                                  
[2]	validation_0-auc:0.97575	validation_1-auc:0.97397                                                                  
[3]	validation_0-auc:0.98192	validation_1-auc:0.97589                                                                  
[4]	validation_0-auc:0.98261	validation_1-auc:0.97475                                                                  
[5]	validation_0-auc:0.98354	validation_1-auc:0.97575                                                                  
[6]	validation_0-auc:0.98678	validation_1-auc:0.97498                                                                  
[7]	validation_0-auc:0.98877	validation_1-auc:0.97444                                                                  
[8]	validation_0-auc:0.98911	validation_

[35]	validation_0-auc:0.99708	validation_1-auc:0.96922                                                                 
[36]	validation_0-auc:0.99717	validation_1-auc:0.96932                                                                 
[37]	validation_0-auc:0.99724	validation_1-auc:0.96924                                                                 
[38]	validation_0-auc:0.99735	validation_1-auc:0.96953                                                                 
[39]	validation_0-auc:0.99746	validation_1-auc:0.96971                                                                 
[40]	validation_0-auc:0.99749	validation_1-auc:0.96969                                                                 
[41]	validation_0-auc:0.99757	validation_1-auc:0.96986                                                                 
[42]	validation_0-auc:0.99764	validation_1-auc:0.96978                                                                 
[43]	validation_0-auc:0.99770	validation

[29]	validation_0-auc:0.97396	validation_1-auc:0.97065                                                                 
[30]	validation_0-auc:0.97439	validation_1-auc:0.97011                                                                 
[31]	validation_0-auc:0.97518	validation_1-auc:0.97148                                                                 
[32]	validation_0-auc:0.97525	validation_1-auc:0.97119                                                                 
[33]	validation_0-auc:0.97519	validation_1-auc:0.97113                                                                 
[34]	validation_0-auc:0.97519	validation_1-auc:0.97102                                                                 
[35]	validation_0-auc:0.97523	validation_1-auc:0.97095                                                                 
[36]	validation_0-auc:0.97554	validation_1-auc:0.97118                                                                 
[37]	validation_0-auc:0.97559	validation

[97]	validation_0-auc:0.97920	validation_1-auc:0.97295                                                                 
[98]	validation_0-auc:0.97925	validation_1-auc:0.97293                                                                 
[99]	validation_0-auc:0.97926	validation_1-auc:0.97292                                                                 
[0]	validation_0-auc:0.94056	validation_1-auc:0.94620                                                                  
[1]	validation_0-auc:0.96450	validation_1-auc:0.96932                                                                  
[2]	validation_0-auc:0.96397	validation_1-auc:0.96844                                                                  
[3]	validation_0-auc:0.96466	validation_1-auc:0.96888                                                                  
[4]	validation_0-auc:0.96630	validation_1-auc:0.96961                                                                  
[5]	validation_0-auc:0.96728	validation_

[3]	validation_0-auc:0.96875	validation_1-auc:0.96260                                                                  
[4]	validation_0-auc:0.96979	validation_1-auc:0.96362                                                                  
[5]	validation_0-auc:0.97008	validation_1-auc:0.96297                                                                  
[6]	validation_0-auc:0.97175	validation_1-auc:0.96441                                                                  
[7]	validation_0-auc:0.97152	validation_1-auc:0.96404                                                                  
[8]	validation_0-auc:0.97228	validation_1-auc:0.96526                                                                  
[9]	validation_0-auc:0.97241	validation_1-auc:0.96526                                                                  
[10]	validation_0-auc:0.97253	validation_1-auc:0.96532                                                                 
[11]	validation_0-auc:0.97277	validation

[0]	validation_0-auc:0.94891	validation_1-auc:0.94405                                                                  
[1]	validation_0-auc:0.95211	validation_1-auc:0.94669                                                                  
[2]	validation_0-auc:0.96888	validation_1-auc:0.96724                                                                  
[3]	validation_0-auc:0.97087	validation_1-auc:0.97057                                                                  
[4]	validation_0-auc:0.97174	validation_1-auc:0.96972                                                                  
[5]	validation_0-auc:0.97138	validation_1-auc:0.96950                                                                  
[6]	validation_0-auc:0.97176	validation_1-auc:0.96965                                                                  
[7]	validation_0-auc:0.97065	validation_1-auc:0.96830                                                                  
[8]	validation_0-auc:0.97074	validation_

[68]	validation_0-auc:0.97399	validation_1-auc:0.97311                                                                 
[69]	validation_0-auc:0.97402	validation_1-auc:0.97309                                                                 
[70]	validation_0-auc:0.97402	validation_1-auc:0.97344                                                                 
[71]	validation_0-auc:0.97403	validation_1-auc:0.97355                                                                 
[72]	validation_0-auc:0.97408	validation_1-auc:0.97356                                                                 
[73]	validation_0-auc:0.97411	validation_1-auc:0.97356                                                                 
[74]	validation_0-auc:0.97419	validation_1-auc:0.97362                                                                 
[75]	validation_0-auc:0.97426	validation_1-auc:0.97393                                                                 
[76]	validation_0-auc:0.97427	validation

[36]	validation_0-auc:0.97307	validation_1-auc:0.97356                                                                 
[37]	validation_0-auc:0.97304	validation_1-auc:0.97341                                                                 
[38]	validation_0-auc:0.97298	validation_1-auc:0.97342                                                                 
[39]	validation_0-auc:0.97347	validation_1-auc:0.97379                                                                 
[40]	validation_0-auc:0.97349	validation_1-auc:0.97386                                                                 
[41]	validation_0-auc:0.97346	validation_1-auc:0.97378                                                                 
[42]	validation_0-auc:0.97350	validation_1-auc:0.97384                                                                 
[43]	validation_0-auc:0.97350	validation_1-auc:0.97384                                                                 
[44]	validation_0-auc:0.97311	validation

[28]	validation_0-auc:0.97547	validation_1-auc:0.96770                                                                 
[29]	validation_0-auc:0.97546	validation_1-auc:0.96782                                                                 
[30]	validation_0-auc:0.97541	validation_1-auc:0.96795                                                                 
[31]	validation_0-auc:0.97541	validation_1-auc:0.96796                                                                 
[32]	validation_0-auc:0.97533	validation_1-auc:0.96808                                                                 
[33]	validation_0-auc:0.97528	validation_1-auc:0.96811                                                                 
[34]	validation_0-auc:0.97568	validation_1-auc:0.96773                                                                 
[35]	validation_0-auc:0.97572	validation_1-auc:0.96763                                                                 
[36]	validation_0-auc:0.97572	validation

[96]	validation_0-auc:0.97840	validation_1-auc:0.96799                                                                 
[97]	validation_0-auc:0.97861	validation_1-auc:0.96791                                                                 
[98]	validation_0-auc:0.97898	validation_1-auc:0.96790                                                                 
[99]	validation_0-auc:0.97922	validation_1-auc:0.96781                                                                 
[0]	validation_0-auc:0.94953	validation_1-auc:0.94229                                                                  
[1]	validation_0-auc:0.96969	validation_1-auc:0.96870                                                                  
[2]	validation_0-auc:0.97041	validation_1-auc:0.96980                                                                  
[3]	validation_0-auc:0.97300	validation_1-auc:0.97242                                                                  
[4]	validation_0-auc:0.97436	validation_

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[34]	validation_0-auc:0.98325	validation_1-auc:0.96736                                                                 
[35]	validation_0-auc:0.98371	validation_1-auc:0.96713                                                                 
[36]	validation_0-auc:0.98405	validation_1-auc:0.96759                                                                 
[37]	validation_0-auc:0.98390	validation_1-auc:0.96748                                                                 
[38]	validation_0-auc:0.98421	validation_1-auc:0.96742                                                                 
[39]	validation_0-auc:0.98450	validation_1-auc:0.96766                                                                 
[40]	validation_0-auc:0.98458	validation_1-auc:0.96794                                                                 
[41]	validation_0-auc:0.98481	validation_1-auc:0.96789                                                                 
[42]	validation_0-auc:0.98516	validation

[17]	validation_0-auc:0.97965	validation_1-auc:0.97356                                                                 
[18]	validation_0-auc:0.97963	validation_1-auc:0.97347                                                                 
[19]	validation_0-auc:0.97965	validation_1-auc:0.97350                                                                 
[20]	validation_0-auc:0.98011	validation_1-auc:0.97375                                                                 
[21]	validation_0-auc:0.98049	validation_1-auc:0.97393                                                                 
[22]	validation_0-auc:0.98055	validation_1-auc:0.97381                                                                 
[23]	validation_0-auc:0.98073	validation_1-auc:0.97383                                                                 
[24]	validation_0-auc:0.98070	validation_1-auc:0.97372                                                                 
[25]	validation_0-auc:0.98074	validation

[23]	validation_0-auc:0.98275	validation_1-auc:0.96676                                                                 
[24]	validation_0-auc:0.98278	validation_1-auc:0.96683                                                                 
[25]	validation_0-auc:0.98291	validation_1-auc:0.96671                                                                 
[26]	validation_0-auc:0.98292	validation_1-auc:0.96656                                                                 
[27]	validation_0-auc:0.98312	validation_1-auc:0.96645                                                                 
[28]	validation_0-auc:0.98324	validation_1-auc:0.96661                                                                 
[29]	validation_0-auc:0.98333	validation_1-auc:0.96652                                                                 
[30]	validation_0-auc:0.98346	validation_1-auc:0.96648                                                                 
[31]	validation_0-auc:0.98370	validation

[54]	validation_0-auc:0.97669	validation_1-auc:0.97466                                                                 
[55]	validation_0-auc:0.97661	validation_1-auc:0.97460                                                                 
[56]	validation_0-auc:0.97738	validation_1-auc:0.97473                                                                 
[57]	validation_0-auc:0.97742	validation_1-auc:0.97476                                                                 
[58]	validation_0-auc:0.97740	validation_1-auc:0.97475                                                                 
[59]	validation_0-auc:0.97741	validation_1-auc:0.97466                                                                 
[60]	validation_0-auc:0.97743	validation_1-auc:0.97463                                                                 
[61]	validation_0-auc:0.97760	validation_1-auc:0.97429                                                                 
[62]	validation_0-auc:0.97762	validation

[22]	validation_0-auc:0.97227	validation_1-auc:0.97298                                                                 
[23]	validation_0-auc:0.97215	validation_1-auc:0.97293                                                                 
[24]	validation_0-auc:0.97212	validation_1-auc:0.97282                                                                 
[25]	validation_0-auc:0.97243	validation_1-auc:0.97285                                                                 
[26]	validation_0-auc:0.97249	validation_1-auc:0.97296                                                                 
[27]	validation_0-auc:0.97249	validation_1-auc:0.97292                                                                 
[28]	validation_0-auc:0.97366	validation_1-auc:0.97233                                                                 
[29]	validation_0-auc:0.97353	validation_1-auc:0.97220                                                                 
[30]	validation_0-auc:0.97351	validation

[38]	validation_0-auc:0.97705	validation_1-auc:0.96808                                                                 
[39]	validation_0-auc:0.97693	validation_1-auc:0.96817                                                                 
[40]	validation_0-auc:0.97697	validation_1-auc:0.96827                                                                 
[41]	validation_0-auc:0.97712	validation_1-auc:0.96841                                                                 
[42]	validation_0-auc:0.97715	validation_1-auc:0.96850                                                                 
[43]	validation_0-auc:0.97771	validation_1-auc:0.96830                                                                 
[44]	validation_0-auc:0.97769	validation_1-auc:0.96823                                                                 
[45]	validation_0-auc:0.97773	validation_1-auc:0.96829                                                                 
[46]	validation_0-auc:0.97773	validation

[0]	validation_0-auc:0.94851	validation_1-auc:0.95441                                                                  
[1]	validation_0-auc:0.95361	validation_1-auc:0.95625                                                                  
[2]	validation_0-auc:0.97315	validation_1-auc:0.97472                                                                  
[3]	validation_0-auc:0.98053	validation_1-auc:0.97443                                                                  
[4]	validation_0-auc:0.98168	validation_1-auc:0.97471                                                                  
[5]	validation_0-auc:0.98240	validation_1-auc:0.97499                                                                  
[6]	validation_0-auc:0.98355	validation_1-auc:0.97416                                                                  
[7]	validation_0-auc:0.98395	validation_1-auc:0.97402                                                                  
[8]	validation_0-auc:0.98445	validation_

[25]	validation_0-auc:0.99170	validation_1-auc:0.96785                                                                 
[26]	validation_0-auc:0.99191	validation_1-auc:0.96841                                                                 
[27]	validation_0-auc:0.99224	validation_1-auc:0.96822                                                                 
[28]	validation_0-auc:0.99223	validation_1-auc:0.96827                                                                 
[29]	validation_0-auc:0.99232	validation_1-auc:0.96825                                                                 
[30]	validation_0-auc:0.99244	validation_1-auc:0.96798                                                                 
[31]	validation_0-auc:0.99274	validation_1-auc:0.96776                                                                 
[32]	validation_0-auc:0.99285	validation_1-auc:0.96764                                                                 
[0]	validation_0-auc:0.94455	validation_

[17]	validation_0-auc:0.97382	validation_1-auc:0.97202                                                                 
[18]	validation_0-auc:0.97411	validation_1-auc:0.97165                                                                 
[19]	validation_0-auc:0.97574	validation_1-auc:0.97221                                                                 
[20]	validation_0-auc:0.97713	validation_1-auc:0.97263                                                                 
[21]	validation_0-auc:0.97731	validation_1-auc:0.97328                                                                 
[22]	validation_0-auc:0.97730	validation_1-auc:0.97299                                                                 
[23]	validation_0-auc:0.97730	validation_1-auc:0.97299                                                                 
[24]	validation_0-auc:0.97736	validation_1-auc:0.97299                                                                 
[25]	validation_0-auc:0.97744	validation

[26]	validation_0-auc:0.98134	validation_1-auc:0.96693                                                                 
[27]	validation_0-auc:0.98119	validation_1-auc:0.96647                                                                 
[28]	validation_0-auc:0.98120	validation_1-auc:0.96646                                                                 
[29]	validation_0-auc:0.98120	validation_1-auc:0.96646                                                                 
[30]	validation_0-auc:0.98120	validation_1-auc:0.96646                                                                 
[31]	validation_0-auc:0.98120	validation_1-auc:0.96646                                                                 
[32]	validation_0-auc:0.98127	validation_1-auc:0.96609                                                                 
[33]	validation_0-auc:0.98127	validation_1-auc:0.96609                                                                 
[34]	validation_0-auc:0.98127	validation

[52]	validation_0-auc:0.99540	validation_1-auc:0.97247                                                                 
[53]	validation_0-auc:0.99547	validation_1-auc:0.97224                                                                 
[54]	validation_0-auc:0.99547	validation_1-auc:0.97224                                                                 
[55]	validation_0-auc:0.99552	validation_1-auc:0.97221                                                                 
[56]	validation_0-auc:0.99552	validation_1-auc:0.97221                                                                 
[57]	validation_0-auc:0.99552	validation_1-auc:0.97221                                                                 
[58]	validation_0-auc:0.99552	validation_1-auc:0.97221                                                                 
[59]	validation_0-auc:0.99557	validation_1-auc:0.97236                                                                 
[0]	validation_0-auc:0.95058	validation_

[60]	validation_0-auc:0.99470	validation_1-auc:0.97568                                                                 
[61]	validation_0-auc:0.99474	validation_1-auc:0.97581                                                                 
[62]	validation_0-auc:0.99477	validation_1-auc:0.97591                                                                 
[63]	validation_0-auc:0.99484	validation_1-auc:0.97570                                                                 
[64]	validation_0-auc:0.99487	validation_1-auc:0.97567                                                                 
[65]	validation_0-auc:0.99496	validation_1-auc:0.97575                                                                 
[66]	validation_0-auc:0.99496	validation_1-auc:0.97575                                                                 
[0]	validation_0-auc:0.95189	validation_1-auc:0.94189                                                                  
[1]	validation_0-auc:0.95445	validation_

[61]	validation_0-auc:0.99527	validation_1-auc:0.96775                                                                 
[62]	validation_0-auc:0.99529	validation_1-auc:0.96773                                                                 
[63]	validation_0-auc:0.99533	validation_1-auc:0.96762                                                                 
[64]	validation_0-auc:0.99537	validation_1-auc:0.96766                                                                 
[65]	validation_0-auc:0.99544	validation_1-auc:0.96765                                                                 
[66]	validation_0-auc:0.99544	validation_1-auc:0.96765                                                                 
[67]	validation_0-auc:0.99544	validation_1-auc:0.96765                                                                 
[0]	validation_0-auc:0.94804	validation_1-auc:0.94313                                                                  
[1]	validation_0-auc:0.95612	validation_

[26]	validation_0-auc:0.98304	validation_1-auc:0.97393                                                                 
[27]	validation_0-auc:0.98327	validation_1-auc:0.97351                                                                 
[28]	validation_0-auc:0.98323	validation_1-auc:0.97372                                                                 
[29]	validation_0-auc:0.98323	validation_1-auc:0.97372                                                                 
[30]	validation_0-auc:0.98323	validation_1-auc:0.97372                                                                 
[31]	validation_0-auc:0.98323	validation_1-auc:0.97372                                                                 
[32]	validation_0-auc:0.98331	validation_1-auc:0.97360                                                                 
[33]	validation_0-auc:0.98331	validation_1-auc:0.97360                                                                 
[34]	validation_0-auc:0.98346	validation

[14]	validation_0-auc:0.99165	validation_1-auc:0.97102                                                                 
[15]	validation_0-auc:0.99170	validation_1-auc:0.97154                                                                 
[16]	validation_0-auc:0.99235	validation_1-auc:0.97292                                                                 
[17]	validation_0-auc:0.99274	validation_1-auc:0.97338                                                                 
[18]	validation_0-auc:0.99324	validation_1-auc:0.97279                                                                 
[19]	validation_0-auc:0.99344	validation_1-auc:0.97287                                                                 
[20]	validation_0-auc:0.99375	validation_1-auc:0.97258                                                                 
[21]	validation_0-auc:0.99398	validation_1-auc:0.97252                                                                 
[22]	validation_0-auc:0.99415	validation

[34]	validation_0-auc:0.99481	validation_1-auc:0.97508                                                                 
[35]	validation_0-auc:0.99501	validation_1-auc:0.97496                                                                 
[36]	validation_0-auc:0.99523	validation_1-auc:0.97467                                                                 
[37]	validation_0-auc:0.99533	validation_1-auc:0.97461                                                                 
[38]	validation_0-auc:0.99553	validation_1-auc:0.97437                                                                 
[39]	validation_0-auc:0.99559	validation_1-auc:0.97477                                                                 
[40]	validation_0-auc:0.99585	validation_1-auc:0.97455                                                                 
[41]	validation_0-auc:0.99606	validation_1-auc:0.97452                                                                 
[42]	validation_0-auc:0.99619	validation

[39]	validation_0-auc:0.99590	validation_1-auc:0.96921                                                                 
[40]	validation_0-auc:0.99595	validation_1-auc:0.96913                                                                 
[41]	validation_0-auc:0.99603	validation_1-auc:0.96907                                                                 
[42]	validation_0-auc:0.99632	validation_1-auc:0.96894                                                                 
[43]	validation_0-auc:0.99639	validation_1-auc:0.96888                                                                 
[44]	validation_0-auc:0.99642	validation_1-auc:0.96885                                                                 
[45]	validation_0-auc:0.99649	validation_1-auc:0.96887                                                                 
[46]	validation_0-auc:0.99653	validation_1-auc:0.96876                                                                 
[47]	validation_0-auc:0.99671	validation

[0]	validation_0-auc:0.94628	validation_1-auc:0.95017                                                                  
[1]	validation_0-auc:0.95002	validation_1-auc:0.95371                                                                  
[2]	validation_0-auc:0.96875	validation_1-auc:0.97020                                                                  
[3]	validation_0-auc:0.97049	validation_1-auc:0.97175                                                                  
[4]	validation_0-auc:0.97106	validation_1-auc:0.97218                                                                  
[5]	validation_0-auc:0.97357	validation_1-auc:0.97122                                                                  
[6]	validation_0-auc:0.97387	validation_1-auc:0.97159                                                                  
[7]	validation_0-auc:0.97331	validation_1-auc:0.97166                                                                  
[8]	validation_0-auc:0.97234	validation_

[3]	validation_0-auc:0.97466	validation_1-auc:0.96575                                                                  
[4]	validation_0-auc:0.97493	validation_1-auc:0.96724                                                                  
[5]	validation_0-auc:0.97575	validation_1-auc:0.96812                                                                  
[6]	validation_0-auc:0.97584	validation_1-auc:0.97003                                                                  
[7]	validation_0-auc:0.97611	validation_1-auc:0.96856                                                                  
[8]	validation_0-auc:0.97547	validation_1-auc:0.96818                                                                  
[9]	validation_0-auc:0.97538	validation_1-auc:0.96827                                                                  
[10]	validation_0-auc:0.97596	validation_1-auc:0.96740                                                                 
[11]	validation_0-auc:0.97661	validation

[35]	validation_0-auc:0.98682	validation_1-auc:0.97197                                                                 
[36]	validation_0-auc:0.98695	validation_1-auc:0.97211                                                                 
[37]	validation_0-auc:0.98720	validation_1-auc:0.97237                                                                 
[38]	validation_0-auc:0.98747	validation_1-auc:0.97238                                                                 
[39]	validation_0-auc:0.98772	validation_1-auc:0.97238                                                                 
[40]	validation_0-auc:0.98770	validation_1-auc:0.97234                                                                 
[41]	validation_0-auc:0.98784	validation_1-auc:0.97241                                                                 
[42]	validation_0-auc:0.98799	validation_1-auc:0.97231                                                                 
[0]	validation_0-auc:0.94771	validation_

[60]	validation_0-auc:0.98782	validation_1-auc:0.97500                                                                 
[61]	validation_0-auc:0.98782	validation_1-auc:0.97500                                                                 
[62]	validation_0-auc:0.98787	validation_1-auc:0.97502                                                                 
[63]	validation_0-auc:0.98800	validation_1-auc:0.97508                                                                 
[64]	validation_0-auc:0.98800	validation_1-auc:0.97508                                                                 
[65]	validation_0-auc:0.98805	validation_1-auc:0.97504                                                                 
[66]	validation_0-auc:0.98814	validation_1-auc:0.97498                                                                 
[67]	validation_0-auc:0.98819	validation_1-auc:0.97501                                                                 
[68]	validation_0-auc:0.98831	validation

[46]	validation_0-auc:0.98950	validation_1-auc:0.96769                                                                 
[47]	validation_0-auc:0.98951	validation_1-auc:0.96777                                                                 
[48]	validation_0-auc:0.98951	validation_1-auc:0.96777                                                                 
[49]	validation_0-auc:0.98962	validation_1-auc:0.96760                                                                 
[50]	validation_0-auc:0.98962	validation_1-auc:0.96760                                                                 
[51]	validation_0-auc:0.98962	validation_1-auc:0.96760                                                                 
[52]	validation_0-auc:0.98973	validation_1-auc:0.96757                                                                 
[53]	validation_0-auc:0.98973	validation_1-auc:0.96757                                                                 
[54]	validation_0-auc:0.98973	validation

[52]	validation_0-auc:0.97859	validation_1-auc:0.97316                                                                 
[53]	validation_0-auc:0.97883	validation_1-auc:0.97307                                                                 
[54]	validation_0-auc:0.97900	validation_1-auc:0.97328                                                                 
[55]	validation_0-auc:0.97899	validation_1-auc:0.97324                                                                 
[56]	validation_0-auc:0.97906	validation_1-auc:0.97305                                                                 
[57]	validation_0-auc:0.97905	validation_1-auc:0.97314                                                                 
[58]	validation_0-auc:0.97931	validation_1-auc:0.97319                                                                 
[59]	validation_0-auc:0.97931	validation_1-auc:0.97319                                                                 
[60]	validation_0-auc:0.97931	validation

[41]	validation_0-auc:0.97682	validation_1-auc:0.97327                                                                 
[42]	validation_0-auc:0.97708	validation_1-auc:0.97342                                                                 
[43]	validation_0-auc:0.97731	validation_1-auc:0.97380                                                                 
[44]	validation_0-auc:0.97735	validation_1-auc:0.97374                                                                 
[45]	validation_0-auc:0.97747	validation_1-auc:0.97361                                                                 
[46]	validation_0-auc:0.97754	validation_1-auc:0.97393                                                                 
[47]	validation_0-auc:0.97770	validation_1-auc:0.97408                                                                 
[48]	validation_0-auc:0.97770	validation_1-auc:0.97408                                                                 
[49]	validation_0-auc:0.97775	validation

[3]	validation_0-auc:0.97921	validation_1-auc:0.97229                                                                  
[4]	validation_0-auc:0.98453	validation_1-auc:0.97205                                                                  
[5]	validation_0-auc:0.98646	validation_1-auc:0.97187                                                                  
[6]	validation_0-auc:0.98894	validation_1-auc:0.97327                                                                  
[7]	validation_0-auc:0.99005	validation_1-auc:0.97294                                                                  
[8]	validation_0-auc:0.99069	validation_1-auc:0.97260                                                                  
[9]	validation_0-auc:0.99124	validation_1-auc:0.97261                                                                  
[10]	validation_0-auc:0.99161	validation_1-auc:0.97198                                                                 
[11]	validation_0-auc:0.99216	validation

[23]	validation_0-auc:0.99577	validation_1-auc:0.97501                                                                 
[24]	validation_0-auc:0.99584	validation_1-auc:0.97481                                                                 
[25]	validation_0-auc:0.99598	validation_1-auc:0.97521                                                                 
[26]	validation_0-auc:0.99619	validation_1-auc:0.97486                                                                 
[27]	validation_0-auc:0.99631	validation_1-auc:0.97442                                                                 
[28]	validation_0-auc:0.99635	validation_1-auc:0.97446                                                                 
[29]	validation_0-auc:0.99649	validation_1-auc:0.97500                                                                 
[30]	validation_0-auc:0.99669	validation_1-auc:0.97484                                                                 
[31]	validation_0-auc:0.99681	validation

[47]	validation_0-auc:0.99811	validation_1-auc:0.96854                                                                 
[48]	validation_0-auc:0.99813	validation_1-auc:0.96861                                                                 
[49]	validation_0-auc:0.99819	validation_1-auc:0.96870                                                                 
[50]	validation_0-auc:0.99824	validation_1-auc:0.96871                                                                 
[51]	validation_0-auc:0.99825	validation_1-auc:0.96865                                                                 
[52]	validation_0-auc:0.99832	validation_1-auc:0.96840                                                                 
[53]	validation_0-auc:0.99837	validation_1-auc:0.96845                                                                 
[54]	validation_0-auc:0.99843	validation_1-auc:0.96861                                                                 
[55]	validation_0-auc:0.99853	validation

[10]	validation_0-auc:0.97585	validation_1-auc:0.97409                                                                 
[11]	validation_0-auc:0.97623	validation_1-auc:0.97551                                                                 
[12]	validation_0-auc:0.97618	validation_1-auc:0.97543                                                                 
[13]	validation_0-auc:0.97627	validation_1-auc:0.97512                                                                 
[14]	validation_0-auc:0.97627	validation_1-auc:0.97512                                                                 
[15]	validation_0-auc:0.97640	validation_1-auc:0.97511                                                                 
[16]	validation_0-auc:0.97689	validation_1-auc:0.97539                                                                 
[17]	validation_0-auc:0.97695	validation_1-auc:0.97517                                                                 
[18]	validation_0-auc:0.97715	validation

[2]	validation_0-auc:0.96711	validation_1-auc:0.96575                                                                  
[3]	validation_0-auc:0.97016	validation_1-auc:0.96727                                                                  
[4]	validation_0-auc:0.96951	validation_1-auc:0.96760                                                                  
[5]	validation_0-auc:0.97070	validation_1-auc:0.96600                                                                  
[6]	validation_0-auc:0.97017	validation_1-auc:0.96463                                                                  
[7]	validation_0-auc:0.96931	validation_1-auc:0.96324                                                                  
[8]	validation_0-auc:0.96983	validation_1-auc:0.96207                                                                  
[9]	validation_0-auc:0.96998	validation_1-auc:0.96127                                                                  
[10]	validation_0-auc:0.96917	validation

[23]	validation_0-auc:0.97833	validation_1-auc:0.97536                                                                 
[24]	validation_0-auc:0.97830	validation_1-auc:0.97533                                                                 
[25]	validation_0-auc:0.97831	validation_1-auc:0.97551                                                                 
[26]	validation_0-auc:0.97838	validation_1-auc:0.97545                                                                 
[27]	validation_0-auc:0.97860	validation_1-auc:0.97551                                                                 
[28]	validation_0-auc:0.97857	validation_1-auc:0.97558                                                                 
[29]	validation_0-auc:0.97860	validation_1-auc:0.97545                                                                 
[30]	validation_0-auc:0.97870	validation_1-auc:0.97526                                                                 
[31]	validation_0-auc:0.97895	validation

[32]	validation_0-auc:0.98099	validation_1-auc:0.96717                                                                 
[33]	validation_0-auc:0.98097	validation_1-auc:0.96715                                                                 
[34]	validation_0-auc:0.98096	validation_1-auc:0.96697                                                                 
[35]	validation_0-auc:0.98107	validation_1-auc:0.96691                                                                 
                                                                                                                       

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[5]	validation_0-auc:0.97863	validation_1-auc:0.97542
[6]	validation_0-auc:0.98011	validation_1-auc:0.97413                                                                  
[7]	validation_0-auc:0.98049	validation_1-auc:0.97476                                                                  
[8]	validation_0-auc:0.98079	validation_1-auc:0.97453                                                                  
[9]	validation_0-auc:0.98158	validation_1-auc:0.97426                                                                  
[10]	validation_0-auc:0.98205	validation_1-auc:0.97459                                                                 
[11]	validation_0-auc:0.98285	validation_1-auc:0.97451                                                                 
[12]	validation_0-auc:0.98285	validation_1-auc:0.97417                                                                 
[13]	validation_0-auc:0.98341	validation_1-auc:0.97449                                                    

[2]	validation_0-auc:0.97101	validation_1-auc:0.96860                                                                  
[3]	validation_0-auc:0.97268	validation_1-auc:0.97222                                                                  
[4]	validation_0-auc:0.97705	validation_1-auc:0.97225                                                                  
[5]	validation_0-auc:0.97684	validation_1-auc:0.97263                                                                  
[6]	validation_0-auc:0.97701	validation_1-auc:0.97382                                                                  
[7]	validation_0-auc:0.97834	validation_1-auc:0.97212                                                                  
[8]	validation_0-auc:0.97887	validation_1-auc:0.97179                                                                  
[9]	validation_0-auc:0.97926	validation_1-auc:0.97129                                                                  
[10]	validation_0-auc:0.97937	validation

[34]	validation_0-auc:0.98360	validation_1-auc:0.97454                                                                 
[35]	validation_0-auc:0.98404	validation_1-auc:0.97463                                                                 
[0]	validation_0-auc:0.94817	validation_1-auc:0.93770                                                                  
[1]	validation_0-auc:0.95330	validation_1-auc:0.94530                                                                  
[2]	validation_0-auc:0.97458	validation_1-auc:0.96484                                                                  
[3]	validation_0-auc:0.97683	validation_1-auc:0.96707                                                                  
[4]	validation_0-auc:0.97709	validation_1-auc:0.96796                                                                  
[5]	validation_0-auc:0.97878	validation_1-auc:0.96925                                                                  
[6]	validation_0-auc:0.97922	validation_

[31]	validation_0-auc:0.98769	validation_1-auc:0.97293                                                                 
[32]	validation_0-auc:0.98798	validation_1-auc:0.97285                                                                 
[33]	validation_0-auc:0.98799	validation_1-auc:0.97272                                                                 
[34]	validation_0-auc:0.98799	validation_1-auc:0.97272                                                                 
[35]	validation_0-auc:0.98817	validation_1-auc:0.97245                                                                 
[36]	validation_0-auc:0.98839	validation_1-auc:0.97271                                                                 
[37]	validation_0-auc:0.98839	validation_1-auc:0.97271                                                                 
[38]	validation_0-auc:0.98847	validation_1-auc:0.97264                                                                 
[39]	validation_0-auc:0.98847	validation

[53]	validation_0-auc:0.98848	validation_1-auc:0.97487                                                                 
[54]	validation_0-auc:0.98851	validation_1-auc:0.97484                                                                 
[55]	validation_0-auc:0.98857	validation_1-auc:0.97474                                                                 
[56]	validation_0-auc:0.98866	validation_1-auc:0.97486                                                                 
[57]	validation_0-auc:0.98866	validation_1-auc:0.97486                                                                 
[58]	validation_0-auc:0.98875	validation_1-auc:0.97482                                                                 
[59]	validation_0-auc:0.98894	validation_1-auc:0.97439                                                                 
[60]	validation_0-auc:0.98907	validation_1-auc:0.97434                                                                 
[61]	validation_0-auc:0.98907	validation

[40]	validation_0-auc:0.98930	validation_1-auc:0.96772                                                                 
[41]	validation_0-auc:0.98949	validation_1-auc:0.96778                                                                 
[42]	validation_0-auc:0.98959	validation_1-auc:0.96750                                                                 
[43]	validation_0-auc:0.98975	validation_1-auc:0.96801                                                                 
[44]	validation_0-auc:0.98970	validation_1-auc:0.96789                                                                 
[45]	validation_0-auc:0.98970	validation_1-auc:0.96789                                                                 
[46]	validation_0-auc:0.98970	validation_1-auc:0.96789                                                                 
[47]	validation_0-auc:0.98984	validation_1-auc:0.96797                                                                 
[48]	validation_0-auc:0.99011	validation

[8]	validation_0-auc:0.98223	validation_1-auc:0.97399                                                                  
[9]	validation_0-auc:0.98218	validation_1-auc:0.97346                                                                  
[10]	validation_0-auc:0.98260	validation_1-auc:0.97359                                                                 
[11]	validation_0-auc:0.98490	validation_1-auc:0.97285                                                                 
[12]	validation_0-auc:0.98518	validation_1-auc:0.97323                                                                 
[13]	validation_0-auc:0.98498	validation_1-auc:0.97328                                                                 
[14]	validation_0-auc:0.98493	validation_1-auc:0.97346                                                                 
[15]	validation_0-auc:0.98457	validation_1-auc:0.97320                                                                 
[16]	validation_0-auc:0.98464	validation

[6]	validation_0-auc:0.97849	validation_1-auc:0.96709                                                                  
[7]	validation_0-auc:0.97988	validation_1-auc:0.96748                                                                  
[8]	validation_0-auc:0.98187	validation_1-auc:0.96674                                                                  
[9]	validation_0-auc:0.98189	validation_1-auc:0.96732                                                                  
[10]	validation_0-auc:0.98280	validation_1-auc:0.96657                                                                 
[11]	validation_0-auc:0.98527	validation_1-auc:0.96646                                                                 
[12]	validation_0-auc:0.98596	validation_1-auc:0.96709                                                                 
[13]	validation_0-auc:0.98656	validation_1-auc:0.96684                                                                 
[14]	validation_0-auc:0.98671	validation

[1]	validation_0-auc:0.97272	validation_1-auc:0.97420                                                                  
[2]	validation_0-auc:0.97333	validation_1-auc:0.97478                                                                  
[3]	validation_0-auc:0.97626	validation_1-auc:0.97507                                                                  
[4]	validation_0-auc:0.97671	validation_1-auc:0.97541                                                                  
[5]	validation_0-auc:0.97899	validation_1-auc:0.97535                                                                  
[6]	validation_0-auc:0.97947	validation_1-auc:0.97473                                                                  
[7]	validation_0-auc:0.97991	validation_1-auc:0.97357                                                                  
[8]	validation_0-auc:0.98017	validation_1-auc:0.97443                                                                  
[9]	validation_0-auc:0.98083	validation_

[1]	validation_0-auc:0.95495	validation_1-auc:0.94450                                                                  
[2]	validation_0-auc:0.97307	validation_1-auc:0.97125                                                                  
[3]	validation_0-auc:0.97614	validation_1-auc:0.97442                                                                  
[4]	validation_0-auc:0.97934	validation_1-auc:0.97328                                                                  
[5]	validation_0-auc:0.97889	validation_1-auc:0.97275                                                                  
[6]	validation_0-auc:0.97974	validation_1-auc:0.97319                                                                  
[7]	validation_0-auc:0.98024	validation_1-auc:0.97330                                                                  
[8]	validation_0-auc:0.98127	validation_1-auc:0.97215                                                                  
[9]	validation_0-auc:0.98182	validation_

[36]	validation_0-auc:0.98750	validation_1-auc:0.97266                                                                 
[37]	validation_0-auc:0.98782	validation_1-auc:0.97292                                                                 
[0]	validation_0-auc:0.94771	validation_1-auc:0.94083                                                                  
[1]	validation_0-auc:0.95285	validation_1-auc:0.94137                                                                  
[2]	validation_0-auc:0.97581	validation_1-auc:0.96415                                                                  
[3]	validation_0-auc:0.97823	validation_1-auc:0.96674                                                                  
[4]	validation_0-auc:0.97996	validation_1-auc:0.96764                                                                  
[5]	validation_0-auc:0.98075	validation_1-auc:0.97009                                                                  
[6]	validation_0-auc:0.98184	validation_

[30]	validation_0-auc:0.97993	validation_1-auc:0.97378                                                                 
[31]	validation_0-auc:0.97998	validation_1-auc:0.97360                                                                 
[32]	validation_0-auc:0.97999	validation_1-auc:0.97356                                                                 
[33]	validation_0-auc:0.98038	validation_1-auc:0.97330                                                                 
[34]	validation_0-auc:0.98061	validation_1-auc:0.97310                                                                 
[35]	validation_0-auc:0.98079	validation_1-auc:0.97298                                                                 
[36]	validation_0-auc:0.98083	validation_1-auc:0.97314                                                                 
[0]	validation_0-auc:0.94489	validation_1-auc:0.94808                                                                  
[1]	validation_0-auc:0.94993	validation_

[61]	validation_0-auc:0.98066	validation_1-auc:0.97525                                                                 
[62]	validation_0-auc:0.98066	validation_1-auc:0.97525                                                                 
[63]	validation_0-auc:0.98066	validation_1-auc:0.97525                                                                 
[64]	validation_0-auc:0.98066	validation_1-auc:0.97525                                                                 
[65]	validation_0-auc:0.98066	validation_1-auc:0.97525                                                                 
[66]	validation_0-auc:0.98066	validation_1-auc:0.97525                                                                 
[0]	validation_0-auc:0.94609	validation_1-auc:0.94058                                                                  
[1]	validation_0-auc:0.95353	validation_1-auc:0.94656                                                                  
[2]	validation_0-auc:0.97344	validation_

[26]	validation_0-auc:0.99239	validation_1-auc:0.97194                                                                 
[27]	validation_0-auc:0.99247	validation_1-auc:0.97216                                                                 
[28]	validation_0-auc:0.99261	validation_1-auc:0.97185                                                                 
[29]	validation_0-auc:0.99271	validation_1-auc:0.97194                                                                 
[30]	validation_0-auc:0.99271	validation_1-auc:0.97194                                                                 
[31]	validation_0-auc:0.99297	validation_1-auc:0.97142                                                                 
[32]	validation_0-auc:0.99314	validation_1-auc:0.97153                                                                 
[33]	validation_0-auc:0.99338	validation_1-auc:0.97209                                                                 
[0]	validation_0-auc:0.95238	validation_

[25]	validation_0-auc:0.99178	validation_1-auc:0.96666                                                                 
[26]	validation_0-auc:0.99187	validation_1-auc:0.96682                                                                 
[27]	validation_0-auc:0.99201	validation_1-auc:0.96724                                                                 
[28]	validation_0-auc:0.99201	validation_1-auc:0.96724                                                                 
[29]	validation_0-auc:0.99213	validation_1-auc:0.96718                                                                 
[30]	validation_0-auc:0.99213	validation_1-auc:0.96718                                                                 
[31]	validation_0-auc:0.99238	validation_1-auc:0.96670                                                                 
[32]	validation_0-auc:0.99238	validation_1-auc:0.96670                                                                 
[33]	validation_0-auc:0.99250	validation

[22]	validation_0-auc:0.99132	validation_1-auc:0.97384                                                                 
[23]	validation_0-auc:0.99132	validation_1-auc:0.97384                                                                 
[24]	validation_0-auc:0.99132	validation_1-auc:0.97384                                                                 
[25]	validation_0-auc:0.99147	validation_1-auc:0.97363                                                                 
[26]	validation_0-auc:0.99147	validation_1-auc:0.97363                                                                 
[27]	validation_0-auc:0.99173	validation_1-auc:0.97396                                                                 
[28]	validation_0-auc:0.99182	validation_1-auc:0.97414                                                                 
[29]	validation_0-auc:0.99182	validation_1-auc:0.97414                                                                 
[30]	validation_0-auc:0.99191	validation

[13]	validation_0-auc:0.98000	validation_1-auc:0.97193                                                                 
[14]	validation_0-auc:0.98009	validation_1-auc:0.97196                                                                 
[15]	validation_0-auc:0.98048	validation_1-auc:0.97283                                                                 
[16]	validation_0-auc:0.98133	validation_1-auc:0.97253                                                                 
[17]	validation_0-auc:0.98156	validation_1-auc:0.97270                                                                 
[18]	validation_0-auc:0.98155	validation_1-auc:0.97243                                                                 
[19]	validation_0-auc:0.98160	validation_1-auc:0.97239                                                                 
[20]	validation_0-auc:0.98212	validation_1-auc:0.97241                                                                 
[21]	validation_0-auc:0.98203	validation

[35]	validation_0-auc:0.98242	validation_1-auc:0.97376                                                                 
[36]	validation_0-auc:0.98276	validation_1-auc:0.97405                                                                 
[37]	validation_0-auc:0.98309	validation_1-auc:0.97389                                                                 
[38]	validation_0-auc:0.98318	validation_1-auc:0.97374                                                                 
[39]	validation_0-auc:0.98370	validation_1-auc:0.97378                                                                 
[40]	validation_0-auc:0.98378	validation_1-auc:0.97379                                                                 
[41]	validation_0-auc:0.98386	validation_1-auc:0.97405                                                                 
[42]	validation_0-auc:0.98408	validation_1-auc:0.97408                                                                 
[43]	validation_0-auc:0.98403	validation

[59]	validation_0-auc:0.98724	validation_1-auc:0.96941                                                                 
[60]	validation_0-auc:0.98724	validation_1-auc:0.96941                                                                 
[61]	validation_0-auc:0.98724	validation_1-auc:0.96941                                                                 
[62]	validation_0-auc:0.98727	validation_1-auc:0.96949                                                                 
[63]	validation_0-auc:0.98735	validation_1-auc:0.96940                                                                 
[64]	validation_0-auc:0.98738	validation_1-auc:0.96955                                                                 
[65]	validation_0-auc:0.98748	validation_1-auc:0.96962                                                                 
[66]	validation_0-auc:0.98748	validation_1-auc:0.96962                                                                 
[67]	validation_0-auc:0.98757	validation

[31]	validation_0-auc:0.97941	validation_1-auc:0.97151                                                                 
[32]	validation_0-auc:0.97944	validation_1-auc:0.97141                                                                 
[33]	validation_0-auc:0.97969	validation_1-auc:0.97126                                                                 
[34]	validation_0-auc:0.97994	validation_1-auc:0.97136                                                                 
[35]	validation_0-auc:0.97995	validation_1-auc:0.97120                                                                 
[36]	validation_0-auc:0.98017	validation_1-auc:0.97128                                                                 
[37]	validation_0-auc:0.98025	validation_1-auc:0.97132                                                                 
[38]	validation_0-auc:0.98039	validation_1-auc:0.97129                                                                 
[39]	validation_0-auc:0.98052	validation

[6]	validation_0-auc:0.97722	validation_1-auc:0.97020                                                                  
[7]	validation_0-auc:0.97723	validation_1-auc:0.96816                                                                  
[8]	validation_0-auc:0.97686	validation_1-auc:0.96628                                                                  
[9]	validation_0-auc:0.97689	validation_1-auc:0.96651                                                                  
[10]	validation_0-auc:0.97683	validation_1-auc:0.96573                                                                 
[11]	validation_0-auc:0.97775	validation_1-auc:0.96584                                                                 
[12]	validation_0-auc:0.97840	validation_1-auc:0.96606                                                                 
[13]	validation_0-auc:0.97858	validation_1-auc:0.96642                                                                 
[14]	validation_0-auc:0.97849	validation

[2]	validation_0-auc:0.97076	validation_1-auc:0.97225                                                                  
[3]	validation_0-auc:0.97190	validation_1-auc:0.97332                                                                  
[4]	validation_0-auc:0.97477	validation_1-auc:0.97392                                                                  
[5]	validation_0-auc:0.97694	validation_1-auc:0.97433                                                                  
[6]	validation_0-auc:0.97697	validation_1-auc:0.97374                                                                  
[7]	validation_0-auc:0.97820	validation_1-auc:0.97388                                                                  
[8]	validation_0-auc:0.97791	validation_1-auc:0.97422                                                                  
[9]	validation_0-auc:0.97841	validation_1-auc:0.97329                                                                  
[10]	validation_0-auc:0.97841	validation

[24]	validation_0-auc:0.98560	validation_1-auc:0.96832                                                                 
[25]	validation_0-auc:0.98566	validation_1-auc:0.96841                                                                 
[26]	validation_0-auc:0.98582	validation_1-auc:0.96849                                                                 
[27]	validation_0-auc:0.98584	validation_1-auc:0.96825                                                                 
[28]	validation_0-auc:0.98577	validation_1-auc:0.96855                                                                 
[29]	validation_0-auc:0.98597	validation_1-auc:0.96855                                                                 
[30]	validation_0-auc:0.98616	validation_1-auc:0.96862                                                                 
[31]	validation_0-auc:0.98637	validation_1-auc:0.96859                                                                 
[32]	validation_0-auc:0.98636	validation

[52]	validation_0-auc:0.98938	validation_1-auc:0.97390                                                                 
[53]	validation_0-auc:0.98938	validation_1-auc:0.97390                                                                 
[54]	validation_0-auc:0.98938	validation_1-auc:0.97390                                                                 
[55]	validation_0-auc:0.98938	validation_1-auc:0.97390                                                                 
[56]	validation_0-auc:0.98948	validation_1-auc:0.97367                                                                 
[57]	validation_0-auc:0.98965	validation_1-auc:0.97384                                                                 
[58]	validation_0-auc:0.98965	validation_1-auc:0.97384                                                                 
[59]	validation_0-auc:0.98965	validation_1-auc:0.97384                                                                 
[60]	validation_0-auc:0.98965	validation

[21]	validation_0-auc:0.98623	validation_1-auc:0.96562                                                                 
[22]	validation_0-auc:0.98641	validation_1-auc:0.96605                                                                 
[23]	validation_0-auc:0.98641	validation_1-auc:0.96605                                                                 
[24]	validation_0-auc:0.98661	validation_1-auc:0.96591                                                                 
[25]	validation_0-auc:0.98661	validation_1-auc:0.96591                                                                 
[26]	validation_0-auc:0.98661	validation_1-auc:0.96591                                                                 
[27]	validation_0-auc:0.98678	validation_1-auc:0.96563                                                                 
[28]	validation_0-auc:0.98730	validation_1-auc:0.96577                                                                 
[29]	validation_0-auc:0.98776	validation

[14]	validation_0-auc:0.98999	validation_1-auc:0.97340                                                                 
[15]	validation_0-auc:0.99042	validation_1-auc:0.97459                                                                 
[16]	validation_0-auc:0.99107	validation_1-auc:0.97425                                                                 
[17]	validation_0-auc:0.99129	validation_1-auc:0.97392                                                                 
[18]	validation_0-auc:0.99152	validation_1-auc:0.97418                                                                 
[19]	validation_0-auc:0.99152	validation_1-auc:0.97418                                                                 
[20]	validation_0-auc:0.99181	validation_1-auc:0.97415                                                                 
[21]	validation_0-auc:0.99191	validation_1-auc:0.97433                                                                 
[22]	validation_0-auc:0.99193	validation

[45]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[46]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[47]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[48]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[49]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[50]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[51]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[52]	validation_0-auc:0.99340	validation_1-auc:0.96879                                                                 
[53]	validation_0-auc:0.99340	validation

[26]	validation_0-auc:0.98181	validation_1-auc:0.97175                                                                 
[27]	validation_0-auc:0.98181	validation_1-auc:0.97175                                                                 
[28]	validation_0-auc:0.98181	validation_1-auc:0.97175                                                                 
[29]	validation_0-auc:0.98181	validation_1-auc:0.97175                                                                 
[30]	validation_0-auc:0.98184	validation_1-auc:0.97170                                                                 
[31]	validation_0-auc:0.98200	validation_1-auc:0.97200                                                                 
[32]	validation_0-auc:0.98208	validation_1-auc:0.97215                                                                 
[33]	validation_0-auc:0.98208	validation_1-auc:0.97215                                                                 
[34]	validation_0-auc:0.98208	validation

[23]	validation_0-auc:0.98259	validation_1-auc:0.96693                                                                 
[24]	validation_0-auc:0.98264	validation_1-auc:0.96694                                                                 
[25]	validation_0-auc:0.98267	validation_1-auc:0.96671                                                                 
[26]	validation_0-auc:0.98267	validation_1-auc:0.96671                                                                 
[27]	validation_0-auc:0.98267	validation_1-auc:0.96671                                                                 
[28]	validation_0-auc:0.98267	validation_1-auc:0.96671                                                                 
[29]	validation_0-auc:0.98333	validation_1-auc:0.96686                                                                 
[30]	validation_0-auc:0.98333	validation_1-auc:0.96686                                                                 
[31]	validation_0-auc:0.98335	validation

[14]	validation_0-auc:0.97982	validation_1-auc:0.97332                                                                 
[15]	validation_0-auc:0.97973	validation_1-auc:0.97318                                                                 
[16]	validation_0-auc:0.97960	validation_1-auc:0.97266                                                                 
[17]	validation_0-auc:0.97988	validation_1-auc:0.97278                                                                 
[18]	validation_0-auc:0.98011	validation_1-auc:0.97297                                                                 
[19]	validation_0-auc:0.98031	validation_1-auc:0.97314                                                                 
[20]	validation_0-auc:0.98075	validation_1-auc:0.97260                                                                 
[21]	validation_0-auc:0.98107	validation_1-auc:0.97294                                                                 
[22]	validation_0-auc:0.98133	validation

[14]	validation_0-auc:0.97659	validation_1-auc:0.97271                                                                 
[15]	validation_0-auc:0.97684	validation_1-auc:0.97233                                                                 
[16]	validation_0-auc:0.97768	validation_1-auc:0.97304                                                                 
[17]	validation_0-auc:0.97765	validation_1-auc:0.97243                                                                 
[18]	validation_0-auc:0.97834	validation_1-auc:0.97217                                                                 
[19]	validation_0-auc:0.97857	validation_1-auc:0.97233                                                                 
[20]	validation_0-auc:0.97930	validation_1-auc:0.97244                                                                 
[21]	validation_0-auc:0.97930	validation_1-auc:0.97244                                                                 
[22]	validation_0-auc:0.97930	validation

[2]	validation_0-auc:0.97466	validation_1-auc:0.96693                                                                  
[3]	validation_0-auc:0.97403	validation_1-auc:0.96749                                                                  
[4]	validation_0-auc:0.97534	validation_1-auc:0.96762                                                                  
[5]	validation_0-auc:0.97582	validation_1-auc:0.96749                                                                  
[6]	validation_0-auc:0.97579	validation_1-auc:0.96704                                                                  
[7]	validation_0-auc:0.97741	validation_1-auc:0.96813                                                                  
[8]	validation_0-auc:0.97782	validation_1-auc:0.96821                                                                  
[9]	validation_0-auc:0.97782	validation_1-auc:0.96812                                                                  
[10]	validation_0-auc:0.97766	validation

[31]	validation_0-auc:0.99566	validation_1-auc:0.97199                                                                 
[32]	validation_0-auc:0.99572	validation_1-auc:0.97221                                                                 
[33]	validation_0-auc:0.99582	validation_1-auc:0.97215                                                                 
[34]	validation_0-auc:0.99587	validation_1-auc:0.97215                                                                 
[35]	validation_0-auc:0.99596	validation_1-auc:0.97231                                                                 
[36]	validation_0-auc:0.99605	validation_1-auc:0.97253                                                                 
[37]	validation_0-auc:0.99625	validation_1-auc:0.97221                                                                 
[38]	validation_0-auc:0.99633	validation_1-auc:0.97207                                                                 
[39]	validation_0-auc:0.99646	validation

[6]	validation_0-auc:0.98698	validation_1-auc:0.96744                                                                  
[7]	validation_0-auc:0.98776	validation_1-auc:0.96677                                                                  
[8]	validation_0-auc:0.98852	validation_1-auc:0.96617                                                                  
[9]	validation_0-auc:0.99014	validation_1-auc:0.96655                                                                  
[10]	validation_0-auc:0.99051	validation_1-auc:0.96502                                                                 
[11]	validation_0-auc:0.99151	validation_1-auc:0.96695                                                                 
[12]	validation_0-auc:0.99166	validation_1-auc:0.96664                                                                 
[13]	validation_0-auc:0.99214	validation_1-auc:0.96630                                                                 
[14]	validation_0-auc:0.99243	validation

[74]	validation_0-auc:0.99821	validation_1-auc:0.96810                                                                 
[75]	validation_0-auc:0.99824	validation_1-auc:0.96819                                                                 
[76]	validation_0-auc:0.99823	validation_1-auc:0.96816                                                                 
[77]	validation_0-auc:0.99828	validation_1-auc:0.96807                                                                 
[78]	validation_0-auc:0.99830	validation_1-auc:0.96799                                                                 
[0]	validation_0-auc:0.94839	validation_1-auc:0.94237                                                                  
[1]	validation_0-auc:0.95155	validation_1-auc:0.94616                                                                  
[2]	validation_0-auc:0.96863	validation_1-auc:0.96457                                                                  
[3]	validation_0-auc:0.97144	validation_

[16]	validation_0-auc:0.98138	validation_1-auc:0.97530                                                                 
[17]	validation_0-auc:0.98176	validation_1-auc:0.97515                                                                 
[18]	validation_0-auc:0.98224	validation_1-auc:0.97495                                                                 
[19]	validation_0-auc:0.98267	validation_1-auc:0.97534                                                                 
[20]	validation_0-auc:0.98331	validation_1-auc:0.97520                                                                 
[21]	validation_0-auc:0.98381	validation_1-auc:0.97501                                                                 
[22]	validation_0-auc:0.98390	validation_1-auc:0.97501                                                                 
[23]	validation_0-auc:0.98439	validation_1-auc:0.97491                                                                 
[24]	validation_0-auc:0.98458	validation

[34]	validation_0-auc:0.98752	validation_1-auc:0.96721                                                                 
[35]	validation_0-auc:0.98778	validation_1-auc:0.96752                                                                 
[36]	validation_0-auc:0.98806	validation_1-auc:0.96740                                                                 
[37]	validation_0-auc:0.98821	validation_1-auc:0.96763                                                                 
[38]	validation_0-auc:0.98840	validation_1-auc:0.96759                                                                 
[39]	validation_0-auc:0.98843	validation_1-auc:0.96739                                                                 
[40]	validation_0-auc:0.98858	validation_1-auc:0.96744                                                                 
[41]	validation_0-auc:0.98858	validation_1-auc:0.96744                                                                 
[42]	validation_0-auc:0.98870	validation

[2]	validation_0-auc:0.97079	validation_1-auc:0.96833                                                                  
[3]	validation_0-auc:0.97265	validation_1-auc:0.97189                                                                  
[4]	validation_0-auc:0.97663	validation_1-auc:0.97138                                                                  
[5]	validation_0-auc:0.97676	validation_1-auc:0.97176                                                                  
[6]	validation_0-auc:0.97767	validation_1-auc:0.97176                                                                  
[7]	validation_0-auc:0.97867	validation_1-auc:0.97026                                                                  
[8]	validation_0-auc:0.97860	validation_1-auc:0.97028                                                                  
[9]	validation_0-auc:0.97919	validation_1-auc:0.97073                                                                  
[10]	validation_0-auc:0.97969	validation

[26]	validation_0-auc:0.98498	validation_1-auc:0.97470                                                                 
[27]	validation_0-auc:0.98544	validation_1-auc:0.97483                                                                 
[28]	validation_0-auc:0.98556	validation_1-auc:0.97490                                                                 
[29]	validation_0-auc:0.98596	validation_1-auc:0.97500                                                                 
[30]	validation_0-auc:0.98598	validation_1-auc:0.97468                                                                 
[31]	validation_0-auc:0.98598	validation_1-auc:0.97468                                                                 
[32]	validation_0-auc:0.98598	validation_1-auc:0.97468                                                                 
[33]	validation_0-auc:0.98611	validation_1-auc:0.97443                                                                 
[34]	validation_0-auc:0.98611	validation

[41]	validation_0-auc:0.98903	validation_1-auc:0.96824                                                                 
[42]	validation_0-auc:0.98903	validation_1-auc:0.96824                                                                 
[43]	validation_0-auc:0.98909	validation_1-auc:0.96845                                                                 
[44]	validation_0-auc:0.98909	validation_1-auc:0.96845                                                                 
[45]	validation_0-auc:0.98909	validation_1-auc:0.96845                                                                 
[46]	validation_0-auc:0.98918	validation_1-auc:0.96807                                                                 
[47]	validation_0-auc:0.98918	validation_1-auc:0.96807                                                                 
[48]	validation_0-auc:0.98926	validation_1-auc:0.96800                                                                 
[49]	validation_0-auc:0.98926	validation

[20]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[21]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[22]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[23]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[24]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[25]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[26]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[27]	validation_0-auc:0.98013	validation_1-auc:0.97360                                                                 
[28]	validation_0-auc:0.98013	validation

[18]	validation_0-auc:0.97248	validation_1-auc:0.97147                                                                 
[19]	validation_0-auc:0.97280	validation_1-auc:0.97218                                                                 
[20]	validation_0-auc:0.97314	validation_1-auc:0.97223                                                                 
[21]	validation_0-auc:0.97315	validation_1-auc:0.97222                                                                 
[22]	validation_0-auc:0.97387	validation_1-auc:0.97272                                                                 
[23]	validation_0-auc:0.97366	validation_1-auc:0.97329                                                                 
[24]	validation_0-auc:0.97360	validation_1-auc:0.97330                                                                 
[25]	validation_0-auc:0.97369	validation_1-auc:0.97303                                                                 
[26]	validation_0-auc:0.97356	validation

[86]	validation_0-auc:0.97828	validation_1-auc:0.97275                                                                 
[0]	validation_0-auc:0.94546	validation_1-auc:0.94936                                                                  
[1]	validation_0-auc:0.94724	validation_1-auc:0.95160                                                                  
[2]	validation_0-auc:0.96743	validation_1-auc:0.97106                                                                  
[3]	validation_0-auc:0.96959	validation_1-auc:0.97302                                                                  
[4]	validation_0-auc:0.97140	validation_1-auc:0.97249                                                                  
[5]	validation_0-auc:0.97167	validation_1-auc:0.97290                                                                  
[6]	validation_0-auc:0.97198	validation_1-auc:0.97339                                                                  
[7]	validation_0-auc:0.97132	validation_

[31]	validation_0-auc:0.97643	validation_1-auc:0.96954                                                                 
[32]	validation_0-auc:0.97639	validation_1-auc:0.96949                                                                 
[33]	validation_0-auc:0.97637	validation_1-auc:0.96945                                                                 
[34]	validation_0-auc:0.97636	validation_1-auc:0.96952                                                                 
[35]	validation_0-auc:0.97644	validation_1-auc:0.96957                                                                 
[36]	validation_0-auc:0.97643	validation_1-auc:0.96957                                                                 
[37]	validation_0-auc:0.97687	validation_1-auc:0.96887                                                                 
[38]	validation_0-auc:0.97685	validation_1-auc:0.96891                                                                 
[39]	validation_0-auc:0.97682	validation

[33]	validation_0-auc:0.98301	validation_1-auc:0.97433                                                                 
[34]	validation_0-auc:0.98304	validation_1-auc:0.97414                                                                 
[35]	validation_0-auc:0.98331	validation_1-auc:0.97407                                                                 
[36]	validation_0-auc:0.98327	validation_1-auc:0.97386                                                                 
[37]	validation_0-auc:0.98340	validation_1-auc:0.97391                                                                 
[38]	validation_0-auc:0.98360	validation_1-auc:0.97400                                                                 
[39]	validation_0-auc:0.98387	validation_1-auc:0.97367                                                                 
[40]	validation_0-auc:0.98391	validation_1-auc:0.97389                                                                 
[41]	validation_0-auc:0.98418	validation

[46]	validation_0-auc:0.98371	validation_1-auc:0.97348                                                                 
[47]	validation_0-auc:0.98381	validation_1-auc:0.97346                                                                 
[48]	validation_0-auc:0.98384	validation_1-auc:0.97358                                                                 
[49]	validation_0-auc:0.98380	validation_1-auc:0.97336                                                                 
[0]	validation_0-auc:0.94963	validation_1-auc:0.93682                                                                  
[1]	validation_0-auc:0.95480	validation_1-auc:0.94368                                                                  
[2]	validation_0-auc:0.97456	validation_1-auc:0.96188                                                                  
[3]	validation_0-auc:0.97606	validation_1-auc:0.96537                                                                  
[4]	validation_0-auc:0.97637	validation_

[28]	validation_0-auc:0.97964	validation_1-auc:0.97317                                                                 
[29]	validation_0-auc:0.97965	validation_1-auc:0.97302                                                                 
[30]	validation_0-auc:0.97991	validation_1-auc:0.97284                                                                 
[31]	validation_0-auc:0.97991	validation_1-auc:0.97284                                                                 
[32]	validation_0-auc:0.97991	validation_1-auc:0.97284                                                                 
[33]	validation_0-auc:0.97991	validation_1-auc:0.97284                                                                 
[34]	validation_0-auc:0.98019	validation_1-auc:0.97283                                                                 
[35]	validation_0-auc:0.98019	validation_1-auc:0.97283                                                                 
[36]	validation_0-auc:0.98021	validation

[18]	validation_0-auc:0.97997	validation_1-auc:0.96624                                                                 
[19]	validation_0-auc:0.98014	validation_1-auc:0.96581                                                                 
[20]	validation_0-auc:0.98040	validation_1-auc:0.96621                                                                 
[21]	validation_0-auc:0.98051	validation_1-auc:0.96610                                                                 
[22]	validation_0-auc:0.98057	validation_1-auc:0.96637                                                                 
[23]	validation_0-auc:0.98062	validation_1-auc:0.96660                                                                 
[24]	validation_0-auc:0.98062	validation_1-auc:0.96660                                                                 
[25]	validation_0-auc:0.98068	validation_1-auc:0.96650                                                                 
[26]	validation_0-auc:0.98077	validation

[1]	validation_0-auc:0.95214	validation_1-auc:0.95578                                                                  
[2]	validation_0-auc:0.97121	validation_1-auc:0.97268                                                                  
[3]	validation_0-auc:0.97840	validation_1-auc:0.97628                                                                  
[4]	validation_0-auc:0.97913	validation_1-auc:0.97611                                                                  
[5]	validation_0-auc:0.97960	validation_1-auc:0.97663                                                                  
[6]	validation_0-auc:0.98388	validation_1-auc:0.97514                                                                  
[7]	validation_0-auc:0.98467	validation_1-auc:0.97387                                                                  
[8]	validation_0-auc:0.98533	validation_1-auc:0.97282                                                                  
[9]	validation_0-auc:0.98631	validation_

[33]	validation_0-auc:0.99229	validation_1-auc:0.96695                                                                 
[34]	validation_0-auc:0.99229	validation_1-auc:0.96695                                                                 
[35]	validation_0-auc:0.99229	validation_1-auc:0.96695                                                                 
[36]	validation_0-auc:0.99229	validation_1-auc:0.96695                                                                 
[37]	validation_0-auc:0.99235	validation_1-auc:0.96684                                                                 
[38]	validation_0-auc:0.99235	validation_1-auc:0.96684                                                                 
[39]	validation_0-auc:0.99235	validation_1-auc:0.96684                                                                 
[40]	validation_0-auc:0.99235	validation_1-auc:0.96684                                                                 
[41]	validation_0-auc:0.99235	validation

[22]	validation_0-auc:0.98469	validation_1-auc:0.97354                                                                 
[23]	validation_0-auc:0.98511	validation_1-auc:0.97350                                                                 
[24]	validation_0-auc:0.98522	validation_1-auc:0.97349                                                                 
[25]	validation_0-auc:0.98527	validation_1-auc:0.97337                                                                 
[26]	validation_0-auc:0.98558	validation_1-auc:0.97366                                                                 
[27]	validation_0-auc:0.98581	validation_1-auc:0.97377                                                                 
[28]	validation_0-auc:0.98600	validation_1-auc:0.97367                                                                 
[29]	validation_0-auc:0.98605	validation_1-auc:0.97360                                                                 
[30]	validation_0-auc:0.98616	validation

[21]	validation_0-auc:0.99316	validation_1-auc:0.97309                                                                 
[22]	validation_0-auc:0.99329	validation_1-auc:0.97325                                                                 
[23]	validation_0-auc:0.99359	validation_1-auc:0.97244                                                                 
[24]	validation_0-auc:0.99365	validation_1-auc:0.97217                                                                 
[25]	validation_0-auc:0.99370	validation_1-auc:0.97177                                                                 
[26]	validation_0-auc:0.99374	validation_1-auc:0.97090                                                                 
[27]	validation_0-auc:0.99396	validation_1-auc:0.97095                                                                 
[28]	validation_0-auc:0.99403	validation_1-auc:0.97094                                                                 
[29]	validation_0-auc:0.99404	validation

[11]	validation_0-auc:0.99019	validation_1-auc:0.96638                                                                 
[12]	validation_0-auc:0.99023	validation_1-auc:0.96700                                                                 
[13]	validation_0-auc:0.99078	validation_1-auc:0.96726                                                                 
[14]	validation_0-auc:0.99116	validation_1-auc:0.96751                                                                 
[15]	validation_0-auc:0.99180	validation_1-auc:0.96741                                                                 
[16]	validation_0-auc:0.99218	validation_1-auc:0.96775                                                                 
[17]	validation_0-auc:0.99240	validation_1-auc:0.96838                                                                 
[18]	validation_0-auc:0.99257	validation_1-auc:0.96833                                                                 
[19]	validation_0-auc:0.99266	validation

[22]	validation_0-auc:0.98334	validation_1-auc:0.97237                                                                 
[23]	validation_0-auc:0.98340	validation_1-auc:0.97260                                                                 
[24]	validation_0-auc:0.98336	validation_1-auc:0.97262                                                                 
[25]	validation_0-auc:0.98372	validation_1-auc:0.97210                                                                 
[26]	validation_0-auc:0.98385	validation_1-auc:0.97215                                                                 
[27]	validation_0-auc:0.98400	validation_1-auc:0.97224                                                                 
[28]	validation_0-auc:0.98405	validation_1-auc:0.97223                                                                 
[29]	validation_0-auc:0.98436	validation_1-auc:0.97214                                                                 
[30]	validation_0-auc:0.98461	validation

[53]	validation_0-auc:0.98582	validation_1-auc:0.97552                                                                 
[54]	validation_0-auc:0.98581	validation_1-auc:0.97557                                                                 
[55]	validation_0-auc:0.98592	validation_1-auc:0.97551                                                                 
[56]	validation_0-auc:0.98608	validation_1-auc:0.97554                                                                 
[57]	validation_0-auc:0.98632	validation_1-auc:0.97555                                                                 
[58]	validation_0-auc:0.98636	validation_1-auc:0.97552                                                                 
[59]	validation_0-auc:0.98656	validation_1-auc:0.97559                                                                 
[60]	validation_0-auc:0.98652	validation_1-auc:0.97553                                                                 
[61]	validation_0-auc:0.98661	validation

[25]	validation_0-auc:0.98390	validation_1-auc:0.96727                                                                 
[26]	validation_0-auc:0.98384	validation_1-auc:0.96719                                                                 
[27]	validation_0-auc:0.98421	validation_1-auc:0.96753                                                                 
[28]	validation_0-auc:0.98426	validation_1-auc:0.96753                                                                 
[29]	validation_0-auc:0.98450	validation_1-auc:0.96745                                                                 
[30]	validation_0-auc:0.98466	validation_1-auc:0.96725                                                                 
[31]	validation_0-auc:0.98473	validation_1-auc:0.96750                                                                 
[32]	validation_0-auc:0.98487	validation_1-auc:0.96742                                                                 
[33]	validation_0-auc:0.98491	validation

[21]	validation_0-auc:0.97924	validation_1-auc:0.97428                                                                 
[22]	validation_0-auc:0.97944	validation_1-auc:0.97429                                                                 
[23]	validation_0-auc:0.97965	validation_1-auc:0.97400                                                                 
[24]	validation_0-auc:0.97976	validation_1-auc:0.97411                                                                 
[25]	validation_0-auc:0.97976	validation_1-auc:0.97411                                                                 
[26]	validation_0-auc:0.97976	validation_1-auc:0.97411                                                                 
[27]	validation_0-auc:0.97976	validation_1-auc:0.97411                                                                 
[28]	validation_0-auc:0.98011	validation_1-auc:0.97423                                                                 
[29]	validation_0-auc:0.98008	validation

In [30]:
# HyperOPT를 통해 얻어진 파라미터를 적용시킨 XGBClassifier 모델로 학습
xgbo = XGBClassifier(n_estimators=100, learning_rate=round(best['learning_rate'], 5), gamma=round(best['gamma'], 5),
                            max_depth=int(best['max_depth']), min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5), random_state = 109
                           )
xgbo.fit(X_train, y_train)

In [31]:
# 앞서 만든 평가 함수(사용자 지정 함수)를 위해 필요한 변수들
train_pred = xgbo.predict(X_train)
train_proba = xgbo.predict_proba(X_train)

test_pred = xgbo.predict(X_test)
test_proba = xgbo.predict_proba(X_test)

val_pred = xgbo.predict(X_val)
val_proba = xgbo.predict_proba(X_val)

In [32]:
print('훈련셋 평가 지표\n')
get_clf_eval(y_train, train_pred, train_proba)
print('\n\n 테스트셋 평가 지표\n')
get_clf_eval(y_test, test_pred, test_proba)
print('\n\n 검증셋 평가 지표\n')
get_clf_eval(y_val, val_pred, val_proba)

훈련셋 평가 지표

오차 행렬
[[3718   57]
 [  24 2431]]
정확도: 0.9870, 정밀도: 0.9771,     재현율: 0.9902, F1: 0.9836


 테스트셋 평가 지표

오차 행렬
[[1596   90]
 [ 113  970]]
정확도: 0.9267, 정밀도: 0.9151,     재현율: 0.8957, F1: 0.9053


 검증셋 평가 지표

오차 행렬
[[1201   57]
 [  73  746]]
정확도: 0.9374, 정밀도: 0.9290,     재현율: 0.9109, F1: 0.9199


## 결측값 채우기

In [33]:
c_na = pd.read_excel('na_slp.xlsx')

In [34]:
c_na.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin1,Cabin2,Combi,Cabin3,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0099_02,Earth,,G,12.0,G12,P,G/12/P,TRAPPIST-1e,2.0,0.0,0.0,0.0,0.0,0.0,0.0,Thewis Connelson,1.0
1,0105_01,Earth,,F,21.0,F21,P,F/21/P,TRAPPIST-1e,27.0,0.0,0.0,0.0,570.0,2.0,131.0,Carry Cleachrand,0.0
2,0110_02,Europa,,B,5.0,B5,P,B/5/P,TRAPPIST-1e,40.0,0.0,0.0,331.0,0.0,0.0,1687.0,Aldeba Bootious,0.0
3,0115_01,Mars,,F,24.0,F24,P,F/24/P,TRAPPIST-1e,26.0,0.0,0.0,0.0,0.0,0.0,0.0,Rohs Pead,1.0
4,0173_01,Earth,,E,11.0,E11,S,E/11/S,TRAPPIST-1e,58.0,0.0,0.0,985.0,0.0,5.0,0.0,Hilip Grifford,1.0


In [35]:
c_na.drop(['PassengerId', 'Name', 'Transported', 'Combi', 'Cabin', 'CryoSleep'], inplace=True, axis=1)

In [36]:
c_na.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    305 non-null    object 
 1   Cabin1        306 non-null    object 
 2   Cabin2        306 non-null    float64
 3   Cabin3        306 non-null    object 
 4   Destination   304 non-null    object 
 5   Age           307 non-null    float64
 6   VIP           301 non-null    float64
 7   RoomService   307 non-null    float64
 8   FoodCourt     307 non-null    float64
 9   ShoppingMall  300 non-null    float64
 10  Spa           303 non-null    float64
 11  VRDeck        303 non-null    float64
dtypes: float64(8), object(4)
memory usage: 29.2+ KB


In [37]:
c_na['Cabin3'].replace({'P': 'True','S': 'False'}, inplace=True)

In [38]:
c_na['Cabin3'] = c_na.Cabin3.astype(bool)
c_na['VIP'] = c_na.VIP.astype(bool)
c_na.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    305 non-null    object 
 1   Cabin1        306 non-null    object 
 2   Cabin2        306 non-null    float64
 3   Cabin3        310 non-null    bool   
 4   Destination   304 non-null    object 
 5   Age           307 non-null    float64
 6   VIP           310 non-null    bool   
 7   RoomService   307 non-null    float64
 8   FoodCourt     307 non-null    float64
 9   ShoppingMall  300 non-null    float64
 10  Spa           303 non-null    float64
 11  VRDeck        303 non-null    float64
dtypes: bool(2), float64(7), object(3)
memory usage: 24.9+ KB


In [39]:
# object 타입의 데이터들 더미화
train_encoding = pd.get_dummies(c_na['HomePlanet'])
c_na=c_na.drop('HomePlanet',axis=1)
c_na = c_na.join(train_encoding)

train_encoding = pd.get_dummies(c_na['Cabin1'])
c_na=c_na.drop('Cabin1',axis=1)
c_na = c_na.join(train_encoding)

train_encoding = pd.get_dummies(c_na['Destination'])
c_na=c_na.drop('Destination',axis=1)
c_na = c_na.join(train_encoding)

In [40]:
c_na.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Cabin2         306 non-null    float64
 1   Cabin3         310 non-null    bool   
 2   Age            307 non-null    float64
 3   VIP            310 non-null    bool   
 4   RoomService    307 non-null    float64
 5   FoodCourt      307 non-null    float64
 6   ShoppingMall   300 non-null    float64
 7   Spa            303 non-null    float64
 8   VRDeck         303 non-null    float64
 9   Earth          310 non-null    uint8  
 10  Europa         310 non-null    uint8  
 11  Mars           310 non-null    uint8  
 12  A              310 non-null    uint8  
 13  B              310 non-null    uint8  
 14  C              310 non-null    uint8  
 15  D              310 non-null    uint8  
 16  E              310 non-null    uint8  
 17  F              310 non-null    uint8  
 18  G         

In [41]:
c_na_d = c_na.dropna(axis=0)

In [42]:
col = ['Cabin2', 'Age', 'RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
data_scaled(c_na, col)

Unnamed: 0,Cabin2,Cabin3,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Earth,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,-1.183897,True,-1.800699,False,-0.329359,-0.324266,-0.351021,-0.236142,-0.287000,1,...,0,0,0,0,0,1,0,0,0,1
1,-1.166462,True,-0.116726,False,-0.329359,-0.324266,0.975138,-0.234693,-0.156691,1,...,0,0,0,0,1,0,0,0,0,1
2,-1.197458,True,0.758940,False,-0.329359,-0.064159,-0.351021,-0.236142,1.391106,0,...,1,0,0,0,0,0,0,0,0,1
3,-1.160650,True,-0.184085,False,-0.329359,-0.324266,-0.351021,-0.236142,-0.287000,0,...,0,0,0,0,1,0,0,0,0,1
4,-1.185834,True,1.971401,False,-0.329359,0.449770,-0.351021,-0.232519,-0.287000,1,...,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,2.260524,True,-0.588239,False,-0.329359,-0.324266,-0.351021,-0.236142,-0.287000,0,...,0,0,0,0,1,0,0,1,0,0
306,-0.635657,True,-0.116726,False,-0.329359,1.390402,-0.334735,1.634807,-0.268100,0,...,0,1,0,0,0,0,0,0,0,1
307,,True,0.826299,False,-0.329359,1.245810,-0.351021,0.505136,0.575429,0,...,0,0,0,0,0,0,0,0,0,1
308,2.233403,True,-0.992392,False,-0.329359,-0.324266,,0.761649,-0.258153,1,...,0,0,0,0,1,0,0,0,0,1


In [43]:
c_na.head()

Unnamed: 0,Cabin2,Cabin3,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Earth,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,-1.183897,True,-1.800699,False,-0.329359,-0.324266,-0.351021,-0.236142,-0.287,1,...,0,0,0,0,0,1,0,0,0,1
1,-1.166462,True,-0.116726,False,-0.329359,-0.324266,0.975138,-0.234693,-0.156691,1,...,0,0,0,0,1,0,0,0,0,1
2,-1.197458,True,0.75894,False,-0.329359,-0.064159,-0.351021,-0.236142,1.391106,0,...,1,0,0,0,0,0,0,0,0,1
3,-1.16065,True,-0.184085,False,-0.329359,-0.324266,-0.351021,-0.236142,-0.287,0,...,0,0,0,0,1,0,0,0,0,1
4,-1.185834,True,1.971401,False,-0.329359,0.44977,-0.351021,-0.232519,-0.287,1,...,0,0,0,1,0,0,0,0,0,1


### 그리드서치로 예측

In [44]:
c_pred = xgbo_gs.predict(c_na)

In [45]:
c_pred = pd.DataFrame(c_pred)

In [46]:
c_pred.value_counts()

0    310
dtype: int64

In [47]:
c_pred.to_csv('c_pred.csv')

### HyperOPT로 예측

In [48]:
c_pred_h = xgbo.predict(c_na)

In [49]:
c_pred_h = pd.DataFrame(c_pred_h)

In [50]:
c_pred_h.value_counts()

0    310
dtype: int64

# HomePlanet 결측값 처리

## 데이터 불러오기

In [51]:
# HomePlanet에 대한 데이터프레임 생성
hp_df = spaceship.iloc[:,:]

# 앞서 제거한 피처 외 추가제거
hp_df = hp_df.drop('Age', axis=1)

In [52]:
hp_df.head()

Unnamed: 0,HomePlanet,CryoSleep,Cabin1,Cabin2,Cabin3,Destination,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck
0,Europa,False,B,0.0,True,TRAPPIST-1e,False,0.0,0.0,0.0,0.0,0.0
1,Earth,False,F,0.0,False,TRAPPIST-1e,False,109.0,9.0,25.0,549.0,44.0
2,Europa,False,A,0.0,False,TRAPPIST-1e,True,43.0,3576.0,0.0,6715.0,49.0
3,Europa,False,A,0.0,False,TRAPPIST-1e,False,0.0,1283.0,371.0,3329.0,193.0
4,Earth,False,F,1.0,False,TRAPPIST-1e,False,303.0,70.0,151.0,565.0,2.0


## 전처리

In [53]:
# 결측값이 있는 행 삭제 
hp_df.dropna(inplace=True)
hp_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    11076 non-null  object 
 1   CryoSleep     11076 non-null  bool   
 2   Cabin1        11076 non-null  object 
 3   Cabin2        11076 non-null  float64
 4   Cabin3        11076 non-null  bool   
 5   Destination   11076 non-null  object 
 6   VIP           11076 non-null  bool   
 7   RoomService   11076 non-null  float64
 8   FoodCourt     11076 non-null  float64
 9   ShoppingMall  11076 non-null  float64
 10  Spa           11076 non-null  float64
 11  VRDeck        11076 non-null  float64
dtypes: bool(3), float64(6), object(3)
memory usage: 897.8+ KB


In [54]:
# homeplanet이 타깃으로 할거라서 홈플레닛 값을 0,1,2로 치환하여 사용
hp_df.HomePlanet = hp_df['HomePlanet'].map({'Earth':0, 'Europa':1, 'Mars':2})

# 변경 확인
hp_df.HomePlanet.unique()

array([1, 0, 2], dtype=int64)

In [55]:
# 원-핫 인코딩 (cabin1, destination)

## Cabin1
encoding = pd.get_dummies(hp_df.Cabin1)
hp_df = hp_df.drop('Cabin1', axis =1) # 기존 삭제
hp_df = hp_df.join(encoding) # 적용
## Destination
encoding = pd.get_dummies(hp_df.Destination)
hp_df = hp_df.drop('Destination', axis =1) # 기존 삭제
hp_df = hp_df.join(encoding) # 적용

In [56]:
hp_df.head()

Unnamed: 0,HomePlanet,CryoSleep,Cabin2,Cabin3,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,1,False,0.0,True,False,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,0,0,0,1
1,0,False,0.0,False,False,109.0,9.0,25.0,549.0,44.0,...,0,0,0,0,1,0,0,0,0,1
2,1,False,0.0,False,True,43.0,3576.0,0.0,6715.0,49.0,...,0,0,0,0,0,0,0,0,0,1
3,1,False,0.0,False,False,0.0,1283.0,371.0,3329.0,193.0,...,0,0,0,0,0,0,0,0,0,1
4,0,False,1.0,False,False,303.0,70.0,151.0,565.0,2.0,...,0,0,0,0,1,0,0,0,0,1


In [57]:
hp_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   HomePlanet     11076 non-null  int64  
 1   CryoSleep      11076 non-null  bool   
 2   Cabin2         11076 non-null  float64
 3   Cabin3         11076 non-null  bool   
 4   VIP            11076 non-null  bool   
 5   RoomService    11076 non-null  float64
 6   FoodCourt      11076 non-null  float64
 7   ShoppingMall   11076 non-null  float64
 8   Spa            11076 non-null  float64
 9   VRDeck         11076 non-null  float64
 10  A              11076 non-null  uint8  
 11  B              11076 non-null  uint8  
 12  C              11076 non-null  uint8  
 13  D              11076 non-null  uint8  
 14  E              11076 non-null  uint8  
 15  F              11076 non-null  uint8  
 16  G              11076 non-null  uint8  
 17  T              11076 non-null  uint8  
 18  55 Can

In [58]:
# 스케일링

## 위에 정의된 스케일링 함수 호출
col = ['Cabin2', 'RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
data_scaled(hp_df, col)

Unnamed: 0,HomePlanet,CryoSleep,Cabin2,Cabin3,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,1,False,-1.171058,True,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,1,0,0,0,0,0,0,0,0,1
1,0,False,-1.171058,False,False,-0.166861,-0.277057,-0.244125,0.217775,-0.220858,...,0,0,0,0,1,0,0,0,0,1
2,1,False,-1.171058,False,True,-0.268437,1.949128,-0.285975,5.709312,-0.216406,...,0,0,0,0,0,0,0,0,0,1
3,1,False,-1.171058,False,False,-0.334616,0.518053,0.335083,2.693687,-0.088194,...,0,0,0,0,0,0,0,0,0,1
4,0,False,-1.169112,False,False,0.131712,-0.238987,-0.033199,0.232025,-0.258253,...,0,0,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12962,0,True,1.737845,False,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,0,0,0,0,0,1,0,0,0,1
12963,2,False,-0.630138,False,False,-0.262281,-0.282674,6.160647,-0.271173,-0.260034,...,0,0,1,0,0,0,0,0,0,1
12964,0,False,2.323517,False,False,-0.334616,0.257177,-0.285975,-0.268502,-0.260034,...,0,0,0,0,1,0,0,0,0,1
12965,0,True,1.739790,False,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,0,0,0,0,0,1,0,0,0,1


## 데이터와 타겟값 분리

In [59]:
hp_data = hp_df.iloc[:,1:]
hp_label = hp_df.iloc[:,0]

In [60]:
hp_data.head()

Unnamed: 0,CryoSleep,Cabin2,Cabin3,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,A,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,False,-1.171058,True,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,0,1,0,0,0,0,0,0,0,0,1
1,False,-1.171058,False,False,-0.166861,-0.277057,-0.244125,0.217775,-0.220858,0,0,0,0,0,1,0,0,0,0,1
2,False,-1.171058,False,True,-0.268437,1.949128,-0.285975,5.709312,-0.216406,1,0,0,0,0,0,0,0,0,0,1
3,False,-1.171058,False,False,-0.334616,0.518053,0.335083,2.693687,-0.088194,1,0,0,0,0,0,0,0,0,0,1
4,False,-1.169112,False,False,0.131712,-0.238987,-0.033199,0.232025,-0.258253,0,0,0,0,0,1,0,0,0,0,1


In [61]:
hp_label

0        1
1        0
2        1
3        1
4        0
        ..
12962    0
12963    2
12964    0
12965    0
12969    0
Name: HomePlanet, Length: 11076, dtype: int64

## train, test 데이터세트 분리

In [62]:
X_train, X_test, y_train, y_test = train_test_split(hp_data,hp_label, random_state =109) 

## 모델링_랜덤포레스트

In [63]:
rf_clf = RandomForestClassifier()
grid = {
    'n_estimators': [50,90,100,150,200, 250],
    'max_depth': [3,5,7,9,13,15],
    'min_samples_leaf':[3,5,7,9,13,15],
    'min_samples_split': [3,5,7,9,13,15]}

clf_grid = GridSearchCV(rf_clf, param_grid = grid, scoring='accuracy', verbose=1, cv= 5, n_jobs=-1)

clf_grid.fit(X_train, y_train)


Fitting 5 folds for each of 1296 candidates, totalling 6480 fits


In [64]:
print('최고 평균 정확도 : {}'.format(clf_grid.best_score_))
print('최고 파라미터: {}', clf_grid.best_params_)

최고 평균 정확도 : 0.9375231744610375
최고 파라미터: {} {'max_depth': 15, 'min_samples_leaf': 3, 'min_samples_split': 5, 'n_estimators': 250}


In [65]:
# 최적의 파라미터를 이용한 모델링
clf_model = RandomForestClassifier(n_estimators=150, max_depth=15, 
                                   min_samples_leaf=3, min_samples_split=5, random_state=109)

# fitting
clf_model.fit(X_train, y_train)

# pred
tr_clf_pred = clf_model.predict(X_train)
tr_clf_proba = clf_model.predict_proba(X_train)
clf_pred = clf_model.predict(X_test)
clf_proba = clf_model.predict_proba(X_test)

In [66]:
print('훈련셋 평가 지표\n')
get_multi_clf_eval(y_train, tr_clf_pred, tr_clf_proba)
print('\n\n 테스트셋 평가 지표\n')
get_multi_clf_eval(y_test, clf_pred, clf_proba)

훈련셋 평가 지표

오차 행렬
[[4367    3   86]
 [   8 2048   35]
 [ 126   34 1600]]
정확도: 0.9648, 정밀도: 0.9648,     재현율: 0.9648, F1: 0.9648


 테스트셋 평가 지표

오차 행렬
[[1436    1   40]
 [   4  695   11]
 [  89   13  480]]
정확도: 0.9429, 정밀도: 0.9429,     재현율: 0.9429, F1: 0.9429


In [67]:
# Mars의 예측 재현율이 다소 낮은걸 확인
510/(78+16+510)

0.8443708609271523

## 모델링_XGBoost

In [68]:
# hyperopt 적용을 위한 설정 값 담기
xgb_search_space = {'max_depth': hp.quniform('max_depth', 5, 20, 1),
                    'min_child_weight': hp.quniform('min_child_weight', 1, 2, 1),
                    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
                    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1)
               }

In [69]:
# 실행을 위한 함수 정의
def objective_func(search_space):
    xgb_clf = XGBClassifier(n_estimators=100, max_depth=int(search_space['max_depth']),
                            min_child_weight=int(search_space['min_child_weight']),
                            learning_rate=search_space['learning_rate'],
                            colsample_bytree=search_space['colsample_bytree'], 
                            eval_metric='logloss')
    accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)
    return {'loss':-1 * np.mean(accuracy), 'status': STATUS_OK}


In [70]:
# 최적의 파라미터 찾기
trial_val = Trials()
best = fmin(fn=objective_func, # 정의한 실행 함수
            space=xgb_search_space, # 하이퍼 오피티 설정값 정의한거
            algo=tpe.suggest,
            max_evals=50,
            trials=trial_val, rstate=np.random.default_rng(seed=9))
print('best:', best)

100%|███████████████████████████████████████████████| 50/50 [00:50<00:00,  1.01s/trial, best loss: -0.9428193090164921]
best: {'colsample_bytree': 0.7131644932071765, 'learning_rate': 0.16071485807351799, 'max_depth': 5.0, 'min_child_weight': 2.0}


In [71]:
# 모델링
xgb_model = XGBClassifier(n_estimators=100, learning_rate=round(best['learning_rate'], 5), 
                            max_depth=int(best['max_depth']), min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5)
                           )
xgb_model.fit(X_train, y_train)

tr_pred = xgb_model.predict(X_train)
tr_proba = xgb_model.predict_proba(X_train)
t_pred = xgb_model.predict(X_test)
t_proba = xgb_model.predict(X_test)

In [72]:
print('훈련셋 평가 지표\n')
get_multi_clf_eval(y_train, tr_pred, tr_proba)
print('\n\n 테스트셋 평가 지표\n')
get_multi_clf_eval(y_test, t_pred, t_proba)

훈련셋 평가 지표

오차 행렬
[[4349    1  106]
 [   2 2057   32]
 [  91   33 1636]]
정확도: 0.9681, 정밀도: 0.9681,     재현율: 0.9681, F1: 0.9681


 테스트셋 평가 지표

오차 행렬
[[1416    1   60]
 [   2  690   18]
 [  68   13  501]]
정확도: 0.9415, 정밀도: 0.9415,     재현율: 0.9415, F1: 0.9415


In [73]:
# Mars의 정확도
529/(63+14+527)*100

87.58278145695364

## 복합 샘플링
- 샘플의 편향이 심해서 Mars의 확률이 낮은거 같아서 복합 샘플링을 실행

In [74]:
# 샘플 편향
np.unique(hp_label, return_counts = True)

(array([0, 1, 2], dtype=int64), array([5933, 2801, 2342], dtype=int64))

In [75]:
# 복합 샘플링
X_samp, y_samp = SMOTEENN(random_state=109).fit_resample(hp_data, hp_label)

In [76]:
# 샘플링 완료
np.unique(y_samp, return_counts = True)

(array([0, 1, 2], dtype=int64), array([5111, 5614, 5031], dtype=int64))

## 샘플링 데이터세트 분리

In [77]:
X_train, X_test, y_train, y_test = train_test_split( X_samp, y_samp, random_state = 109 )

## 모델링_XGBClassifier & hyperopt

In [78]:
# 실행을 위한 함수 정의
def objective_func(search_space):
    xgb_clf = XGBClassifier(n_estimators=100, max_depth=int(search_space['max_depth']),
                            min_child_weight=int(search_space['min_child_weight']),
                            learning_rate=search_space['learning_rate'],
                            colsample_bytree=search_space['colsample_bytree'], 
                            eval_metric='logloss')
    accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)
    return {'loss':-1 * np.mean(accuracy), 'status': STATUS_OK}

In [79]:
# 최적의 파라미터 찾기
trial_val = Trials()
best = fmin(fn=objective_func,
            space=xgb_search_space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trial_val, rstate=np.random.default_rng(seed=9))
print('best:', best)

100%|███████████████████████████████████████████████| 50/50 [01:07<00:00,  1.35s/trial, best loss: -0.9906913768299906]
best: {'colsample_bytree': 0.8052776945627771, 'learning_rate': 0.19910199851014435, 'max_depth': 12.0, 'min_child_weight': 1.0}


In [80]:
# 모델링
xgb_model = XGBClassifier(n_estimators=100, learning_rate=round(best['learning_rate'], 5), 
                            max_depth=int(best['max_depth']), min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5)
                           )
xgb_model.fit(X_train, y_train)

tr_pred = xgb_model.predict(X_train)
tr_proba = xgb_model.predict_proba(X_train)
t_pred = xgb_model.predict(X_test)
t_proba = xgb_model.predict(X_test)

In [81]:
print('훈련셋 평가 지표\n')
get_multi_clf_eval(y_train, tr_pred, tr_proba)
print('\n\n 테스트셋 평가 지표\n')
get_multi_clf_eval(y_test, t_pred, t_proba)

훈련셋 평가 지표

오차 행렬
[[3811    0    0]
 [   0 4229    0]
 [   0    0 3777]]
정확도: 1.0000, 정밀도: 1.0000,     재현율: 1.0000, F1: 1.0000


 테스트셋 평가 지표

오차 행렬
[[1282    2   16]
 [   1 1381    3]
 [  13    6 1235]]
정확도: 0.9896, 정밀도: 0.9896,     재현율: 0.9896, F1: 0.9896


In [82]:
# Mars의 정확도
1270/(5+1+1270)*100

99.52978056426332

## 결측값 채우기

In [83]:
# 이전과 동일한 절차

# 결측치 파일 불러오기
df = pd.read_excel('hp_na.xlsx')
df.head()
data = df.drop(['Transported', 'Name', 'Age', 'Cabin','Combi'], axis=1)
# Cabin1
encode = pd.get_dummies(data['Cabin1'])
data.drop('Cabin1', axis=1)
data = data.join(encode)
# Destination
encode = pd.get_dummies(data['Destination'])
data.drop(['Destination'], axis=1)
data = data.join(encode)
data['Cabin3'] = data['Cabin3'].map({'P':0, 'S':1})
data['HomePlanet'] = data['HomePlanet'].map({'Earth':0, 'Europa':1, 'Mars':2})
data_scaled(data, col)
dt = data.drop(['PassengerId', 'HomePlanet','Destination', 'Cabin1'], axis = 1)
label = data['HomePlanet']
dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279 entries, 0 to 278
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   CryoSleep      279 non-null    bool   
 1   Cabin2         274 non-null    float64
 2   Cabin3         274 non-null    float64
 3   VIP            276 non-null    float64
 4   RoomService    275 non-null    float64
 5   FoodCourt      276 non-null    float64
 6   ShoppingMall   275 non-null    float64
 7   Spa            272 non-null    float64
 8   VRDeck         277 non-null    float64
 9   A              279 non-null    uint8  
 10  B              279 non-null    uint8  
 11  C              279 non-null    uint8  
 12  D              279 non-null    uint8  
 13  E              279 non-null    uint8  
 14  F              279 non-null    uint8  
 15  G              279 non-null    uint8  
 16  T              279 non-null    uint8  
 17  55 Cancri e    279 non-null    uint8  
 18  PSO J318.5

In [84]:
df['predict_hp']= xgb_model.predict(dt)
df['predict_hp'] = df['predict_hp'].map({0:'Earth', 1:'Europa', 2:'Mars'})
df # 샘플링 하지 않고 처리한 것

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin1,Cabin2,Combi,Cabin3,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,predict_hp
0,0064_02,,True,E,3.0,E3,S,E/3/S,TRAPPIST-1e,33.0,0.0,0.0,0.0,0.0,0.0,0.0,Colatz Keen,1.0,Europa
1,0119_01,,False,A,0.0,A0,P,A/0/P,TRAPPIST-1e,39.0,0.0,0.0,2344.0,0.0,65.0,6898.0,Batan Coning,0.0,Europa
2,0210_01,,True,D,6.0,D6,P,D/6/P,55 Cancri e,24.0,0.0,0.0,0.0,0.0,0.0,0.0,Arraid Inicont,1.0,Europa
3,0242_01,,False,F,46.0,F46,S,F/46/S,TRAPPIST-1e,18.0,0.0,313.0,1.0,691.0,283.0,0.0,Almone St챕,0.0,Mars
4,0251_01,,True,C,11.0,C11,S,C/11/S,55 Cancri e,54.0,0.0,0.0,0.0,0.0,0.0,0.0,Diphah Amsive,1.0,Europa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,8621_01,,False,E,552.0,E552,P,E/552/P,TRAPPIST-1e,19.0,0.0,4.0,0.0,1604.0,0.0,0.0,Vanley Simmonders,,Mars
275,8678_01,,True,G,1399.0,G1399,S,G/1399/S,55 Cancri e,9.0,0.0,0.0,0.0,0.0,0.0,0.0,Eilan Kellson,,Earth
276,8775_01,,True,D,275.0,D275,P,D/275/P,TRAPPIST-1e,40.0,0.0,0.0,0.0,0.0,0.0,0.0,Raston Maltorted,,Europa
277,9025_01,,False,G,1454.0,G1454,S,G/1454/S,TRAPPIST-1e,42.0,0.0,0.0,0.0,28.0,726.0,0.0,Ale Whitersone,,Earth


In [85]:
# 예측값 (샘플링 후)
df['sampling_pred_hp'] = xgb_model.predict(dt)
# 원본 형태로 되돌리기
df['sampling_pred_hp'] = df['sampling_pred_hp'].map({0:'Earth', 1:'Europa', 2:'Mars'})

In [86]:
np.unique(df['sampling_pred_hp'], return_counts = True)

(array(['Earth', 'Europa', 'Mars'], dtype=object),
 array([136,  71,  72], dtype=int64))

In [87]:
#엑셀로 저장하기
df.to_excel('hp_fill(sampling).xlsx')

# Destination 결측값 처리

## 데이터 불러오기

In [88]:
dst_df = spaceship.iloc[:,:]
dst_df.info() 

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    11076 non-null  object 
 1   CryoSleep     11076 non-null  bool   
 2   Cabin1        11076 non-null  object 
 3   Cabin2        11076 non-null  float64
 4   Cabin3        11076 non-null  bool   
 5   Destination   11076 non-null  object 
 6   Age           11076 non-null  float64
 7   VIP           11076 non-null  bool   
 8   RoomService   11076 non-null  float64
 9   FoodCourt     11076 non-null  float64
 10  ShoppingMall  11076 non-null  float64
 11  Spa           11076 non-null  float64
 12  VRDeck        11076 non-null  float64
dtypes: bool(3), float64(7), object(3)
memory usage: 1.2+ MB


## 전처리

In [89]:
dst_df.dropna(axis=0,inplace=True)
dst_df.shape

(11076, 13)

In [90]:
# 원-핫 인코딩 (cabin1, destination)

## HomePlanet
encoding = pd.get_dummies(dst_df.HomePlanet)
dst_df = dst_df.drop('HomePlanet', axis =1) # 기존 삭제
dst_df = dst_df.join(encoding) # 적용
## Cabin1
encoding = pd.get_dummies(dst_df.Cabin1)
dst_df = dst_df.drop('Cabin1', axis =1) # 기존 삭제
dst_df = dst_df.join(encoding) # 적용

dst_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 22 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   CryoSleep     11076 non-null  bool   
 1   Cabin2        11076 non-null  float64
 2   Cabin3        11076 non-null  bool   
 3   Destination   11076 non-null  object 
 4   Age           11076 non-null  float64
 5   VIP           11076 non-null  bool   
 6   RoomService   11076 non-null  float64
 7   FoodCourt     11076 non-null  float64
 8   ShoppingMall  11076 non-null  float64
 9   Spa           11076 non-null  float64
 10  VRDeck        11076 non-null  float64
 11  Earth         11076 non-null  uint8  
 12  Europa        11076 non-null  uint8  
 13  Mars          11076 non-null  uint8  
 14  A             11076 non-null  uint8  
 15  B             11076 non-null  uint8  
 16  C             11076 non-null  uint8  
 17  D             11076 non-null  uint8  
 18  E             11076 non-nu

In [91]:
# 스케일링

## 위에 정의된 스케일링 함수 호출
col = ['Cabin2', 'RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
data_scaled(dst_df, col)

Unnamed: 0,CryoSleep,Cabin2,Cabin3,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,...,Europa,Mars,A,B,C,D,E,F,G,T
0,False,-1.171058,True,TRAPPIST-1e,39.0,False,-0.334616,-0.282674,-0.285975,-0.271173,...,1,0,0,1,0,0,0,0,0,0
1,False,-1.171058,False,TRAPPIST-1e,24.0,False,-0.166861,-0.277057,-0.244125,0.217775,...,0,0,0,0,0,0,0,1,0,0
2,False,-1.171058,False,TRAPPIST-1e,58.0,True,-0.268437,1.949128,-0.285975,5.709312,...,1,0,1,0,0,0,0,0,0,0
3,False,-1.171058,False,TRAPPIST-1e,33.0,False,-0.334616,0.518053,0.335083,2.693687,...,1,0,1,0,0,0,0,0,0,0
4,False,-1.169112,False,TRAPPIST-1e,16.0,False,0.131712,-0.238987,-0.033199,0.232025,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12962,True,1.737845,False,TRAPPIST-1e,43.0,False,-0.334616,-0.282674,-0.285975,-0.271173,...,0,0,0,0,0,0,0,0,1,0
12963,False,-0.630138,False,TRAPPIST-1e,43.0,False,-0.262281,-0.282674,6.160647,-0.271173,...,0,1,0,0,0,1,0,0,0,0
12964,False,2.323517,False,TRAPPIST-1e,40.0,False,-0.334616,0.257177,-0.285975,-0.268502,...,0,0,0,0,0,0,0,1,0,0
12965,True,1.739790,False,TRAPPIST-1e,34.0,False,-0.334616,-0.282674,-0.285975,-0.271173,...,0,0,0,0,0,0,0,0,1,0


In [92]:
dst_df.Destination.unique()

array(['TRAPPIST-1e', 'PSO J318.5-22', '55 Cancri e'], dtype=object)

In [93]:
# Object 타입을 숫자형으로 변경하기
# 'TRAPPIST-1e' = 0, 'PSO J318.5-22' = 1, '55 Cancri e' = 2
dst_df.Destination.replace({'TRAPPIST-1e':0, '55 Cancri e':1, 'PSO J318.5-22':2}, inplace=True)

In [94]:
dst_df['Destination'].value_counts()

0    7715
1    2325
2    1036
Name: Destination, dtype: int64

## 데이터와 타겟값 분리

In [95]:
dst_data = dst_df.drop('Destination', axis =1) 
dst_label = dst_df['Destination']

## 복합샘플링

In [96]:
# 복합 샘플링
X_sample, y_sample = SMOTEENN(random_state =109).fit_resample(dst_data, dst_label)

In [97]:
y_sample.value_counts() # 아직 편향이 좀 있지만, 나아졌네

2    5619
1    4456
0    2504
Name: Destination, dtype: int64

## train, test 데이터 세트 분리

In [98]:
X_train, X_test, y_train, y_test = train_test_split(X_sample, y_sample, random_state=109)

## 모델링_XGBClassifier

In [99]:
# hyperopt 적용을 위한 설정 값 담기
xgb_search_space = {'max_depth': hp.quniform('max_depth', 5, 20, 1),
                    'min_child_weight': hp.quniform('min_child_weight', 1, 2, 1),
                    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
                    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 0.95)
               }

In [100]:
# 실행을 위한 함수 정의
def objective_func(search_space):
    xgb_clf = XGBClassifier(n_estimators=100, max_depth=int(search_space['max_depth']),
                            min_child_weight=int(search_space['min_child_weight']),
                            learning_rate=search_space['learning_rate'],
                            colsample_bytree=search_space['colsample_bytree'], 
                            eval_metric='logloss')
    accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)
    return {'loss':-1 * np.mean(accuracy), 'status': STATUS_OK}

In [101]:
# 최적의 파라미터 찾기
trial_val = Trials()
best = fmin(fn=objective_func,
            space=xgb_search_space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trial_val, rstate=np.random.default_rng(seed=9))
print('best:', best)

100%|████████████████████████████████████████████████| 50/50 [01:52<00:00,  2.25s/trial, best loss: -0.885519342872284]
best: {'colsample_bytree': 0.7689868312949918, 'learning_rate': 0.15499060596585923, 'max_depth': 16.0, 'min_child_weight': 1.0}


In [102]:
# HyperOPT를 통해 얻어진 파라미터를 적용시킨 XGBClassifier 모델로 학습
xgb_model = XGBClassifier(n_estimators=100, learning_rate=round(best['learning_rate'], 5), 
                            max_depth=int(best['max_depth']), min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5)
                           )
xgb_model.fit(X_train, y_train)

In [103]:
# 앞서 만든 평가 함수(사용자 지정 함수)를 위해 필요한 변수들
tr_pred = xgb_model.predict(X_train)
tr_proba = xgb_model.predict_proba(X_train)

t_pred = xgb_model.predict(X_test)
t_proba = xgb_model.predict_proba(X_test)

In [104]:
print('훈련셋 평가 지표\n')
get_multi_clf_eval(y_train, tr_pred, tr_proba)
print('\n\n 테스트셋 평가 지표\n')
get_multi_clf_eval(y_test, t_pred, t_proba)

훈련셋 평가 지표

오차 행렬
[[1893    0    0]
 [   0 3332    2]
 [   0    0 4207]]
정확도: 0.9998, 정밀도: 0.9998,     재현율: 0.9998, F1: 0.9998


 테스트셋 평가 지표

오차 행렬
[[ 485   53   73]
 [  24 1034   64]
 [  39   18 1355]]
정확도: 0.9138, 정밀도: 0.9138,     재현율: 0.9138, F1: 0.9138


## 결측값 채우기

In [106]:
# 이전과 동일한 절차

# 결측치 파일 불러오기
df = pd.read_excel('dest_na.xlsx')
df.head()
data = df.drop(['Name', 'Cabin','Combi', 'Destination'], axis=1)
# Cabin1
encode = pd.get_dummies(data['HomePlanet'])
data = data.join(encode)
encode = pd.get_dummies(data['Cabin1'])
data = data.join(encode)

data['Cabin3'] = data['Cabin3'].map({'P':0, 'S':1})
data_scaled(data, col)
dt = data.drop(['PassengerId', 'HomePlanet', 'Cabin1'], axis = 1)
dt['T'] = pd.Series(np.zeros((266))).astype('uint8')

dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 21 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   CryoSleep     266 non-null    bool   
 1   Cabin2        264 non-null    float64
 2   Cabin3        264 non-null    float64
 3   Age           266 non-null    int64  
 4   VIP           266 non-null    bool   
 5   RoomService   266 non-null    float64
 6   FoodCourt     266 non-null    float64
 7   ShoppingMall  266 non-null    float64
 8   Spa           266 non-null    float64
 9   VRDeck        266 non-null    float64
 10  Earth         266 non-null    uint8  
 11  Europa        266 non-null    uint8  
 12  Mars          266 non-null    uint8  
 13  A             266 non-null    uint8  
 14  B             266 non-null    uint8  
 15  C             266 non-null    uint8  
 16  D             266 non-null    uint8  
 17  E             266 non-null    uint8  
 18  F             266 non-null    

In [107]:
X_train.columns

Index(['CryoSleep', 'Cabin2', 'Cabin3', 'Age', 'VIP', 'RoomService',
       'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'Earth', 'Europa', 'Mars',
       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'T'],
      dtype='object')

In [108]:
dt.dropna(axis=0, inplace=True)

In [109]:
dt_pred = xgb_model.predict(dt)
dt_pred = pd.DataFrame(dt_pred)
dt_pred.value_counts()

0    135
2     65
1     64
dtype: int64

In [110]:
# 'TRAPPIST-1e':0, '55 Cancri e':1, 'PSO J318.5-22':2 로 변환시킨거 되돌리기
dt_pred = dt_pred.iloc[:,0].map({0:'TRAPPIST-1e', 1:'55 Cancri e', 2:'PSO J318.5-22' })
dt_pred.value_counts() # 변경 확인

TRAPPIST-1e      135
PSO J318.5-22     65
55 Cancri e       64
Name: 0, dtype: int64

In [111]:
# 예측한 값 엑셀로 내보내기
dt_pred.to_excel('Destination_fillna.xlsx')

# VIP 결측값 처리

## 데이터 불러오기

In [112]:
vip_df = spaceship.iloc[:,:]
vip_df.info() 

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    11076 non-null  object 
 1   CryoSleep     11076 non-null  bool   
 2   Cabin1        11076 non-null  object 
 3   Cabin2        11076 non-null  float64
 4   Cabin3        11076 non-null  bool   
 5   Destination   11076 non-null  object 
 6   Age           11076 non-null  float64
 7   VIP           11076 non-null  bool   
 8   RoomService   11076 non-null  float64
 9   FoodCourt     11076 non-null  float64
 10  ShoppingMall  11076 non-null  float64
 11  Spa           11076 non-null  float64
 12  VRDeck        11076 non-null  float64
dtypes: bool(3), float64(7), object(3)
memory usage: 1.2+ MB


## 데이터 탐색

In [113]:
vip_df.VIP.value_counts() 

False    10584
True       492
Name: VIP, dtype: int64

- 굉장히 심한 편향성을 띔

## 전처리

In [114]:
vip_df.dropna(axis=0,inplace=True)
vip_df.shape

(11076, 13)

In [115]:
vip_df.head()

Unnamed: 0,HomePlanet,CryoSleep,Cabin1,Cabin2,Cabin3,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck
0,Europa,False,B,0.0,True,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0
1,Earth,False,F,0.0,False,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0
2,Europa,False,A,0.0,False,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0
3,Europa,False,A,0.0,False,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0
4,Earth,False,F,1.0,False,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0


In [116]:
vip_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    11076 non-null  object 
 1   CryoSleep     11076 non-null  bool   
 2   Cabin1        11076 non-null  object 
 3   Cabin2        11076 non-null  float64
 4   Cabin3        11076 non-null  bool   
 5   Destination   11076 non-null  object 
 6   Age           11076 non-null  float64
 7   VIP           11076 non-null  bool   
 8   RoomService   11076 non-null  float64
 9   FoodCourt     11076 non-null  float64
 10  ShoppingMall  11076 non-null  float64
 11  Spa           11076 non-null  float64
 12  VRDeck        11076 non-null  float64
dtypes: bool(3), float64(7), object(3)
memory usage: 984.3+ KB


In [117]:
# 원-핫 인코딩 (cabin1, destination)

## HomePlanet
encoding = pd.get_dummies(vip_df.HomePlanet)
vip_df = vip_df.drop('HomePlanet', axis =1) # 기존 삭제
vip_df = vip_df.join(encoding) # 적용
## Cabin1
encoding = pd.get_dummies(vip_df.Cabin1)
vip_df = vip_df.drop('Cabin1', axis =1) # 기존 삭제
vip_df = vip_df.join(encoding) # 적용

## Cabin1
encoding = pd.get_dummies(vip_df.Destination)
vip_df = vip_df.drop('Destination', axis =1) # 기존 삭제
vip_df = vip_df.join(encoding) # 적용

vip_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11076 entries, 0 to 12969
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   CryoSleep      11076 non-null  bool   
 1   Cabin2         11076 non-null  float64
 2   Cabin3         11076 non-null  bool   
 3   Age            11076 non-null  float64
 4   VIP            11076 non-null  bool   
 5   RoomService    11076 non-null  float64
 6   FoodCourt      11076 non-null  float64
 7   ShoppingMall   11076 non-null  float64
 8   Spa            11076 non-null  float64
 9   VRDeck         11076 non-null  float64
 10  Earth          11076 non-null  uint8  
 11  Europa         11076 non-null  uint8  
 12  Mars           11076 non-null  uint8  
 13  A              11076 non-null  uint8  
 14  B              11076 non-null  uint8  
 15  C              11076 non-null  uint8  
 16  D              11076 non-null  uint8  
 17  E              11076 non-null  uint8  
 18  F     

In [118]:
# 스케일링을 위한 함수 생성
col = ['Cabin2', 'Age', 'RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
data_scaled(vip_df, col)

Unnamed: 0,CryoSleep,Cabin2,Cabin3,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,False,-1.171058,True,0.707877,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,1,0,0,0,0,0,0,0,0,1
1,False,-1.171058,False,-0.329018,False,-0.166861,-0.277057,-0.244125,0.217775,-0.220858,...,0,0,0,0,1,0,0,0,0,1
2,False,-1.171058,False,2.021278,True,-0.268437,1.949128,-0.285975,5.709312,-0.216406,...,0,0,0,0,0,0,0,0,0,1
3,False,-1.171058,False,0.293119,False,-0.334616,0.518053,0.335083,2.693687,-0.088194,...,0,0,0,0,0,0,0,0,0,1
4,False,-1.169112,False,-0.882029,False,0.131712,-0.238987,-0.033199,0.232025,-0.258253,...,0,0,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12962,True,1.737845,False,0.984383,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,0,0,0,0,0,1,0,0,0,1
12963,False,-0.630138,False,0.984383,False,-0.262281,-0.282674,6.160647,-0.271173,-0.260034,...,0,0,1,0,0,0,0,0,0,1
12964,False,2.323517,False,0.777004,False,-0.334616,0.257177,-0.285975,-0.268502,-0.260034,...,0,0,0,0,1,0,0,0,0,1
12965,True,1.739790,False,0.362246,False,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,...,0,0,0,0,0,1,0,0,0,1


## 데이터와 타겟값 분리

In [119]:
vip_data = vip_df.drop(['VIP'],axis=1)
vip_label = vip_df['VIP'].replace({True:1,False:0})

In [120]:
vip_data.head()

Unnamed: 0,CryoSleep,Cabin2,Cabin3,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Earth,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,False,-1.171058,True,0.707877,-0.334616,-0.282674,-0.285975,-0.271173,-0.260034,0,...,1,0,0,0,0,0,0,0,0,1
1,False,-1.171058,False,-0.329018,-0.166861,-0.277057,-0.244125,0.217775,-0.220858,1,...,0,0,0,0,1,0,0,0,0,1
2,False,-1.171058,False,2.021278,-0.268437,1.949128,-0.285975,5.709312,-0.216406,0,...,0,0,0,0,0,0,0,0,0,1
3,False,-1.171058,False,0.293119,-0.334616,0.518053,0.335083,2.693687,-0.088194,0,...,0,0,0,0,0,0,0,0,0,1
4,False,-1.169112,False,-0.882029,0.131712,-0.238987,-0.033199,0.232025,-0.258253,1,...,0,0,0,0,1,0,0,0,0,1


In [121]:
vip_label.head()

0    0
1    0
2    1
3    0
4    0
Name: VIP, dtype: int64

## 오버샘플링

In [122]:
X_samp, y_samp = RandomOverSampler(random_state=109).fit_resample(vip_data, vip_label)

In [123]:
print(X_samp.shape)
print(y_samp.shape)

(21168, 23)
(21168,)


## train, test 데이터세트 분리

In [124]:
X_train,X_test,y_train,y_test = train_test_split(X_samp, y_samp, random_state=109)

## 모델링_XGBClassifier

In [125]:
# hyperopt 적용을 위한 설정 값 담기
xgb_search_space = {'max_depth': hp.quniform('max_depth', 5, 20, 1),
                    'min_child_weight': hp.quniform('min_child_weight', 1, 2, 1),
                    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
                    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1)
               }

In [126]:
# 실행을 위한 함수 정의
from xgboost import XGBClassifier
def objective_func(search_space):
    xgb_clf = XGBClassifier(n_estimators=100, max_depth=int(search_space['max_depth']),
                            min_child_weight=int(search_space['min_child_weight']),
                            learning_rate=search_space['learning_rate'],
                            colsample_bytree=search_space['colsample_bytree'], 
                            eval_metric='logloss')
    accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring='accuracy', cv=3)
    return {'loss':-1 * np.mean(accuracy), 'status': STATUS_OK}

In [127]:
# 최적의 파라미터 찾기
from sklearn.model_selection import cross_val_score
trial_val = Trials()
best = fmin(fn=objective_func,
            space=xgb_search_space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trial_val, rstate=np.random.default_rng(seed=9))
print('best:', best)

100%|████████████████████████████████████████████████| 50/50 [00:45<00:00,  1.10trial/s, best loss: -0.985575711766188]
best: {'colsample_bytree': 0.5978109120491624, 'learning_rate': 0.11202409007972883, 'max_depth': 20.0, 'min_child_weight': 1.0}


In [128]:
# hyperopt를 통해 얻은 최적의 파라미터를 이용한 XGBClassifier
xgb_model = XGBClassifier(n_estimators=100, learning_rate=round(best['learning_rate'], 5), 
                            max_depth=int(best['max_depth']), min_child_weight=int(best['min_child_weight']),
                            colsample_bytree=round(best['colsample_bytree'], 5)
                           )
xgb_model.fit(X_train, y_train)
pred= xgb_model.predict(X_test)

# 테스트 데이터 평가
print('정확도 : ', accuracy_score(y_test, pred))
print('혼돈행렬 : \n', confusion_matrix(y_test, pred))

정확도 :  0.9924414210128496
혼돈행렬 : 
 [[2620   40]
 [   0 2632]]


In [129]:
tr_pred = xgb_model.predict(X_train)
tr_proba = xgb_model.predict_proba(X_train)
t_pred= xgb_model.predict(X_test)
t_proba = xgb_model.predict_proba(X_test)

In [130]:
# 트레인 데이터 평가
pred= xgb_model.predict(X_train)
print('정확도 : ', accuracy_score(y_train, pred))
print('혼돈행렬 : \n', confusion_matrix(y_train, pred))

정확도 :  0.99949609473419
혼돈행렬 : 
 [[7916    8]
 [   0 7952]]


## 결측값 채우기

In [131]:
# VIP의 결측값만 비어있는 파일 불러오기
vip_nan_df = pd.read_excel('na_vip.xlsx')

In [132]:
# 이전 과정 반복 

vip_nan_df.drop(['PassengerId','Combi','Cabin','Name','Transported'],axis=1,inplace=True)
## Cabin3의 P(좌현)를 False으로 S(우현)를 True로 변경
vip_nan_df['Cabin3'].replace({'P': True,'S': False}, inplace=True)

## boolean으로 변환
vip_nan_df['CryoSleep'] = vip_nan_df['CryoSleep'].astype(bool)

vip_nan_df['Cabin3'] = vip_nan_df['Cabin3'].astype(bool)

## 변환 확인
vip_nan_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 296 entries, 0 to 295
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   HomePlanet    296 non-null    object 
 1   CryoSleep     296 non-null    bool   
 2   Cabin1        287 non-null    object 
 3   Cabin2        287 non-null    float64
 4   Cabin3        296 non-null    bool   
 5   Destination   289 non-null    object 
 6   Age           287 non-null    float64
 7   VIP           0 non-null      float64
 8   RoomService   291 non-null    float64
 9   FoodCourt     293 non-null    float64
 10  ShoppingMall  292 non-null    float64
 11  Spa           293 non-null    float64
 12  VRDeck        294 non-null    float64
dtypes: bool(2), float64(8), object(3)
memory usage: 26.1+ KB


In [133]:
## object 타입의 데이터들 더미화
train_encoding = pd.get_dummies(vip_nan_df['HomePlanet'])
vip_nan_df=vip_nan_df.drop('HomePlanet',axis=1)
vip_nan_df = vip_nan_df.join(train_encoding)

train_encoding = pd.get_dummies(vip_nan_df['Cabin1'])
## 기존의 팀명 컬러 삭제
vip_nan_df=vip_nan_df.drop('Cabin1',axis=1)
vip_nan_df = vip_nan_df.join(train_encoding)


train_encoding = pd.get_dummies(vip_nan_df['Destination'])
## 기존의 팀명 컬러 삭제
vip_nan_df = vip_nan_df.drop('Destination',axis=1)
vip_nan_df = vip_nan_df.join(train_encoding)

col = ['Cabin2', 'Age', 'RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
data_scaled(vip_nan_df, col)

Unnamed: 0,CryoSleep,Cabin2,Cabin3,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,...,B,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e
0,False,-1.167170,False,-0.953023,,-0.302677,-0.030337,-0.220319,-0.286713,0.022392,...,0,0,0,0,1,0,0,1,0,0
1,False,-1.146095,False,-1.970321,,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,...,0,0,0,0,0,1,0,0,0,1
2,True,-1.111608,True,0.403375,,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,...,0,0,0,0,1,0,0,0,0,1
3,True,-1.092449,False,-1.020843,,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,...,0,0,0,0,1,0,0,1,0,0
4,False,-1.136515,True,,,-0.247317,-0.165313,0.180697,-0.301808,1.115295,...,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291,False,-0.550247,False,0.199915,,-0.269203,0.452404,-0.363966,2.869616,0.076901,...,0,1,0,0,0,0,0,0,0,1
292,True,-0.619219,True,0.674654,,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,...,0,1,0,0,0,0,0,0,0,1
293,False,,True,0.606834,,0.706693,-0.225656,-0.238275,-0.272413,-0.244020,...,0,0,0,0,0,0,0,0,0,1
294,False,2.432825,True,-0.546103,,0.521298,-0.225656,-0.301120,-0.302602,-0.199050,...,0,0,0,0,1,0,0,0,0,1


In [134]:
vip_nan_data = vip_nan_df.drop(['VIP'],axis=1)
vip_nan_label = vip_nan_df.VIP

pred= xgb_model.predict(vip_nan_data)
pred = pd.DataFrame(pred)
pred.replace({0:False,1:True},inplace=True)
pred

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
291,False
292,False
293,False
294,False


In [135]:
vip_nan_df.drop('VIP',axis=1,inplace=True)
vip_nan_df.insert(23,'VIP',pred)
vip_nan_df

Unnamed: 0,CryoSleep,Cabin2,Cabin3,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Earth,...,C,D,E,F,G,T,55 Cancri e,PSO J318.5-22,TRAPPIST-1e,VIP
0,False,-1.167170,False,-0.953023,-0.302677,-0.030337,-0.220319,-0.286713,0.022392,1,...,0,0,0,1,0,0,1,0,0,False
1,False,-1.146095,False,-1.970321,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,1,...,0,0,0,0,1,0,0,0,1,False
2,True,-1.111608,True,0.403375,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,0,...,0,0,0,1,0,0,0,0,1,False
3,True,-1.092449,False,-1.020843,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,0,...,0,0,0,1,0,0,1,0,0,False
4,False,-1.136515,True,,-0.247317,-0.165313,0.180697,-0.301808,1.115295,0,...,0,1,0,0,0,0,0,0,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291,False,-0.550247,False,0.199915,-0.269203,0.452404,-0.363966,2.869616,0.076901,0,...,1,0,0,0,0,0,0,0,1,False
292,True,-0.619219,True,0.674654,-0.302677,-0.225656,-0.363966,-0.302602,-0.250833,0,...,1,0,0,0,0,0,0,0,1,False
293,False,,True,0.606834,0.706693,-0.225656,-0.238275,-0.272413,-0.244020,0,...,0,0,0,0,0,0,0,0,1,False
294,False,2.432825,True,-0.546103,0.521298,-0.225656,-0.301120,-0.302602,-0.199050,1,...,0,0,0,1,0,0,0,0,1,False


In [136]:
vip_nan_df.VIP.value_counts()

False    280
True      16
Name: VIP, dtype: int64

# Age 결측값 처리

- 중앙값인 27로 채워주었다

# 돈과 관련된 피처들 결측값 처리
- (RoomService, ShoppingMall, Spa, VRDeck, FoodCourt) 

- 중앙값인 0으로 채워주었다