In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import StandardScaler

In [2]:
from imblearn.over_sampling import SMOTE

In [3]:
df = pd.read_csv('./data/creditcard.csv')
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


# 전처리

- outlier 제거 및 SMOTE로 비대칭 데이터 oversampling
- 그 외 전과 동일하게 진행

In [4]:
def drop_outlier(data):
    for column in data.columns[:-1]: #target column 제외
        q25, q75 = np.quantile(data[column], 0.25), np.quantile(data[column], 0.75)
        iqr = q75 - q25
        lower, upper = q25 - iqr*1.5, q75 + iqr*1.5
        
        df_no = data[data[column] > lower]
        df_no = df_no[df_no[column] < upper]
        df_no.reset_index(drop=True)
        
    return df_no

In [5]:
df_wo = drop_outlier(df)
df_wo.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0
5,2.0,-0.425966,0.960523,1.141109,-0.168252,0.420987,-0.029728,0.476201,0.260314,-0.568671,...,-0.208254,-0.559825,-0.026398,-0.371427,-0.232794,0.105915,0.253844,0.08108,3.67,0


In [6]:
df_wo.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,...,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0,252903.0
mean,94986.492228,0.094504,0.230435,0.060689,-0.01903,0.116567,-0.059286,-0.083767,0.024857,0.008731,...,-0.018557,0.002675,0.006637,0.001982,0.002301,-0.001892,0.002238,-0.001567,34.974887,0.001586
std,47594.978003,1.820271,1.293522,1.471043,1.393801,1.214682,1.264431,1.038737,1.156919,1.081381,...,0.705072,0.714132,0.433929,0.602656,0.505696,0.469089,0.371147,0.266247,42.17225,0.039788
min,0.0,-40.470142,-47.429676,-33.680984,-5.683171,-23.669726,-23.496714,-43.557242,-41.484823,-13.434066,...,-22.797604,-8.887017,-36.666,-2.836627,-7.495741,-1.732008,-9.895244,-8.478686,0.0,0.0
25%,54533.5,-0.859648,-0.397907,-0.816557,-0.850251,-0.567459,-0.793053,-0.576955,-0.196253,-0.615814,...,-0.237517,-0.544744,-0.141823,-0.35517,-0.314581,-0.322206,-0.063949,-0.054103,4.49,0.0
50%,85041.0,0.035238,0.165842,0.225603,-0.024476,0.021158,-0.318741,0.016034,0.029424,-0.045243,...,-0.047347,0.005992,-0.005033,0.040299,0.012927,-0.041085,0.006947,0.008817,16.0,0.0
75%,139586.0,1.356675,0.878353,1.067617,0.717123,0.666514,0.311231,0.515631,0.335343,0.583818,...,0.158554,0.52877,0.141507,0.439066,0.349265,0.229586,0.096233,0.072347,49.99,0.0
max,172788.0,2.45493,22.057729,4.226108,12.114672,34.099309,8.933762,6.491054,20.007208,10.392889,...,27.202839,8.361985,22.083545,4.022866,7.519589,3.463246,8.254376,22.620072,184.5,1.0


## smote는 데이터 간 거리에 영향을 받으므로 scaling 후 진행

In [7]:
df_shuffled=df_wo.sample(frac=1).reset_index(drop=True)
df_shuffled.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,60795.0,-0.095796,-0.468491,1.286204,-2.247276,-1.383614,-1.371909,-0.348979,-0.033371,-1.788838,...,-0.161878,-0.14821,0.103647,0.605609,-0.32043,-0.419694,0.288428,0.102088,20.0,0
1,80161.0,0.954676,-0.380701,1.105345,1.49178,-1.124356,-0.239813,-0.468562,0.08542,0.826036,...,0.180608,0.407974,-0.174698,0.39786,0.43855,-0.251751,0.041837,0.053328,115.0,0
2,66910.0,-1.839533,2.513215,-0.392007,-0.298029,-0.137494,1.631292,-2.507942,-7.800496,0.139315,...,7.311845,-3.637817,0.876874,-1.020208,1.150886,0.282829,0.326039,0.166255,5.99,0
3,91041.0,0.003097,0.551709,1.374499,-0.954777,0.540655,0.020932,0.473265,-0.221213,2.004189,...,-0.438012,-0.781667,-0.201302,-1.097818,-0.150994,-0.27614,-0.162938,-0.222145,0.77,0
4,151241.0,-1.363889,-0.040194,0.775424,0.645808,1.766148,-1.524398,0.212912,-0.106101,-0.474329,...,0.096298,0.168467,-0.3211,0.012715,0.824791,0.937049,-0.024933,0.109263,19.99,0


In [8]:
df_x = df_shuffled[df_shuffled.columns[:-1]]
df_y = df_shuffled[df_shuffled.columns[-1]]

scaler = StandardScaler()
df_x_scale = scaler.fit_transform(df_x)

In [9]:
sm = SMOTE(random_state=42, sampling_strategy=0.2)

X_resampled, y_resampled = sm.fit_sample(df_x_scale,df_y)

In [10]:
df_reX = pd.DataFrame(X_resampled)
df_rey = pd.DataFrame(y_resampled)

df_reX.columns = df_x.columns
df_reX.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
count,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,...,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0,303002.0
mean,-0.060613,-0.450002,0.509864,-0.839135,0.562185,-0.449934,-0.200287,-0.961667,0.097017,-0.420473,...,0.141059,0.175476,-0.01007,-0.001434,-0.033062,0.039741,0.019413,0.06114,0.077714,-0.025613
std,1.002724,2.112843,1.955496,2.934683,1.7635,2.309104,1.156165,3.694563,2.647675,1.608427,...,1.346004,2.523977,1.256107,1.377183,0.968242,1.080361,0.977336,1.786865,1.205663,0.9925
min,-1.995729,-22.284996,-36.845292,-22.937283,-4.063817,-19.582355,-18.535978,-41.852313,-35.879573,-12.431161,...,-44.518695,-32.307486,-12.448277,-84.51314,-4.710172,-14.827201,-3.688252,-26.667342,-31.83942,-0.829336
25%,-0.915277,-0.6813,-0.389287,-1.056675,-0.492113,-0.726214,-0.741094,-0.795165,-0.18262,-0.905004,...,-0.294461,-0.291979,-0.759937,-0.386012,-0.612967,-0.609794,-0.64434,-0.172013,-0.191115,-0.759384
50%,-0.25488,-0.163757,0.115456,-0.100412,0.197343,-0.142846,-0.306658,-0.030884,0.03511,-0.18525,...,-0.003593,0.014567,0.00686,-0.052542,0.040476,0.062667,-0.054462,0.043297,0.063287,-0.473651
75%,0.895065,0.639431,0.760838,0.572394,0.957182,0.431672,0.203308,0.487312,0.384025,0.420238,...,0.401329,0.369,0.7314,0.308993,0.671949,0.724722,0.540991,0.448755,0.414986,0.355273
max,1.634661,1.296747,16.874343,2.831614,8.705494,27.976707,7.112339,6.329641,17.272074,9.602699,...,21.725369,38.608061,11.705581,50.876866,6.67195,14.865259,7.386958,22.234197,84.965179,3.545588


In [11]:
df_rey.value_counts()

Class
0        252502
1         50500
dtype: int64

In [12]:
x_train, x_test, y_train, y_test = train_test_split(df_reX, df_rey, test_size=0.1, random_state=42)

# 모델 학습

- logstic regresstion with penalty
- decision tree
- random forest
- xgboost
- lightgbmboost
- catboost

## logsitic regression

In [13]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(random_state=42)

param_grid = [{'penalty' : ['none', 'l2']},
              {'penalty' : ['elasticnet'], 'l1_ratio' : [0.5,0.25,0.75], 'solver' : ['saga']},
              {'penalty' : ['l1'], 'solver' : ['saga']}]

cross_validation = StratifiedKFold(n_splits=5)

log_grid = GridSearchCV(log_reg, param_grid, cv=cross_validation, scoring='accuracy')
log_grid.fit(x_train, y_train)

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
             estimator=LogisticRegression(random_state=42),
             param_grid=[{'penalty': ['none', 'l2']},
                         {'l1_ratio': [0.5, 0.25, 0.75],
                          'penalty': ['elasticnet'], 'solver': ['saga']},
                         {'penalty': ['l1'], 'solver': ['saga']}],
             scoring='accuracy')

In [14]:
log_result = pd.DataFrame(log_grid.cv_results_)
log_result

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,param_l1_ratio,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.488458,0.109012,0.008678,0.000398,none,,,{'penalty': 'none'},0.979447,0.979483,0.978878,0.979611,0.979556,0.979395,0.000265,1
1,1.486793,0.109641,0.008178,0.001163,l2,,,{'penalty': 'l2'},0.979447,0.979483,0.978878,0.979611,0.979556,0.979395,0.000265,1
2,15.544097,1.046063,0.008158,0.000399,elasticnet,0.5,saga,"{'l1_ratio': 0.5, 'penalty': 'elasticnet', 'so...",0.979447,0.979483,0.978878,0.979611,0.97952,0.979388,0.000261,3
3,15.131169,0.685021,0.008174,0.000401,elasticnet,0.25,saga,"{'l1_ratio': 0.25, 'penalty': 'elasticnet', 's...",0.979447,0.979483,0.978878,0.979611,0.97952,0.979388,0.000261,3
4,14.969314,0.900835,0.008358,0.000502,elasticnet,0.75,saga,"{'l1_ratio': 0.75, 'penalty': 'elasticnet', 's...",0.979447,0.979483,0.978878,0.979611,0.97952,0.979388,0.000261,3
5,14.98292,0.703653,0.008367,0.000803,l1,,saga,"{'penalty': 'l1', 'solver': 'saga'}",0.979447,0.979483,0.978878,0.979611,0.979501,0.979384,0.000259,6


In [15]:
log_grid.best_params_

{'penalty': 'none'}

In [16]:
log_model = log_grid.best_estimator_
log_y_pred = log_model.predict(x_test)

log_cm = confusion_matrix(y_test, log_y_pred)
log_cm

array([[25208,   116],
       [  452,  4525]], dtype=int64)

## decision tree

In [17]:
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(random_state=42)

parameter_grid = {'max_depth': [5, 10, 20],
                  'max_features': [1, 5, 10, 20, 25]}

cross_validation = StratifiedKFold(n_splits=5)

tree_grid = GridSearchCV(tree, param_grid = parameter_grid,
                          cv = cross_validation)

tree_grid.fit(x_train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
             estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'max_depth': [5, 10, 20],
                         'max_features': [1, 5, 10, 20, 25]})

In [18]:
tree_result = pd.DataFrame(tree_grid.cv_results_)
tree_result

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_max_features,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.236767,0.00975,0.011162,0.000374,5,1,"{'max_depth': 5, 'max_features': 1}",0.931043,0.93216,0.93205,0.93194,0.931481,0.931735,0.000416,15
1,0.824234,0.015779,0.010977,1.2e-05,5,5,"{'max_depth': 5, 'max_features': 5}",0.976183,0.976091,0.975578,0.976879,0.976659,0.976278,0.000456,13
2,1.616231,0.049762,0.01147,0.000619,5,10,"{'max_depth': 5, 'max_features': 10}",0.980363,0.977118,0.977924,0.980785,0.977998,0.978838,0.001457,12
3,3.113654,0.051217,0.010972,0.000631,5,20,"{'max_depth': 5, 'max_features': 20}",0.980528,0.98051,0.979373,0.98029,0.979721,0.980084,0.00046,11
4,3.9374,0.04324,0.010775,0.000756,5,25,"{'max_depth': 5, 'max_features': 25}",0.980217,0.979611,0.980198,0.980986,0.98062,0.980326,0.000461,10
5,0.385926,0.01199,0.011767,0.000394,10,1,"{'max_depth': 10, 'max_features': 1}",0.973451,0.976348,0.964081,0.975358,0.977888,0.973425,0.004889,14
6,1.458227,0.021098,0.012375,0.001017,10,5,"{'max_depth': 10, 'max_features': 5}",0.983719,0.984507,0.982893,0.981426,0.98216,0.982941,0.001092,9
7,3.025437,0.105551,0.01157,0.000489,10,10,"{'max_depth': 10, 'max_features': 10}",0.986891,0.988944,0.987495,0.987789,0.987349,0.987693,0.000689,8
8,5.809574,0.118077,0.011562,0.001015,10,20,"{'max_depth': 10, 'max_features': 20}",0.989604,0.990099,0.988577,0.989604,0.98766,0.989109,0.000878,7
9,7.294395,0.117064,0.01163,0.000759,10,25,"{'max_depth': 10, 'max_features': 25}",0.990759,0.989256,0.989384,0.990429,0.990337,0.990033,0.0006,6


In [19]:
tree_grid.best_params_

{'max_depth': 20, 'max_features': 10}

In [20]:
tree_model = tree_grid.best_estimator_
tree_y_pred = tree_model.predict(x_test)

tree_cm = confusion_matrix(y_test, tree_y_pred)
tree_cm

array([[25258,    66],
       [   24,  4953]], dtype=int64)

## random forest

In [21]:
from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier(random_state=42, n_estimators = 100, max_depth = 5, max_features=20)
forest.fit(x_train, y_train)

  forest.fit(x_train, y_train)


RandomForestClassifier(max_depth=5, max_features=20, random_state=42)

In [22]:
forest_y_pred = forest.predict(x_test)

forest_cm = confusion_matrix(y_test, forest_y_pred)
forest_cm

array([[25284,    40],
       [  474,  4503]], dtype=int64)

## xgboost

In [23]:
import xgboost as xgb

xgbc = xgb.XGBClassifier(random_state=42, n_estimators = 100, max_depth = 5)
xgbc.fit(x_train, y_train)
xg_y_pred = xgbc.predict(x_test)

xgbc_cm = confusion_matrix(y_test, xg_y_pred)
xgbc_cm

  return f(*args, **kwargs)




array([[25315,     9],
       [    0,  4977]], dtype=int64)

## lightgbmboost

In [24]:
from lightgbm import LGBMClassifier

lgbm = LGBMClassifier(random_state=42)
lgbm.fit(x_train, y_train)
lgbm_y_pred = lgbm.predict(x_test)

lgbm_cm = confusion_matrix(y_test, lgbm_y_pred)
lgbm_cm

  return f(*args, **kwargs)


array([[25312,    12],
       [   10,  4967]], dtype=int64)

## catboost

In [25]:
from catboost import CatBoostClassifier

cat = CatBoostClassifier(verbose=0, n_estimators=100)
cat.fit(x_train, y_train)
cat_y_pred = cat.predict(x_test)

cat_cm = confusion_matrix(y_test, cat_y_pred)
cat_cm

array([[25311,    13],
       [    4,  4973]], dtype=int64)

## score

In [26]:
def evaluation_score(cm):
    accuracy = (cm[0,0] + cm[1,1]) / (cm[0,0]+cm[1,0]+cm[0,1]+cm[1,1])
    error_rate = 1-accuracy
    specificity = cm[1,1] / (cm[0,1] + cm[1,1])
    recall = cm[0,0] / (cm[0,0] + cm[1,0])
    precision = cm[0,0] / (cm[0,0] + cm[0,1])
    f1_score = 2 * (precision*recall) / (precision+recall)
    
    score_array = np.array([round(accuracy,6), round(error_rate,6), round(specificity,6), round(recall,6), round(precision,6), round(f1_score,6)])
    
    return score_array

In [27]:
log_score = evaluation_score(log_cm)
tree_score = evaluation_score(tree_cm)
forest_score = evaluation_score(forest_cm)
xgb_score = evaluation_score(xgbc_cm)
lgbm_score = evaluation_score(lgbm_cm)
cat_score = evaluation_score(cat_cm)

In [28]:
df_score = pd.DataFrame([log_score, tree_score, forest_score, xgb_score, lgbm_score, cat_score], 
                        columns = ['accuracy', 'error_rate', 'specificity', 'recall', 'precision', 'f1_score'],
                       index = ['log', 'tree', 'forest', 'xgb', 'lbgm', 'cat'])
df_score

Unnamed: 0,accuracy,error_rate,specificity,recall,precision,f1_score
log,0.981255,0.018745,0.975005,0.982385,0.995419,0.988859
tree,0.99703,0.00297,0.98685,0.999051,0.997394,0.998222
forest,0.983037,0.016963,0.991195,0.981598,0.99842,0.989938
xgb,0.999703,0.000297,0.998195,1.0,0.999645,0.999822
lbgm,0.999274,0.000726,0.99759,0.999605,0.999526,0.999566
cat,0.999439,0.000561,0.997393,0.999842,0.999487,0.999664


## stacking with logistic
기존 logistic model을 제외한 5가지 model 이용

In [31]:
new_data = np.array([tree_y_pred, forest_y_pred, xg_y_pred, lgbm_y_pred, cat_y_pred])
new_data = np.transpose(new_data)
new_data.shape

(30301, 5)

In [32]:
log_stack = LogisticRegression(random_state=42)
log_stack.fit(new_data, y_test)
stack_y_pred = log_stack.predict(new_data)

stack_cm = confusion_matrix(y_test, stack_y_pred)
stack_score = evaluation_score(stack_cm)
stack_cm

  return f(*args, **kwargs)


array([[25314,    10],
       [    4,  4973]], dtype=int64)

## stacking with xgboost
기존 xgboost model을 제외한 5가지 model 이용

In [33]:
new_data2 = np.transpose(np.array([log_y_pred, tree_y_pred, forest_y_pred, lgbm_y_pred, cat_y_pred]))
new_data2.shape

(30301, 5)

In [36]:
xg_stack = xgb.XGBClassifier(random_state=42, n_estimators = 100, max_depth = 5)
xg_stack.fit(new_data2, y_test)
stack_y_pred2 = xg_stack.predict(new_data2)

stack_xg_cm = confusion_matrix(y_test, stack_y_pred2)
stack_xg_score = evaluation_score(stack_xg_cm)
stack_xg_cm



array([[25316,     8],
       [    4,  4973]], dtype=int64)

In [37]:
df_score_stack = pd.DataFrame([log_score, tree_score, forest_score, xgb_score, lgbm_score, cat_score, stack_score, stack_xg_score], 
                        columns = ['accuracy', 'error_rate', 'specificity', 'recall', 'precision', 'f1_score'],
                       index = ['log', 'tree', 'forest', 'xgb', 'lbgm', 'cat', 'stack_log', 'stack_xg'])
df_score_stack

Unnamed: 0,accuracy,error_rate,specificity,recall,precision,f1_score
log,0.981255,0.018745,0.975005,0.982385,0.995419,0.988859
tree,0.99703,0.00297,0.98685,0.999051,0.997394,0.998222
forest,0.983037,0.016963,0.991195,0.981598,0.99842,0.989938
xgb,0.999703,0.000297,0.998195,1.0,0.999645,0.999822
lbgm,0.999274,0.000726,0.99759,0.999605,0.999526,0.999566
cat,0.999439,0.000561,0.997393,0.999842,0.999487,0.999664
stack_log,0.999538,0.000462,0.997993,0.999842,0.999605,0.999724
stack_xg,0.999604,0.000396,0.998394,0.999842,0.999684,0.999763


기존 model 중 가장 성능이 좋았던 xgboost model과 stack_log model을 비교했을 때는 모든 성능지표가 떨어졌다.  
하지만 stack_xg model과 비교했을 때에는 specificity, precision이 더 좋아졌으며 나머지 성능지표는 xgboost model이 더 좋았다.  
이는 평가지표의 기준에 따라 model selection에 영향을 줄 것으로 보인다.