In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the churn table from the project-relative data directory, then preview
# the first five rows to confirm the columns parsed as expected.
churn_csv = 'data/churn_ver02.csv'
churn = pd.read_csv(churn_csv)
churn.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,42,2,0.0,1,1,1,101348.88,1
1,608,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,42,8,159660.8,3,1,0,113931.57,1
3,699,0,39,1,0.0,2,0,0,93826.63,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0


In [3]:
from sklearn.model_selection import train_test_split
# Seed NumPy's global RNG so the shuffle performed by train_test_split (which
# is given no random_state of its own) is repeatable from run to run.
np.random.seed(123)

# Separate the label from the features only while 'Exited' is still a column.
# This cell rebinds `churn` to the feature-only frame, so without the guard a
# second execution in the same kernel would fail on the missing column.
# When the guard is skipped, `target` from the earlier run is reused.
if 'Exited' in churn.columns:
    target = churn['Exited']
    churn = churn.drop(columns='Exited')

# 70/30 split, stratified on the label so both parts keep the same proportion
# of exited vs. retained customers.
x_train, x_test, y_train, y_test = train_test_split(
    churn,
    target,
    test_size=0.3,
    stratify=target,
)

In [9]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [10]:
from sklearn.pipeline import Pipeline

In [11]:
# Two-step skeleton pipeline (preprocessing, then algorithm). Both slots are
# placeholders here; the grid search fills them in per candidate.
pipe = Pipeline(
    steps=[
        ('전처리', None),
        ('알고리즘', None),
    ]
)

In [12]:
# Search space for the two pipeline slots, one sub-grid per model family.
# Each sub-grid fixes the estimator, lists the preprocessing options to try
# with it (None = no preprocessing step), and sweeps one hyperparameter.
# Total candidates: 4 + 2*7 + 2*9 = 36.

# Logistic regression: always standardized, sweep C.
logistic_grid = {
    '알고리즘': [LogisticRegression()],
    '알고리즘__C': [0.01, 0.1, 1, 10],
    '전처리': [StandardScaler()],
}

# k-nearest neighbours: with and without standardization, sweep k.
knn_grid = {
    '알고리즘': [KNeighborsClassifier()],
    '알고리즘__n_neighbors': [10, 20, 30, 40, 50, 100, 200],
    '전처리': [StandardScaler(), None],
}

# Decision tree: with and without min-max scaling, sweep depth 2..10.
tree_grid = {
    '알고리즘': [DecisionTreeClassifier()],
    '알고리즘__max_depth': np.arange(2, 11),
    '전처리': [MinMaxScaler(), None],
}

param = [logistic_grid, knn_grid, tree_grid]

In [13]:
from sklearn.model_selection import GridSearchCV
# Exhaustive search over every candidate in `param`, scored by ROC AUC with
# 4-fold cross-validation. n_jobs=-1 uses all available cores; verbose=1
# prints the fold/candidate summary line.
search_options = dict(scoring='roc_auc', cv=4, n_jobs=-1, verbose=1)
grid = GridSearchCV(estimator=pipe, param_grid=param, **search_options)

# Kept as the cell's last expression so the fitted search object is displayed.
grid.fit(x_train, y_train)

Fitting 4 folds for each of 36 candidates, totalling 144 fits


GridSearchCV(cv=4, estimator=Pipeline(steps=[('전처리', None), ('알고리즘', None)]),
             n_jobs=-1,
             param_grid=[{'알고리즘': [LogisticRegression()],
                          '알고리즘__C': [0.01, 0.1, 1, 10],
                          '전처리': [StandardScaler()]},
                         {'알고리즘': [KNeighborsClassifier()],
                          '알고리즘__n_neighbors': [10, 20, 30, 40, 50, 100, 200],
                          '전처리': [StandardScaler(), None]},
                         {'알고리즘': [DecisionTreeClassifier(max_depth=5)],
                          '알고리즘__max_depth': array([ 2,  3,  4,  5,  6,  7,  8,  9, 10]),
                          '전처리': [MinMaxScaler(), None]}],
             scoring='roc_auc', verbose=1)

In [14]:
# Parameter combination from the search space that the grid search ranked
# best on its cross-validated score (ROC AUC, as configured on `grid`).
grid.best_params_

{'알고리즘': DecisionTreeClassifier(max_depth=5),
 '알고리즘__max_depth': 5,
 '전처리': MinMaxScaler()}

In [18]:
# Pipeline assembled from the selected parameters. The search leaves `refit`
# at its default, so this should be the model refit on the full training
# split; it is what `grid.predict_proba` delegates to.
grid.best_estimator_

Pipeline(steps=[('전처리', MinMaxScaler()),
                ('알고리즘', DecisionTreeClassifier(max_depth=5))])

In [19]:
from sklearn.metrics import roc_auc_score
# Hold-out evaluation: score the untouched test split with the selected model.
# Column 1 of predict_proba is used as the positive-class (Exited == 1) score,
# assuming the label is coded 0/1 as in the preview.
test_proba = grid.predict_proba(x_test)
pred = test_proba[:, 1]
roc_auc_score(y_true=y_test, y_score=pred)

0.8371987950775478