# Classification 종합실습

 ## 신용대출 심사

* 고객사는 ## 은행입니다. 신용평가 업무를 인공지능으로 전환하고자 여러분에게 모델링을 의뢰하였습니다.
* 대출업무는
    * 은행 창구에서 신청을 받고
    * 본사의 심사부서에서는 신용평가를 통해 대출 신청에 대한 승인 여부를 결정해 왔습니다.

* 현장의 요구
    * 경쟁사의 공격적인 대출상품 판매로, 본사에서는 자사 은행의 대출 실적이 줄어들고 있는 것에 부담을 느끼고 있습니다.
    * 그런데, 자사 은행에서는 신용평가 결과의 정확성에 의문을 품고 있으며, 신용평가 기준을 완화하여 가급적 대출승인 범위를 더 확대해 주기를 요구합니다. 

* 신용평가 업무를 인공지능으로 전환
    * 현장의 요구를 감안하여, 과거 사람이 하던 평가방식을 개선하고자 인공지능에 의한 예측 모델을 만들고, 정확도를 높이고자 합니다.
    * 최적의 모델을 생성해 봅시다.


## 1.환경준비

### (1) import

In [1]:
#라이브러리들을 불러오자.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 전처리
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 모델링
from sklearn.linear_model import LogisticRegression #로지스틱 회귀
from sklearn.tree import DecisionTreeClassifier, plot_tree #의사결정나무
from sklearn.neighbors import KNeighborsClassifier #KNN
from sklearn.svm import SVC #SVM Classifier
from sklearn.metrics import *  #모델 평가
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV #랜덤서치, 그리드 서치

import warnings    # 경고메시지 제외
warnings.filterwarnings(action='ignore')

### (2) 데이터 준비

- 1. 학자금 대출(학비) 관련 신용대출 데이터
    - 행의 의미: 신용대출 신청건 하나 (한 사람 한 사람)


In [2]:
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/credit_all.csv'
data = pd.read_csv(path)
data.loc[data['Payment'] == 4, 'Payment'] = 3
data.head()

Unnamed: 0,Creditability,AccountBalance,Duration,Payment,Purpose,CreditAmount,Employment,SexMarital,CurrentAddress,MostValuableAsset,Age,Apartment,CreditCount,Occupation,Dependents,Telephone,ForeignWorker
0,1,3,24,2,0,1249,2,4,2,1,28,2,1,3,1,1,1
1,1,2,9,2,0,276,3,4,4,1,22,1,1,2,1,1,1
2,1,1,18,3,2,1049,2,2,4,2,21,1,1,3,1,1,1
3,1,1,24,3,1,6419,5,2,4,4,44,3,2,4,2,2,1
4,1,3,12,2,2,1424,5,2,4,1,55,2,1,4,1,2,1


|	칼럼명	|	설명	|	 	|	값 의미	|
|	-----	|	-----	|	-----	|	-----	|
|	Creditability	|	Creditability(Target)	|	향후 신용도	|	0 : Bad, 1 : Good	|
|	AccountBalance	|	Account Balance	|	은행잔고	|	1: No account, 2 : None (No balance), 3 : Some Balance	|
|	CreditDuration	|	Duration of Credit (month)	|	신청한 대출기간(월)	|	숫자	|
|	Payment	|	Payment Status of Previous Credit	|	과거 대출 납입 상태	|	0 : 연체, 1 : 기타신용, 2 : 완납, 3 : 정상 대출상환 중 |
|	Purpose	|	Purpose	|	신청한 대출목적	|	1 : New Car , 2 : Used Car , 3 : Furniture , 4 : TV , 5 : Appliances , 6 : Repair , 8 : Vacation , 9 :Retraining , 10 : Business , 0 : Other	|
|	CreditAmount	|	Credit Amount($)	|	신청한 대출금액	|		|
|	Employment	|	Length of current employment(Month)	|	현 직업 근무 기간	|	1: Unemployed,  2: <1 Year,  3: [1, 4),  4: [4, 7),  5: Above 7	|
|	SexMarital	|	Sex & Marital Status	|	성별 & 결혼상태	|	1: Male, Divorced, 2: Male, Single , 3: Male, Married/Widowed , 4: Female	|
|	CurrentAddress	|	Duration in Current address	|	현 거주지 거주기간	|	1: <1 Year , 2: [1, 4) , 3: [4, 7) , 4: Above 7	|
|	MostValuable	|	Most valuable available asset	|	가장 가치있는 자산	|	1: None , 2: Car , 3: Life Insurance , 4: Real Estate	|
|	Age	|	Age (years)	|	나이	|		|
|	AppartmentType	|	Type of apartment	|	주거환경	|	1: free apartment, 2: Rented, 3: Owned	|
|	NoCredits	|	No of Credits at this Bank	|	현재 총 대출 건수	|	1 : one, 2 : 2 ~ 3, 3 : 4 ~ 5, 4 : 6 ~	|
|	Occupation	|	Occupation	|	직업	|	1: Unemployed, unskilled, 2: Unskilled Permanent Resident, 3: Skilled, 4: Executive	|
|	Telephone	|	Telephone	|	전화기 소유 여부	|	2: Yes , 1: No	|
|	ForeignWorker	|	Foreign Worker	|	외국인 근로자 여부	|	2: Yes , 1: No	|


## 2.데이터 준비

### (1) 데이터 정리

### (2) 데이터분할1 : x, y 나누기

In [3]:
target = 'Creditability'
x = data.drop(target, axis = 1)
y = data.loc[:, target]

### (3) NA 조치

### (4) 가변수화

In [4]:
dummy_vars = ['Employment', 'CurrentAddress', 'CreditCount', 'Dependents', 'Telephone', 'AccountBalance', 'Payment', 'Purpose', 'SexMarital', 'MostValuableAsset', 'Apartment','Occupation','ForeignWorker']
x = pd.get_dummies(x, columns = dummy_vars, drop_first = True)
x.head()

Unnamed: 0,Duration,CreditAmount,Age,Employment_2,Employment_3,Employment_4,Employment_5,CurrentAddress_2,CurrentAddress_3,CurrentAddress_4,...,SexMarital_4,MostValuableAsset_2,MostValuableAsset_3,MostValuableAsset_4,Apartment_2,Apartment_3,Occupation_2,Occupation_3,Occupation_4,ForeignWorker_2
0,24,1249,28,1,0,0,0,1,0,0,...,1,0,0,0,1,0,0,1,0,0
1,9,276,22,0,1,0,0,0,0,1,...,1,0,0,0,0,0,1,0,0,0
2,18,1049,21,1,0,0,0,0,0,1,...,0,1,0,0,0,0,0,1,0,0
3,24,6419,44,0,0,0,1,0,0,1,...,0,0,0,1,0,1,0,0,1,0
4,12,1424,55,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,1,0


### (5) 데이터분할2 : train : validation 나누기

In [5]:
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size = .3, random_state = 2022)

### (6) Scaling

In [6]:
scaler = MinMaxScaler()
x_train_s = scaler.fit_transform(x_train)
x_val_s = scaler.transform(x_val)

## 3.모델링

* 사용 알고리즘 : LogisticRegression, DecisionTreeClassifier, KNeighborsClassifier, SVC
* 3가지의 알고리즘을 선정하고 성능을 튜닝해 봅시다.

### (1) 로지스틱 회귀

* 함수 생성

In [7]:
# 아래 함수는 로지스틱 회귀를 위한 전진선택법 함수 입니다.
import statsmodels.api as sm

def forward_stepwise_logistic(x_train, y_train):

    # 변수목록, 선택된 변수 목록, 단계별 모델과 AIC 저장소 정의
    features = list(x_train)
    selected = []
    step_df = pd.DataFrame({ 'step':[], 'feature':[],'aic':[]})

    # 
    for s in range(0, len(features)) :
        result =  { 'step':[], 'feature':[],'aic':[]}

        # 변수 목록에서 변수 한개씩 뽑아서 모델에 추가
        for f in features :
            vars = selected + [f]
            x_tr = x_train[vars]
            model = sm.Logit(y_train, x_tr).fit()
            result['step'].append(s+1)
            result['feature'].append(vars)
            result['aic'].append(model.aic)
        
        # 모델별 aic 집계
        temp = pd.DataFrame(result).sort_values('aic').reset_index(drop = True)

        # 만약 이전 aic보다 새로운 aic 가 크다면 멈추기
        if step_df['aic'].min() < temp['aic'].min() :
            break
        step_df = pd.concat([step_df, temp], axis = 0).reset_index(drop = True)

        # 선택된 변수 제거
        v = temp.loc[0,'feature'][s]
        features.remove(v)

        selected.append(v)
    
    # 선택된 변수와 step_df 결과 반환
    return selected, step_df

* 전진선택법 수행

In [13]:
vars, result = forward_stepwise_logistic(x_train, y_train)

Optimization terminated successfully.
         Current function value: 0.655913
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.667719
         Iterations 4
Optimization terminated successfully.
         Current function value: 0.591570
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.692746
         Iterations 3
Optimization terminated successfully.
         Current function value: 0.660513
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.664357
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.652765
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.667777
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.682336
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.651675
  

Optimization terminated successfully.
         Current function value: 0.536031
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.532104
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.536083
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.535871
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.534011
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.535075
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.535588
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.536105
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.534943
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.532929
  

Optimization terminated successfully.
         Current function value: 0.523171
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.519291
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.500693
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.509681
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.502428
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.510108
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.508014
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.509944
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.508811
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.510137
  

Optimization terminated successfully.
         Current function value: 0.492187
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.490239
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.492710
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.493336
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.493136
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.492316
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.490674
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.486088
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.485475
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.485117
  

Optimization terminated successfully.
         Current function value: 0.476666
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479443
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479429
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479573
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.478804
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.478319
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479208
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479635
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.479495
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.478592
  

Optimization terminated successfully.
         Current function value: 0.468317
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468048
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468755
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.467622
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468628
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468773
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468654
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468667
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.467365
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.468597
  

In [14]:
vars

['AccountBalance_3',
 'Payment_3',
 'Purpose_1',
 'CreditAmount',
 'Payment_2',
 'Duration',
 'Employment_2',
 'AccountBalance_2',
 'ForeignWorker_2',
 'Purpose_3',
 'SexMarital_3',
 'CreditCount_2',
 'CurrentAddress_2',
 'MostValuableAsset_4',
 'Telephone_2',
 'Occupation_4']

In [16]:
list(x_train)

['Duration',
 'CreditAmount',
 'Age',
 'Employment_2',
 'Employment_3',
 'Employment_4',
 'Employment_5',
 'CurrentAddress_2',
 'CurrentAddress_3',
 'CurrentAddress_4',
 'CreditCount_2',
 'CreditCount_3',
 'CreditCount_4',
 'Dependents_2',
 'Telephone_2',
 'AccountBalance_2',
 'AccountBalance_3',
 'Payment_1',
 'Payment_2',
 'Payment_3',
 'Purpose_1',
 'Purpose_2',
 'Purpose_3',
 'Purpose_4',
 'Purpose_5',
 'Purpose_6',
 'Purpose_8',
 'Purpose_9',
 'Purpose_10',
 'SexMarital_2',
 'SexMarital_3',
 'SexMarital_4',
 'MostValuableAsset_2',
 'MostValuableAsset_3',
 'MostValuableAsset_4',
 'Apartment_2',
 'Apartment_3',
 'Occupation_2',
 'Occupation_3',
 'Occupation_4',
 'ForeignWorker_2']

* 선택된 변수

In [17]:
vars

['AccountBalance_3',
 'Payment_3',
 'Purpose_1',
 'CreditAmount',
 'Payment_2',
 'Duration',
 'Employment_2',
 'AccountBalance_2',
 'ForeignWorker_2',
 'Purpose_3',
 'SexMarital_3',
 'CreditCount_2',
 'CurrentAddress_2',
 'MostValuableAsset_4',
 'Telephone_2',
 'Occupation_4']

* 전체 변수로 모델링

In [20]:
m1 = LogisticRegression()
m1.fit(x_train, y_train)
pred1 = m1.predict(x_val)
print(accuracy_score(y_val, pred1))
print(classification_report(y_val, pred1))

0.7266666666666667
              precision    recall  f1-score   support

           0       0.60      0.46      0.52        97
           1       0.77      0.85      0.81       203

    accuracy                           0.73       300
   macro avg       0.68      0.66      0.67       300
weighted avg       0.71      0.73      0.72       300



* 전진선택법 변수로 모델링

In [21]:
m2 = LogisticRegression()
m2.fit(x_train[vars], y_train)
pred2 = m2.predict(x_val[vars])
print(accuracy_score(y_val, pred2))
print(classification_report(y_val, pred2))

0.7333333333333333
              precision    recall  f1-score   support

           0       0.62      0.46      0.53        97
           1       0.77      0.86      0.81       203

    accuracy                           0.73       300
   macro avg       0.69      0.66      0.67       300
weighted avg       0.72      0.73      0.72       300



### (2) 의사결정나무

In [24]:
params = {
    "max_depth" : [0.1, 0.3, 0.5, 1, 3, 5, 10, 20],
    "min_samples_leaf": range(10, 101, 10)
}

In [25]:
m3 = DecisionTreeClassifier()

In [34]:
m_gs = GridSearchCV(m3, params, cv=5)

In [35]:
m_gs.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [0.1, 0.3, 0.5, 1, 3, 5, 10, 20],
                         'min_samples_leaf': range(10, 101, 10)})

In [30]:
m_rs.cv_results_

{'mean_fit_time': array([0.00592623, 0.00391002, 0.0033627 , 0.00346432, 0.00384483]),
 'std_fit_time': array([0.00259126, 0.00076149, 0.0007787 , 0.0010033 , 0.00064189]),
 'mean_score_time': array([0.00291743, 0.00284762, 0.00262947, 0.00225935, 0.00221987]),
 'std_score_time': array([0.00063789, 0.00071109, 0.0004889 , 0.00072114, 0.00035039]),
 'param_min_samples_leaf': masked_array(data=[40, 80, 30, 60, 100],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_max_depth': masked_array(data=[0.5, 3, 1, 0.3, 10],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'min_samples_leaf': 40, 'max_depth': 0.5},
  {'min_samples_leaf': 80, 'max_depth': 3},
  {'min_samples_leaf': 30, 'max_depth': 1},
  {'min_samples_leaf': 60, 'max_depth': 0.3},
  {'min_samples_leaf': 100, 'max_depth': 10}],
 'split0_test_score': array([0.70714286, 0.75714286, 0.70714286, 0.70

In [36]:
m_gs.cv_results_['params']

[{'max_depth': 0.1, 'min_samples_leaf': 10},
 {'max_depth': 0.1, 'min_samples_leaf': 20},
 {'max_depth': 0.1, 'min_samples_leaf': 30},
 {'max_depth': 0.1, 'min_samples_leaf': 40},
 {'max_depth': 0.1, 'min_samples_leaf': 50},
 {'max_depth': 0.1, 'min_samples_leaf': 60},
 {'max_depth': 0.1, 'min_samples_leaf': 70},
 {'max_depth': 0.1, 'min_samples_leaf': 80},
 {'max_depth': 0.1, 'min_samples_leaf': 90},
 {'max_depth': 0.1, 'min_samples_leaf': 100},
 {'max_depth': 0.3, 'min_samples_leaf': 10},
 {'max_depth': 0.3, 'min_samples_leaf': 20},
 {'max_depth': 0.3, 'min_samples_leaf': 30},
 {'max_depth': 0.3, 'min_samples_leaf': 40},
 {'max_depth': 0.3, 'min_samples_leaf': 50},
 {'max_depth': 0.3, 'min_samples_leaf': 60},
 {'max_depth': 0.3, 'min_samples_leaf': 70},
 {'max_depth': 0.3, 'min_samples_leaf': 80},
 {'max_depth': 0.3, 'min_samples_leaf': 90},
 {'max_depth': 0.3, 'min_samples_leaf': 100},
 {'max_depth': 0.5, 'min_samples_leaf': 10},
 {'max_depth': 0.5, 'min_samples_leaf': 20},
 {'max_d

In [37]:
m_gs.cv_results_['mean_test_score']

array([0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.71      , 0.71      , 0.71      , 0.71      , 0.71      ,
       0.7       , 0.70714286, 0.71      , 0.73      , 0.73285714,
       0.73285714, 0.73571429, 0.74285714, 0.71571429, 0.71285714,
       0.72714286, 0.71428571, 0.71428571, 0.72571429, 0.72571429,
       0.72714286, 0.73142857, 0.74285714, 0.71571429, 0.71285714,
       0.69142857, 0.71142857, 0.72428571, 0.72571429, 0.72571429,
       0.72714286, 0.73142857, 0.74285714, 0.71571429, 0.71285714,
       0.69142857, 0.71142857, 0.72428571, 0.72571429, 0.72571

In [38]:
m_gs.best_score_

0.7428571428571429

In [39]:
m_gs.best_params_

{'max_depth': 3, 'min_samples_leaf': 80}

In [107]:
m_gs.best_estimator_

DecisionTreeClassifier(max_depth=3, min_samples_leaf=80)

In [109]:
def plot_feature_importance(importance, names):#중요도와 이름을 받아서
    feature_importance = np.array(importance)
    feature_names = np.array(names)
    #numpy로 바꾸고

    data={'feature_names':feature_names,'feature_importance':feature_importance}
    fi_df = pd.DataFrame(data)

    # 정렬
    fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
    fi_df.reset_index(drop=True, inplace = True)

    #그림그리기
    plt.figure(figsize=(10,8))
    sns.barplot(x='feature_importance', y='feature_names', data = fi_df)

    plt.xlabel('FEATURE IMPORTANCE')
    plt.ylabel('FEATURE NAMES')
    plt.grid()

    return fi_df

In [116]:
m_gs.best_index_

47

In [40]:
pred3  = m_gs.predict(x_val)

In [41]:
print(accuracy_score(y_val, pred3))
print(classification_report(y_val, pred3))

0.7033333333333334
              precision    recall  f1-score   support

           0       0.55      0.46      0.50        97
           1       0.76      0.82      0.79       203

    accuracy                           0.70       300
   macro avg       0.66      0.64      0.65       300
weighted avg       0.69      0.70      0.70       300



### (3) KNN

In [94]:
params = {"n_neighbors": range(1, 101), "metric":['euclidean', 'manhattan']}

In [95]:
model4 = KNeighborsClassifier()

In [96]:
m_gs2 = GridSearchCV(model4, params, cv=5)

In [97]:
m_gs2.fit(x_train_s, y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'metric': ['euclidean', 'manhattan'],
                         'n_neighbors': range(1, 101)})

In [98]:
m_gs2.cv_results_

{'mean_fit_time': array([0.00141439, 0.00116334, 0.00116959, 0.0011569 , 0.00075746,
        0.00096684, 0.00099211, 0.0013968 , 0.00079083, 0.00095744,
        0.00121117, 0.00127711, 0.00126572, 0.00113969, 0.00140009,
        0.00116434, 0.00126896, 0.00103211, 0.00166321, 0.00158935,
        0.00167499, 0.00129933, 0.00117517, 0.00141163, 0.00152292,
        0.0013504 , 0.00118279, 0.00085382, 0.00100489, 0.00099721,
        0.0014708 , 0.00086031, 0.00125914, 0.00128975, 0.00098982,
        0.00122051, 0.00098805, 0.00157022, 0.00113845, 0.00112834,
        0.00113168, 0.00102   , 0.00126033, 0.00110621, 0.00126905,
        0.00120616, 0.00099835, 0.00101795, 0.00115156, 0.00101142,
        0.00121517, 0.00103683, 0.0009726 , 0.00121622, 0.00099597,
        0.00126491, 0.00118976, 0.00119677, 0.00094848, 0.00169396,
        0.00139923, 0.00101552, 0.00119934, 0.0011982 , 0.00101762,
        0.00091724, 0.00133882, 0.00104475, 0.00136871, 0.00147452,
        0.00100055, 0.00119934,

In [99]:
m_gs2.cv_results_['params']

[{'metric': 'euclidean', 'n_neighbors': 1},
 {'metric': 'euclidean', 'n_neighbors': 2},
 {'metric': 'euclidean', 'n_neighbors': 3},
 {'metric': 'euclidean', 'n_neighbors': 4},
 {'metric': 'euclidean', 'n_neighbors': 5},
 {'metric': 'euclidean', 'n_neighbors': 6},
 {'metric': 'euclidean', 'n_neighbors': 7},
 {'metric': 'euclidean', 'n_neighbors': 8},
 {'metric': 'euclidean', 'n_neighbors': 9},
 {'metric': 'euclidean', 'n_neighbors': 10},
 {'metric': 'euclidean', 'n_neighbors': 11},
 {'metric': 'euclidean', 'n_neighbors': 12},
 {'metric': 'euclidean', 'n_neighbors': 13},
 {'metric': 'euclidean', 'n_neighbors': 14},
 {'metric': 'euclidean', 'n_neighbors': 15},
 {'metric': 'euclidean', 'n_neighbors': 16},
 {'metric': 'euclidean', 'n_neighbors': 17},
 {'metric': 'euclidean', 'n_neighbors': 18},
 {'metric': 'euclidean', 'n_neighbors': 19},
 {'metric': 'euclidean', 'n_neighbors': 20},
 {'metric': 'euclidean', 'n_neighbors': 21},
 {'metric': 'euclidean', 'n_neighbors': 22},
 {'metric': 'euclid

In [100]:
m_gs2.cv_results_['mean_test_score']

array([0.65285714, 0.62142857, 0.69857143, 0.69142857, 0.72142857,
       0.69571429, 0.71      , 0.71142857, 0.71      , 0.72      ,
       0.70428571, 0.70857143, 0.70857143, 0.70571429, 0.71428571,
       0.71714286, 0.71428571, 0.71857143, 0.71571429, 0.71571429,
       0.72142857, 0.71428571, 0.71714286, 0.72142857, 0.71714286,
       0.71857143, 0.71285714, 0.71571429, 0.71285714, 0.71428571,
       0.71      , 0.70571429, 0.70857143, 0.71      , 0.71      ,
       0.71285714, 0.71142857, 0.71      , 0.70857143, 0.71      ,
       0.70428571, 0.70285714, 0.70714286, 0.70571429, 0.70285714,
       0.70571429, 0.70428571, 0.70571429, 0.70571429, 0.70857143,
       0.70714286, 0.71      , 0.70714286, 0.70714286, 0.70714286,
       0.70571429, 0.70714286, 0.70714286, 0.70714286, 0.70571429,
       0.70571429, 0.70285714, 0.70428571, 0.70428571, 0.70714286,
       0.70571429, 0.70714286, 0.70714286, 0.71      , 0.70714286,
       0.70857143, 0.70571429, 0.70857143, 0.70857143, 0.71   

In [101]:
m_gs2.best_score_

0.7214285714285715

In [104]:
m_gs2.best_params_

{'metric': 'euclidean', 'n_neighbors': 5}

In [106]:
# 최적의 모델은 아래와 같은 이름을 갖게될 것이다.
m_gs2.best_estimator_

KNeighborsClassifier(metric='euclidean')

In [102]:
pred4 = m_gs2.predict(x_val_s)

In [103]:
print(accuracy_score(y_val, pred4))
print(classification_report(y_val, pred4))

0.6733333333333333
              precision    recall  f1-score   support

           0       0.49      0.29      0.36        97
           1       0.72      0.86      0.78       203

    accuracy                           0.67       300
   macro avg       0.60      0.57      0.57       300
weighted avg       0.64      0.67      0.65       300



### (4) SVM

In [60]:
model5 =SVC()

In [61]:
params = {
    'C':[0.1, 0.3, 0.5, 1, 5, 10, 20],
    'gamma':[0.1, 0.2, 0.3, 1, 5, 10, 20]
}

In [64]:
m_gs3 = GridSearchCV(model5, params, cv=5)

In [65]:
m_gs3.fit(x_train_s, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 0.3, 0.5, 1, 5, 10, 20],
                         'gamma': [0.1, 0.2, 0.3, 1, 5, 10, 20]})

In [66]:
m_gs3.best_score_

0.7342857142857143

In [67]:
m_gs3.best_params_

{'C': 1, 'gamma': 0.2}

In [69]:
m_gs3.cv_results_['params']

[{'C': 0.1, 'gamma': 0.1},
 {'C': 0.1, 'gamma': 0.2},
 {'C': 0.1, 'gamma': 0.3},
 {'C': 0.1, 'gamma': 1},
 {'C': 0.1, 'gamma': 5},
 {'C': 0.1, 'gamma': 10},
 {'C': 0.1, 'gamma': 20},
 {'C': 0.3, 'gamma': 0.1},
 {'C': 0.3, 'gamma': 0.2},
 {'C': 0.3, 'gamma': 0.3},
 {'C': 0.3, 'gamma': 1},
 {'C': 0.3, 'gamma': 5},
 {'C': 0.3, 'gamma': 10},
 {'C': 0.3, 'gamma': 20},
 {'C': 0.5, 'gamma': 0.1},
 {'C': 0.5, 'gamma': 0.2},
 {'C': 0.5, 'gamma': 0.3},
 {'C': 0.5, 'gamma': 1},
 {'C': 0.5, 'gamma': 5},
 {'C': 0.5, 'gamma': 10},
 {'C': 0.5, 'gamma': 20},
 {'C': 1, 'gamma': 0.1},
 {'C': 1, 'gamma': 0.2},
 {'C': 1, 'gamma': 0.3},
 {'C': 1, 'gamma': 1},
 {'C': 1, 'gamma': 5},
 {'C': 1, 'gamma': 10},
 {'C': 1, 'gamma': 20},
 {'C': 5, 'gamma': 0.1},
 {'C': 5, 'gamma': 0.2},
 {'C': 5, 'gamma': 0.3},
 {'C': 5, 'gamma': 1},
 {'C': 5, 'gamma': 5},
 {'C': 5, 'gamma': 10},
 {'C': 5, 'gamma': 20},
 {'C': 10, 'gamma': 0.1},
 {'C': 10, 'gamma': 0.2},
 {'C': 10, 'gamma': 0.3},
 {'C': 10, 'gamma': 1},
 {'C': 10, 

In [72]:
pred5 = m_gs3.predict(x_val_s)

In [73]:
print(accuracy_score(y_val, pred5))
print(classification_report(y_val, pred5))

0.72
              precision    recall  f1-score   support

           0       0.62      0.35      0.45        97
           1       0.74      0.90      0.81       203

    accuracy                           0.72       300
   macro avg       0.68      0.62      0.63       300
weighted avg       0.70      0.72      0.69       300



In [105]:
# 튜닝 모델은 시각화, 변수 중요도 확인 어떻게 함?
m_gs3.best_estimator_

SVC(C=1, gamma=0.2)