In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score , cross_validate

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, precision_score , recall_score , f1_score , roc_auc_score , confusion_matrix

from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Binarizer

import graphviz
from sklearn.tree import export_graphviz
from IPython.display import display

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
adult = pd.read_csv('./adult_data/adult.csv')
adult.head(3)

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K


<b> null value replace </b>

In [None]:
print(adult.isnull().sum().sum())

adult = adult.replace({' ?'},np.nan)
adult = adult.replace({'?'},np.nan)

print(adult.isnull().sum().sum())

adult.fillna('Unknown',inplace=True)

print(adult.isnull().sum().sum())

0
0
0


<b> label encoding </b>

In [None]:
from sklearn import preprocessing

def encode_features(dataDF):
    features = ['workclass', 'education', 'marital-status','occupation','relationship','race','gender','native-country','income']
    for feature in features:
        le = preprocessing.LabelEncoder()
        le = le.fit(dataDF[feature])
        dataDF[feature] = le.transform(dataDF[feature])
        
    return dataDF

encode_features(adult)
adult_label = adult.iloc[:,-1]
adult = adult.iloc[:,:-1]
adult.head(15)
adult_label.head()

0    0
1    0
2    1
3    1
4    0
Name: income, dtype: int64

# No Drops

<b> train & test split</b>

In [None]:
train_x, test_x, train_y, test_y = train_test_split(adult,adult_label,test_size = 0.2, random_state = 123)

In [None]:
train_x.head()

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country
21502,18,4,300379,2,8,4,1,3,4,1,0,0,12,39
24794,29,4,34292,11,9,4,8,3,4,1,0,0,38,39
32958,29,4,102345,15,10,4,13,1,4,1,0,0,52,39
4441,49,5,83444,9,13,2,12,0,4,1,0,0,85,39
30752,38,4,40077,11,9,2,3,0,4,1,0,0,50,39


In [None]:
test_x.head()

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country
20668,52,4,117700,11,9,0,1,1,4,0,0,0,40,39
1722,19,4,351757,0,6,4,8,4,4,1,0,0,30,8
39609,31,1,101345,11,9,4,6,3,4,0,0,0,40,39
15858,25,4,324854,9,13,4,12,1,4,0,0,0,40,39
41078,36,4,245521,5,4,2,5,0,4,1,0,0,35,26


In [None]:
print(train_y)

21502    0
24794    0
32958    0
4441     1
30752    1
        ..
7763     0
15377    0
17730    1
28030    0
15725    1
Name: income, Length: 39073, dtype: int64


In [None]:
print(test_y)

20668    0
1722     0
39609    0
15858    0
41078    0
        ..
147      1
46161    0
15119    1
38550    0
22739    0
Name: income, Length: 9769, dtype: int64


## PreProcessing with Scaler

### 1. StandardScaler

In [None]:
std_scaler = StandardScaler()
std_train_x = std_scaler.fit_transform(train_x)
std_test_x = std_scaler.fit_transform(test_x)

### 2. MinMaxScaler

In [None]:
mm_scaler = MinMaxScaler()
mm_train_x = mm_scaler.fit_transform(train_x)
mm_test_x = mm_scaler.fit_transform(test_x)

## No PreProcessing Scaler

### Decision Tree & Random Forest & Logistic Regression

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

dt_clf = DecisionTreeClassifier(random_state=156)
rf_clf = RandomForestClassifier(random_state=156)
lr_clf = LogisticRegression()

In [None]:
dt_clf.fit(train_x,train_y)
rf_clf.fit(train_x,train_y)
lr_clf.fit(train_x,train_y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
pred_dt = dt_clf.predict(test_x)
pred_rf = rf_clf.predict(test_x)
pred_lr = lr_clf.predict(test_x)

In [None]:
pred_dt_proba = dt_clf.predict_proba(test_x)[:, 1]
pred_rf_proba = rf_clf.predict_proba(test_x)[:, 1]
pred_lr_proba = lr_clf.predict_proba(test_x)[:, 1]

## PreProcessing Standard Scaler

### Decision Tree & Random Forest & Logistic Regression

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

std_dt_clf = DecisionTreeClassifier(random_state=156)
std_rf_clf = RandomForestClassifier(random_state=156)
std_lr_clf = LogisticRegression()

In [None]:
std_dt_clf.fit(std_train_x,train_y)
std_rf_clf.fit(std_train_x,train_y)
std_lr_clf.fit(std_train_x,train_y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
std_pred_dt = std_dt_clf.predict(std_test_x)
std_pred_rf = std_rf_clf.predict(std_test_x)
std_pred_lr = std_lr_clf.predict(std_test_x)

In [None]:
std_pred_dt_proba = std_dt_clf.predict_proba(std_test_x)[:, 1]
std_pred_rf_proba = std_rf_clf.predict_proba(std_test_x)[:, 1]
std_pred_lr_proba = std_lr_clf.predict_proba(std_test_x)[:, 1]

## PreProcessing MinMax Scaler

### Decision Tree & Random Forest & Logistic Regression

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

mm_dt_clf = DecisionTreeClassifier(random_state=156)
mm_rf_clf = RandomForestClassifier(random_state=156)
mm_lr_clf = LogisticRegression()

In [None]:
mm_dt_clf.fit(mm_train_x,train_y)
mm_rf_clf.fit(mm_train_x,train_y)
mm_lr_clf.fit(mm_train_x,train_y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
mm_pred_dt = mm_dt_clf.predict(mm_test_x)
mm_pred_rf = mm_rf_clf.predict(mm_test_x)
mm_pred_lr = mm_lr_clf.predict(mm_test_x)

In [None]:
mm_pred_dt_proba = mm_dt_clf.predict_proba(mm_test_x)[:, 1]
mm_pred_rf_proba = mm_rf_clf.predict_proba(mm_test_x)[:, 1]
mm_pred_lr_proba = mm_lr_clf.predict_proba(mm_test_x)[:, 1]

## 평가

In [None]:
def get_clf_eval(y_test, pred=None, pred_proba=None):
    confusion = confusion_matrix( y_test, pred)
    accuracy = accuracy_score(y_test , pred)
    precision = precision_score(y_test , pred)
    recall = recall_score(y_test , pred)
    f1 = f1_score(y_test,pred)
    # ROC-AUC 추가 
    roc_auc = roc_auc_score(y_test, pred_proba)
    print('오차 행렬')
    print(confusion)
    # ROC-AUC print 추가
    print('정확도: {0:.4f}, 정밀도: {1:.4f}, 재현율: {2:.4f},\
    F1: {3:.4f}, AUC:{4:.4f}'.format(accuracy, precision, recall, f1, roc_auc))

### 1. Decision Tree

In [None]:
print('no scaler')
get_clf_eval(test_y, pred_dt, pred_dt_proba)
print('standard scaler')
get_clf_eval(test_y, std_pred_dt, std_pred_dt_proba)
print('minmax scaler')
get_clf_eval(test_y, mm_pred_dt, pred_dt_proba)


no scaler
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
standard scaler
오차 행렬
[[6443  974]
 [ 956 1396]]
정확도: 0.8024, 정밀도: 0.5890, 재현율: 0.5935,    F1: 0.5913, AUC:0.7313
minmax scaler
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7487



### 2. Random Forest

In [None]:
print('no scaler')
get_clf_eval(test_y, pred_rf, pred_rf_proba)
print('standard scaler')
get_clf_eval(test_y, std_pred_rf, std_pred_rf_proba)
print('minmax scaler')
get_clf_eval(test_y, mm_pred_rf, pred_rf_proba)

no scaler
오차 행렬
[[6934  483]
 [ 880 1472]]
정확도: 0.8605, 정밀도: 0.7529, 재현율: 0.6259,    F1: 0.6835, AUC:0.9064
standard scaler
오차 행렬
[[6938  479]
 [ 936 1416]]
정확도: 0.8552, 정밀도: 0.7472, 재현율: 0.6020,    F1: 0.6668, AUC:0.9010
minmax scaler
오차 행렬
[[6856  561]
 [ 900 1452]]
정확도: 0.8504, 정밀도: 0.7213, 재현율: 0.6173,    F1: 0.6653, AUC:0.9064


### 3.Logistic Regression

In [None]:
print('no scaler')
get_clf_eval(test_y, pred_lr, pred_lr_proba)
print('standard scaler')
get_clf_eval(test_y, std_pred_lr, std_pred_lr_proba)
print('minmax scaler')
get_clf_eval(test_y, mm_pred_lr, pred_lr_proba)

no scaler
오차 행렬
[[7169  248]
 [1728  624]]
정확도: 0.7977, 정밀도: 0.7156, 재현율: 0.2653,    F1: 0.3871, AUC:0.5591
standard scaler
오차 행렬
[[6980  437]
 [1298 1054]]
정확도: 0.8224, 정밀도: 0.7069, 재현율: 0.4481,    F1: 0.5485, AUC:0.8528
minmax scaler
오차 행렬
[[6926  491]
 [1262 1090]]
정확도: 0.8206, 정밀도: 0.6894, 재현율: 0.4634,    F1: 0.5543, AUC:0.5591


## **쓰레쉬홀드 찾아서 최적 parameter 찾기 연습**

In [None]:
from sklearn.preprocessing import Binarizer

def get_eval_by_threshold(y_test , pred_proba_c1, thresholds):
    # thresholds 리스트 객체내의 값을 차례로 iteration하면서 Evaluation 수행.
    for custom_threshold in thresholds:
        binarizer = Binarizer(threshold=custom_threshold).fit(pred_proba_c1) 
        custom_predict = binarizer.transform(pred_proba_c1)
        print('임곗값:',custom_threshold)
        get_clf_eval(y_test , custom_predict, pred_proba_c1)

1. No scale

In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, pred_dt_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.33
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.36
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.39
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.42
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.45
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.48
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487
임곗값: 0.5
오차 행렬
[[6448  969]
 [ 875 1477]]
정확도: 0.8112, 정밀도: 0.6038, 재현율: 0.6280,    F1: 0.6157, AUC:0.7487


In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, pred_rf_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6288 1129]
 [ 503 1849]]
정확도: 0.8329, 정밀도: 0.6209, 재현율: 0.7861,    F1: 0.6938, AUC:0.9064
임곗값: 0.33
오차 행렬
[[6425  992]
 [ 554 1798]]
정확도: 0.8417, 정밀도: 0.6444, 재현율: 0.7645,    F1: 0.6993, AUC:0.9064
임곗값: 0.36
오차 행렬
[[6520  897]
 [ 609 1743]]
정확도: 0.8458, 정밀도: 0.6602, 재현율: 0.7411,    F1: 0.6983, AUC:0.9064
임곗값: 0.39
오차 행렬
[[6641  776]
 [ 668 1684]]
정확도: 0.8522, 정밀도: 0.6846, 재현율: 0.7160,    F1: 0.6999, AUC:0.9064
임곗값: 0.42
오차 행렬
[[6739  678]
 [ 716 1636]]
정확도: 0.8573, 정밀도: 0.7070, 재현율: 0.6956,    F1: 0.7012, AUC:0.9064
임곗값: 0.45
오차 행렬
[[6807  610]
 [ 779 1573]]
정확도: 0.8578, 정밀도: 0.7206, 재현율: 0.6688,    F1: 0.6937, AUC:0.9064
임곗값: 0.48
오차 행렬
[[6885  532]
 [ 840 1512]]
정확도: 0.8596, 정밀도: 0.7397, 재현율: 0.6429,    F1: 0.6879, AUC:0.9064
임곗값: 0.5
오차 행렬
[[6934  483]
 [ 880 1472]]
정확도: 0.8605, 정밀도: 0.7529, 재현율: 0.6259,    F1: 0.6835, AUC:0.9064


In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, pred_lr_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6773  644]
 [1612  740]]
정확도: 0.7691, 정밀도: 0.5347, 재현율: 0.3146,    F1: 0.3961, AUC:0.5591
임곗값: 0.33
오차 행렬
[[6895  522]
 [1623  729]]
정확도: 0.7804, 정밀도: 0.5827, 재현율: 0.3099,    F1: 0.4047, AUC:0.5591
임곗값: 0.36
오차 행렬
[[6940  477]
 [1627  725]]
정확도: 0.7846, 정밀도: 0.6032, 재현율: 0.3082,    F1: 0.4080, AUC:0.5591
임곗값: 0.39
오차 행렬
[[6987  430]
 [1635  717]]
정확도: 0.7886, 정밀도: 0.6251, 재현율: 0.3048,    F1: 0.4098, AUC:0.5591
임곗값: 0.42
오차 행렬
[[7030  387]
 [1643  709]]
정확도: 0.7922, 정밀도: 0.6469, 재현율: 0.3014,    F1: 0.4113, AUC:0.5591
임곗값: 0.45
오차 행렬
[[7080  337]
 [1666  686]]
정확도: 0.7950, 정밀도: 0.6706, 재현율: 0.2917,    F1: 0.4065, AUC:0.5591
임곗값: 0.48
오차 행렬
[[7129  288]
 [1702  650]]
정확도: 0.7963, 정밀도: 0.6930, 재현율: 0.2764,    F1: 0.3951, AUC:0.5591
임곗값: 0.5
오차 행렬
[[7169  248]
 [1728  624]]
정확도: 0.7977, 정밀도: 0.7156, 재현율: 0.2653,    F1: 0.3871, AUC:0.5591


2. standard scale

In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, std_pred_dt_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.33
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.36
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.39
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.42
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.45
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.48
오차 행렬
[[6443  974]
 [ 955 1397]]
정확도: 0.8025, 정밀도: 0.5892, 재현율: 0.5940,    F1: 0.5916, AUC:0.7313
임곗값: 0.5
오차 행렬
[[6443  974]
 [ 956 1396]]
정확도: 0.8024, 정밀도: 0.5890, 재현율: 0.5935,    F1: 0.5913, AUC:0.7313


In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, std_pred_rf_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6278 1139]
 [ 534 1818]]
정확도: 0.8287, 정밀도: 0.6148, 재현율: 0.7730,    F1: 0.6849, AUC:0.9010
임곗값: 0.33
오차 행렬
[[6415 1002]
 [ 593 1759]]
정확도: 0.8367, 정밀도: 0.6371, 재현율: 0.7479,    F1: 0.6881, AUC:0.9010
임곗값: 0.36
오차 행렬
[[6525  892]
 [ 645 1707]]
정확도: 0.8427, 정밀도: 0.6568, 재현율: 0.7258,    F1: 0.6896, AUC:0.9010
임곗값: 0.39
오차 행렬
[[6639  778]
 [ 711 1641]]
정확도: 0.8476, 정밀도: 0.6784, 재현율: 0.6977,    F1: 0.6879, AUC:0.9010
임곗값: 0.42
오차 행렬
[[6729  688]
 [ 766 1586]]
정확도: 0.8512, 정밀도: 0.6974, 재현율: 0.6743,    F1: 0.6857, AUC:0.9010
임곗값: 0.45
오차 행렬
[[6810  607]
 [ 835 1517]]
정확도: 0.8524, 정밀도: 0.7142, 재현율: 0.6450,    F1: 0.6778, AUC:0.9010
임곗값: 0.48
오차 행렬
[[6891  526]
 [ 897 1455]]
정확도: 0.8543, 정밀도: 0.7345, 재현율: 0.6186,    F1: 0.6716, AUC:0.9010
임곗값: 0.5
오차 행렬
[[6938  479]
 [ 936 1416]]
정확도: 0.8552, 정밀도: 0.7472, 재현율: 0.6020,    F1: 0.6668, AUC:0.9010


In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, std_pred_lr_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6220 1197]
 [ 774 1578]]
정확도: 0.7982, 정밀도: 0.5686, 재현율: 0.6709,    F1: 0.6156, AUC:0.8528
임곗값: 0.33
오차 행렬
[[6399 1018]
 [ 858 1494]]
정확도: 0.8080, 정밀도: 0.5947, 재현율: 0.6352,    F1: 0.6143, AUC:0.8528
임곗값: 0.36
오차 행렬
[[6538  879]
 [ 952 1400]]
정확도: 0.8126, 정밀도: 0.6143, 재현율: 0.5952,    F1: 0.6046, AUC:0.8528
임곗값: 0.39
오차 행렬
[[6667  750]
 [1027 1325]]
정확도: 0.8181, 정밀도: 0.6386, 재현율: 0.5634,    F1: 0.5986, AUC:0.8528
임곗값: 0.42
오차 행렬
[[6775  642]
 [1101 1251]]
정확도: 0.8216, 정밀도: 0.6609, 재현율: 0.5319,    F1: 0.5894, AUC:0.8528
임곗값: 0.45
오차 행렬
[[6862  555]
 [1181 1171]]
정확도: 0.8223, 정밀도: 0.6784, 재현율: 0.4979,    F1: 0.5743, AUC:0.8528
임곗값: 0.48
오차 행렬
[[6933  484]
 [1258 1094]]
정확도: 0.8217, 정밀도: 0.6933, 재현율: 0.4651,    F1: 0.5567, AUC:0.8528
임곗값: 0.5
오차 행렬
[[6980  437]
 [1298 1054]]
정확도: 0.8224, 정밀도: 0.7069, 재현율: 0.4481,    F1: 0.5485, AUC:0.8528


3. minmax scale

In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, mm_pred_dt_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.33
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.36
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.39
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.42
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.45
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.48
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089
임곗값: 0.5
오차 행렬
[[6359 1058]
 [1034 1318]]
정확도: 0.7859, 정밀도: 0.5547, 재현율: 0.5604,    F1: 0.5575, AUC:0.7089


In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, mm_pred_rf_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6156 1261]
 [ 499 1853]]
정확도: 0.8198, 정밀도: 0.5951, 재현율: 0.7878,    F1: 0.6780, AUC:0.8986
임곗값: 0.33
오차 행렬
[[6282 1135]
 [ 556 1796]]
정확도: 0.8269, 정밀도: 0.6128, 재현율: 0.7636,    F1: 0.6799, AUC:0.8986
임곗값: 0.36
오차 행렬
[[6409 1008]
 [ 605 1747]]
정확도: 0.8349, 정밀도: 0.6341, 재현율: 0.7428,    F1: 0.6842, AUC:0.8986
임곗값: 0.39
오차 행렬
[[6522  895]
 [ 666 1686]]
정확도: 0.8402, 정밀도: 0.6532, 재현율: 0.7168,    F1: 0.6836, AUC:0.8986
임곗값: 0.42
오차 행렬
[[6627  790]
 [ 736 1616]]
정확도: 0.8438, 정밀도: 0.6717, 재현율: 0.6871,    F1: 0.6793, AUC:0.8986
임곗값: 0.45
오차 행렬
[[6718  699]
 [ 795 1557]]
정확도: 0.8471, 정밀도: 0.6902, 재현율: 0.6620,    F1: 0.6758, AUC:0.8986
임곗값: 0.48
오차 행렬
[[6798  619]
 [ 869 1483]]
정확도: 0.8477, 정밀도: 0.7055, 재현율: 0.6305,    F1: 0.6659, AUC:0.8986
임곗값: 0.5
오차 행렬
[[6856  561]
 [ 900 1452]]
정확도: 0.8504, 정밀도: 0.7213, 재현율: 0.6173,    F1: 0.6653, AUC:0.8986


In [None]:
thresholds = [0.3 , 0.33 ,0.36,0.39, 0.42 , 0.45 ,0.48, 0.50]
get_eval_by_threshold(test_y, mm_pred_lr_proba.reshape(-1,1), thresholds )

임곗값: 0.3
오차 행렬
[[6078 1339]
 [ 702 1650]]
정확도: 0.7911, 정밀도: 0.5520, 재현율: 0.7015,    F1: 0.6179, AUC:0.8506
임곗값: 0.33
오차 행렬
[[6273 1144]
 [ 809 1543]]
정확도: 0.8001, 정밀도: 0.5742, 재현율: 0.6560,    F1: 0.6124, AUC:0.8506
임곗값: 0.36
오차 행렬
[[6432  985]
 [ 886 1466]]
정확도: 0.8085, 정밀도: 0.5981, 재현율: 0.6233,    F1: 0.6105, AUC:0.8506
임곗값: 0.39
오차 행렬
[[6577  840]
 [ 975 1377]]
정확도: 0.8142, 정밀도: 0.6211, 재현율: 0.5855,    F1: 0.6028, AUC:0.8506
임곗값: 0.42
오차 행렬
[[6685  732]
 [1051 1301]]
정확도: 0.8175, 정밀도: 0.6399, 재현율: 0.5531,    F1: 0.5934, AUC:0.8506
임곗값: 0.45
오차 행렬
[[6790  627]
 [1126 1226]]
정확도: 0.8206, 정밀도: 0.6616, 재현율: 0.5213,    F1: 0.5831, AUC:0.8506
임곗값: 0.48
오차 행렬
[[6881  536]
 [1212 1140]]
정확도: 0.8211, 정밀도: 0.6802, 재현율: 0.4847,    F1: 0.5660, AUC:0.8506
임곗값: 0.5
오차 행렬
[[6926  491]
 [1262 1090]]
정확도: 0.8206, 정밀도: 0.6894, 재현율: 0.4634,    F1: 0.5543, AUC:0.8506
