In [None]:
import os
import time
import warnings
import graphviz
import pydotplus
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob


from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.svm import SVC

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.tree import export_graphviz

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.metrics import roc_curve

from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import calibration_curve

from imblearn.over_sampling import SMOTE

import xgboost as xgb
from xgboost import XGBClassifier

import lightgbm
from lightgbm import LGBMClassifier
from lightgbm import plot_importance

import shap

In [None]:
# selection
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif, RFECV, SelectFromModel
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [None]:
# evaluation
import sklearn
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, roc_auc_score
from sklearn.metrics import classification_report
from delong import auc_ci

In [None]:
def get_clf_eval(y_test, pred = None, pred_proba = None):
    """Print the confusion matrix and summary metrics for a classifier.

    Parameters
    ----------
    y_test
        Ground-truth labels; passed as the first argument to every metric.
    pred
        Predicted class labels. Despite the ``None`` default this argument is
        required in practice: it is forwarded unchanged to
        ``confusion_matrix`` and the other label-based metrics.
    pred_proba
        Scores forwarded to ``roc_auc_score``. When left as ``None`` the AUC
        is skipped and omitted from the printed summary instead of failing
        inside ``roc_auc_score``.

    Returns
    -------
    None
        The results are only printed to stdout.
    """
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)

    # ROC AUC needs scores; compute it (before any printing) only when they are given.
    roc_auc = None if pred_proba is None else roc_auc_score(y_test, pred_proba)

    print('오차 행렬')
    print(confusion)

    summary = '정확도: {:.4f}, 정밀도: {:.4f}, 재현율: {:.4f}, F1: {:.4f}'.format(accuracy, precision, recall, f1)
    if roc_auc is not None:
        # Append the AUC so the line matches the original output when scores are supplied.
        summary += ', AUC: {:.4f}'.format(roc_auc)
    print(summary)

# Load Excel

In [None]:
# Read one sheet of the results workbook into a DataFrame.
# NOTE(review): "File name" / 'sheet name' look like placeholders - replace with the real path and sheet.
reference_df = pd.read_excel(
    "File name",
    sheet_name='sheet name',
)

In [None]:
# Display the loaded results table as the cell output.
reference_df 

In [None]:
# Show the raw 'TEST_proba' entry of the first row whose combination is 'K12'.
reference_df.loc[reference_df['combination'] == 'K12', 'TEST_proba'].values[0]

In [None]:
# Parse the 'TEST_test_label' entry of the 'K12' row into an integer array:
# drop the first and last character, split on commas, remove spaces, cast to int.
label_info = np.array(
    [
        int(token.replace(' ', ''))
        for token in (
            reference_df
            .loc[reference_df['combination'] == 'K12', 'TEST_test_label']
            .values[0][1:-1]
            .split(',')
        )
    ]
)

In [None]:
# Display the parsed label array as the cell output.
label_info

# TPR & FPR 

In [None]:
# Preview the 'K12' row's 'TEST_proba' entry parsed into a float array
# (first and last character dropped, remainder split on commas).
np.array(
    list(
        map(
            float,
            reference_df.loc[reference_df['combination'] == 'K12', 'TEST_proba'].values[0][1:-1].split(','),
        )
    )
)

In [None]:
# Display the results table again as the cell output.
reference_df 

In [None]:
def parse_test_proba(results_df, combination):
    """Return the 'TEST_proba' entry of one combination as a float array.

    The entry of the first row whose 'combination' column equals
    ``combination`` is read, its first and last character are dropped, and the
    remainder is split on commas and converted to floats.

    Parameters
    ----------
    results_df
        DataFrame with 'combination' and 'TEST_proba' columns.
    combination
        Value to match in the 'combination' column (exact string match).

    Returns
    -------
    numpy.ndarray
        One float per comma-separated item of the stored entry.

    Raises
    ------
    IndexError
        If no row matches ``combination``.
    """
    cells = results_df.loc[results_df['combination'] == combination, 'TEST_proba'].values
    if len(cells) == 0:
        # Same exception type as indexing an empty array, but names the missing key.
        raise IndexError("no row with combination == {!r}".format(combination))
    return np.array([float(token) for token in cells[0][1:-1].split(',')])


# Score arrays per feature combination; the keys must match the sheet's spelling exactly.
K12_proba = parse_test_proba(reference_df, 'K12')
K12_Ve_proba = parse_test_proba(reference_df, 'K12+Ve')
Conven_proba = parse_test_proba(reference_df, 'conven')
Ve_proba = parse_test_proba(reference_df, 'Ve')
Conven_K12_Ve_proba = parse_test_proba(reference_df, 'conven +K12 +Ve')

In [None]:
# ROC curve points (false-positive rate, true-positive rate, thresholds) for each
# feature combination, all evaluated against the same label array.
K12_fpr, K12_tpr, K12_thresholds = roc_curve(y_true=label_info, y_score=K12_proba)
K12_Ve_fpr, K12_Ve_tpr, K12_Ve_thresholds = roc_curve(y_true=label_info, y_score=K12_Ve_proba)
Conven_fpr, Conven_tpr, Conven_thresholds = roc_curve(y_true=label_info, y_score=Conven_proba)
Ve_fpr, Ve_tpr, Ve_thresholds = roc_curve(y_true=label_info, y_score=Ve_proba)
Conven_K12_Ve_fpr, Conven_K12_Ve_tpr, Conven_K12_Ve_thresholds = roc_curve(
    y_true=label_info,
    y_score=Conven_K12_Ve_proba,
)

In [None]:
# AUC of the K12 scores against the test labels.
roc_auc_score(y_true=label_info, y_score=K12_proba)

In [None]:
# AUC of the K12+Ve scores against the test labels.
roc_auc_score(y_true=label_info, y_score=K12_Ve_proba)

In [None]:
# AUC of the 'conven' scores against the test labels.
roc_auc_score(y_true=label_info, y_score=Conven_proba)

In [None]:
# AUC of the Ve scores against the test labels.
roc_auc_score(y_true=label_info, y_score=Ve_proba)

In [None]:
# AUC of the 'conven +K12 +Ve' scores against the test labels.
roc_auc_score(y_true=label_info, y_score=Conven_K12_Ve_proba)

# Draw ROC-AUC Curve

In [None]:
# One entry per curve: (legend name, scores, FPR, TPR, matplotlib format string).
roc_series_short = [
    ("K12", K12_proba, K12_fpr, K12_tpr, 'C0-'),
    ("K12 + Ve", K12_Ve_proba, K12_Ve_fpr, K12_Ve_tpr, 'C2-'),
    ("Conv", Conven_proba, Conven_fpr, Conven_tpr, 'C3-'),
    ("Ve", Ve_proba, Ve_fpr, Ve_tpr, 'C4-'),
    ("Conv + K12 + Ve", Conven_K12_Ve_proba, Conven_K12_Ve_fpr, Conven_K12_Ve_tpr, 'C5-'),
]

fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title("ROC-AUC Curve(Total)", fontsize=20, pad=20)

for curve_name, curve_scores, curve_fpr, curve_tpr, curve_fmt in roc_series_short:
    # Compute the AUC shown in the legend from the data rather than hard-coding it,
    # so the label cannot drift from the plotted curve.
    curve_auc = roc_auc_score(label_info, curve_scores)
    ax.plot(
        curve_fpr,
        curve_tpr,
        curve_fmt,
        label='{} : {:.3f}'.format(curve_name, curve_auc),
        linewidth=3,
    )

# Chance-level diagonal. Original author's note: argument order matters here,
# otherwise an error is raised (presumably x data, y data, then the format string).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate(FPR)', fontsize=15, labelpad=15)
ax.set_ylabel('True Positive Rate(TPR)', fontsize=15, labelpad=15)
ax.legend(loc='lower right', fontsize=13)
ax.grid(False)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.show()

In [None]:
# One entry per curve: (legend name, scores, FPR, TPR, matplotlib format string).
# Same curves as the K12-named figure, with descriptive legend names.
roc_series_full = [
    ("Ktrans", K12_proba, K12_fpr, K12_tpr, 'C0-'),
    ("Ktrans + Ve", K12_Ve_proba, K12_Ve_fpr, K12_Ve_tpr, 'C2-'),
    ("Conventional", Conven_proba, Conven_fpr, Conven_tpr, 'C3-'),
    ("Ve", Ve_proba, Ve_fpr, Ve_tpr, 'C4-'),
    ("Conventional + Ktrans + Ve", Conven_K12_Ve_proba, Conven_K12_Ve_fpr, Conven_K12_Ve_tpr, 'C5-'),
]

fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title("ROC-AUC Curve(Total)", fontsize=20, pad=20)

for curve_name, curve_scores, curve_fpr, curve_tpr, curve_fmt in roc_series_full:
    # Compute the AUC shown in the legend from the data rather than hard-coding it,
    # so the label cannot drift from the plotted curve.
    curve_auc = roc_auc_score(label_info, curve_scores)
    ax.plot(
        curve_fpr,
        curve_tpr,
        curve_fmt,
        label='{} : {:.3f}'.format(curve_name, curve_auc),
        linewidth=3,
    )

# Chance-level diagonal. Original author's note: argument order matters here,
# otherwise an error is raised (presumably x data, y data, then the format string).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate(FPR)', fontsize=15, labelpad=15)
ax.set_ylabel('True Positive Rate(TPR)', fontsize=15, labelpad=15)
ax.legend(loc='lower right', fontsize=13)
ax.grid(False)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.show()