https://gist.github.com/maidens/29939b3383a5e57935491303cf0d8e0b

In [None]:
import os
import time
import warnings
import graphviz
import pydotplus
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob


from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.svm import SVC

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.tree import export_graphviz

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.metrics import roc_curve

from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import calibration_curve

from imblearn.over_sampling import SMOTE

import xgboost as xgb
from xgboost import XGBClassifier

import lightgbm
from lightgbm import LGBMClassifier
from lightgbm import plot_importance

import shap

In [None]:
from __future__ import print_function, division

from math import sqrt

from scipy.special import ndtri

In [None]:
def _proportion_confidence_interval(r, n, z):
    
    A = 2*r + z**2
    B = z*sqrt(z**2 + 4*r*(1 - r/n))
    C = 2*(n + z**2)
    return ((A-B)/C, (A+B)/C)


In [None]:
def sensitivity_and_specificity_with_confidence_intervals(TP, FP, FN, TN, alpha=0.95):
    """Compute sensitivity and specificity with their confidence intervals.

    Args:
        TP: Number of true positives.
        FP: Number of false positives.
        FN: Number of false negatives.
        TN: Number of true negatives.
        alpha: Despite the name, this is used as the confidence *level*
            (0.95 yields a 95% interval), not the significance level.

    Returns:
        A 4-tuple ``(sensitivity, specificity, sensitivity_interval,
        specificity_interval)``; each interval is the ``(lower, upper)`` pair
        returned by ``_proportion_confidence_interval``.
    """
    # Two-sided critical value: ndtri of the lower tail is negative, so negate it.
    lower_tail = (1.0-alpha)/2
    z = -ndtri(lower_tail)

    # Sensitivity is the share of actual positives that were detected.
    actual_positives = TP + FN
    sensitivity_point_estimate = TP/actual_positives
    sensitivity_confidence_interval = _proportion_confidence_interval(
        TP, actual_positives, z
    )

    # Specificity is the share of actual negatives that were rejected.
    actual_negatives = TN + FP
    specificity_point_estimate = TN/actual_negatives
    specificity_confidence_interval = _proportion_confidence_interval(
        TN, actual_negatives, z
    )

    return (
        sensitivity_point_estimate,
        specificity_point_estimate,
        sensitivity_confidence_interval,
        specificity_confidence_interval,
    )

# Read Excel File

In [None]:
# Load the reference results table. Later cells read its 'combination',
# 'Sensitivity', 'Specificity' and 'Optimal_CutOff_Test_{TN,TP,FN,FP}' columns.
# NOTE(review): no workbook path is passed, and read_excel requires its `io`
# argument, so this call fails as written — supply the path to the results file.
reference_df = pd.read_excel()

In [None]:
# Show the loaded table as this cell's output.
reference_df 

# Get Confidence Interval

In [None]:
# Every value of the 'combination' column, in row order. Despite the `_lst`
# suffix, `.values` yields a NumPy array rather than a Python list.
combination_lst = reference_df['combination'].values

In [None]:
# Show the combination names as this cell's output.
combination_lst

In [None]:
# Spot check: show the row(s) whose 'combination' equals 'K12'.
reference_df[reference_df['combination'] == 'K12']

In [None]:
# Spot check: the 'Sensitivity' value of the first 'K12' row — the same
# lookup pattern the summary loop below applies to every combination.
reference_df[reference_df['combination'] == 'K12']['Sensitivity'].values[0]

In [None]:
# Build one summary row per model combination: the sensitivity/specificity
# point estimates and their Wilson confidence bounds, computed from the
# confusion-matrix counts stored in reference_df.

# Single source for the confidence level, so the value used in the
# computation and the value printed in the labels cannot drift apart.
confidence_level = 0.95

total = 0  # stays 0 when there are no combinations to process
summary_rows = []

for total, comb in enumerate(combination_lst, start=1):
    print('#{} : {}'.format(total, comb))

    # Extract the data: the first reference row matching this combination.
    target_df = reference_df[reference_df['combination'] == comb]
    original_sensitivity = target_df['Sensitivity'].values[0]
    original_specificity = target_df['Specificity'].values[0]

    TN = target_df['Optimal_CutOff_Test_TN'].values[0]
    TP = target_df['Optimal_CutOff_Test_TP'].values[0]
    FN = target_df['Optimal_CutOff_Test_FN'].values[0]
    FP = target_df['Optimal_CutOff_Test_FP'].values[0]

    print('& Original Info')
    print(original_sensitivity, original_specificity)
    print(TN, FP, FN, TP)
    print()

    # Compute the confidence intervals (Wilson method).
    (
        sensitivity_point_estimate,
        specificity_point_estimate,
        sensitivity_confidence_interval,
        specificity_confidence_interval,
    ) = sensitivity_and_specificity_with_confidence_intervals(
        TP, FP, FN, TN, alpha=confidence_level
    )

    print("Sensitivity: %f, Specificity: %f" %(sensitivity_point_estimate, specificity_point_estimate))
    print("alpha = %f CI for sensitivity:"%confidence_level, sensitivity_confidence_interval)
    print("alpha = %f CI for specificity:"%confidence_level, specificity_confidence_interval)
    print("")

    # Collect this combination's row; the table is assembled after the loop.
    summary_rows.append({
        'Model_Combination': comb,
        'Sensitivity_wilson': sensitivity_point_estimate,
        'Sensitivity_upper_wilson': sensitivity_confidence_interval[-1],
        'Sensitivity_lower_wilson': sensitivity_confidence_interval[0],
        'Specificity_wilson': specificity_point_estimate,
        'Specificity_upper_wilson': specificity_confidence_interval[-1],
        'Specificity_lower_wilson': specificity_confidence_interval[0],
    })

# Build the frame once instead of calling pd.concat per iteration: that
# re-copied the accumulated rows every time and left every row with index
# label 0. The result here has a 0..n-1 index (and is empty for no rows).
total_df = pd.DataFrame(summary_rows)

In [None]:
# Show the assembled summary table as this cell's output.
total_df