# Setup

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from random import randint
import math
from datetime import datetime
import pickle

from sklearn.ensemble import ExtraTreesClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

In [2]:
os.getcwd()

'E:\\Github\\Stomach-Status-Classification\\Experiment\\Approach\\ExtraTreeClassifier'

In [3]:
for i in range(3):
    os.chdir("..")

In [4]:
os.getcwd()

'E:\\Github\\Stomach-Status-Classification'

In [5]:
main_data_dir = os.getcwd() + "\\Data set"
kmean_data_dir = main_data_dir + "\\kmean_dataset"
kmean_data_10cv_dir = kmean_data_dir + "\\10cv_512"
import glob

kmean_data_10cv_lst = [x for x in glob.glob(kmean_data_10cv_dir + '\\*') if 'orb' in x and 'index' in x and '10000' in x]
print(kmean_data_10cv_lst)

[]


In [6]:
kmean_train_paths = [x for x in kmean_data_10cv_lst if 'train' in x]
kmean_test_paths = [x for x in kmean_data_10cv_lst if 'test' in x]
print(kmean_train_paths)
print()
print(kmean_test_paths)
print(len(kmean_train_paths))
print(len(kmean_test_paths))

[]

[]
0
0


In [7]:
def to_categorical(y, num_classes=None, dtype='float32'):
    """to_categorical _summary_

    Arguments:
        y -- The label of the data set with the shape of [None, 1]

    Keyword Arguments:
        num_classes -- The num_classes in the data set (default: {None})
        dtype -- the type of each element of the label after reshape (default: {'float32'})

    Returns:
        the label of the data set with the shape of [number of samples, number of classes].
    """    
    y = np.array(y, dtype='int')
    input_shape = y.shape
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes), dtype=dtype)
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical

In [8]:
class roc_curve_score:
    def __init__(self, y_true, y_pred, num_class):
        self.y_test = to_categorical(y_true)
        self.y_score = to_categorical(y_pred)
        print(np.unique(self.y_test), np.unique(self.y_score))
        self.fprs = {}
        self.tprs = {}
        self.thresh_holds = {}
        for x in range(num_class):
            self.fprs[x], self.tprs[x], self.thresh_holds[x] = roc_curve(self.y_test[:, x], self.y_score[:, x], drop_intermediate=False)
            self.fprs[x] = self.fprs[x].tolist()
            self.tprs[x] = self.tprs[x].tolist()
            self.thresh_holds[x] = self.thresh_holds[x].tolist()
        
        self.fpr_micro_avg, self.tpr_micro_avg, self.threshold_micro_avg, = roc_curve(self.y_test.ravel(), self.y_score.ravel())
        
        all_fpr = np.unique(np.concatenate([self.fprs[i] for i in range(num_class)]))
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(num_class):
            mean_tpr += np.interp(all_fpr, self.fprs[i], self.tprs[i])
        mean_tpr /= num_class
        self.fpr_macro_avg = all_fpr.tolist()
        self.tpr_macro_avg = mean_tpr.tolist()
    
    def get_tpr(self, _class = None):
        if(_class):
            return self.tprs[_class]
        else:
            return self.tprs
    
    def get_fpr(self, _class = None):
        if(_class):
            return self.fprs[_class]
        else:
            return self.fprs

    def get_thresholds(self, _class = None):
        if(_class):
            return self.thresh_holds[_class]
        else:
            return self.thresh_holds

    def get_roc_dict(self):
        return {
            "tpr" : self.get_tpr(),
            "fpr" : self.get_fpr(),
            "thresholds" : self.get_thresholds(),
            "fpr_micro_avg" : self.fpr_micro_avg.tolist(),
            "tpr_micro_avg" : self.tpr_micro_avg.tolist(),
            "fpr_macro_avg" : self.fpr_macro_avg,
            "tpr_macro_avg" : self.tpr_macro_avg,
        }

In [9]:
class utils:
    def __init__(self, confusion_matrix, y_true, y_pred):
        """
            - confusion_matrix: 2x2 numpy array
            - y_true: array of label
            - y_pred: array of output value calculated by model
            - fold_count: number of folds
        """
        
        # Initilize all indicator
        self.TP = confusion_matrix[0][0] # true positive
        self.FN = confusion_matrix[1][0] # false negative
        self.FP = confusion_matrix[1][0] # false positive
        self.TN = confusion_matrix[1][1] # true negative
        self.precision = self.TP/(self.FN +  self.TP) # Precision Score - Positive Predictive Value
        self.recall = self.TP / (self.TP + self.FN)
        self.f1_score =  (2 * self.TP) / (2 * self.TP + self.FP + self.FN)
        self.sensitivity = self.TP / (self.TP + self.FN) # True Positive Rate
        self.specificity = self.TN / (self.TN + self.FP) # False Positive Rate
        self.negative_predictive_value = self.TN / (self.TN + self.FN) # Negative Predictive Value
        self.false_negative_rate = self.FN / (self.FN + self.TP) # False Negative Rate
        self.false_positive_rate = self.FP / (self.FP + self.TN) # False Positive Rate
        self.false_discovery_rate = self.FP / (self.FP + self.TP) # False Discovery Rate
        self.false_omission_rate = self.FN / (self.FN + self.TN) # False Ommision Rate
        self.positive_likelihood_ratio = self.sensitivity / self.false_positive_rate # Positive Likelihood Ratio
        self.negative_likelihood_ratio = self.false_negative_rate / self.specificity # Negative Likelihood Ratio
        self.prevalence_threshold = math.sqrt(self.false_positive_rate) / (math.sqrt(self.sensitivity) + 
                                                                      math.sqrt(self.false_positive_rate)) # Prevalance Threshold
        self.threat_score = self.TP / (self.TN + self.FN + self.FP) # Threat Score
        self.prevalence = (self.TP + self.FN)/(self.TP + self.FN + self.TN + self.FP) # Prevalance 
        #  Matthews correlation coefficient
        self.matthews_correlation_coefficient = (self.TP*self.TN - self.FN*self.FP) / ((self.TP + self.FP)
                                                                                       *(self.TP + self.FN)
                                                                                       *(self.TN + self.FP)
                                                                                       *(self.TN + self.FN))
        self.fowlkes_mallows_index = math.sqrt(self.sensitivity + self.precision) # Fowlkes–Mallows index
        self.informedness = self.sensitivity + self.specificity - 1 # informedness
        self.markedness = self.precision + self.negative_predictive_value - 1 # markedness
        self.diagnostic_odds_ratio = self.positive_likelihood_ratio / self.negative_likelihood_ratio # Diagnostic odds ratio
        self.accuracy = (self.TP + self.TN) / (self.TP + self.TN + self.FP + self.FN)
        self.balanced_accuracy = (self.sensitivity + self.specificity) / 2
        self.roc_auc_macro = roc_auc_score(y_true, y_pred)
        self.roc_auc_micro = roc_auc_score(y_true, y_pred, average = 'micro')
        self.roc_auc_weighted = roc_auc_score(y_true, y_pred, average = 'weighted')
        self.cls_report = classification_report(y_true, y_pred)
        
        # Initilize the structure of output_dicts
        self.confusion_matrix = {
            "TP" : self.TP,
            "TN" : self.TN,
            "FN" : self.FN,
            "FP" : self.FP,
            "precision" : self.precision,
            "recall" : self.recall,
            "f1_score" : self.f1_score,
            "sensitivity" : self.sensitivity,
            "specificity" : self.specificity,
            "negative_predictive_value" : self.negative_predictive_value,
            "false_negative_rate" : self.false_negative_rate,
            "false_positive_rate" : self.false_positive_rate,
            "false_discovery_rate" : self.false_discovery_rate,
            "false_omission_rate" : self.false_omission_rate,
            "Positive_likelihood_ratio" : self.positive_likelihood_ratio,
            "Negative_likelihood_ratio" : self.negative_likelihood_ratio,
            "prevalence_threshold" : self.prevalence_threshold,
            "threat_score" : self.threat_score,
            "Prevalence" : self.prevalence,
            "Matthews_correlation_coefficient" : self.matthews_correlation_coefficient,
            "Fowlkes_Mallows_index" : self.fowlkes_mallows_index,
            "informedness" : self.informedness,
            "markedness" : self.markedness,
            "Diagnostic_odds_ratio" : self.diagnostic_odds_ratio,
            "accuracy" : self.accuracy,
            "balanced_accuracy" : self.balanced_accuracy
        }
        
        self.roc_auc_score = {
            "Macro": self.roc_auc_macro, 
            "Micro": self.roc_auc_micro,
            "Weight": self.roc_auc_weighted
        }
        
#         self.roc_curve = roc_curve_score(y_true=y_true, y_pred=y_pred, num_class=2).get_roc_dict()
        
        self.sub_dict = {
            "Confusion Matrix" : self.confusion_matrix,
            "ROC_AUC_SCORE" : self.roc_auc_score,
            "Classification Report" : classification_report(y_true.tolist(), y_pred.tolist(), 
                                                            labels = [0, 1], # 0 : Licit, 1 : Illicit
                                                            output_dict = True),
#             "ROC_DRAW" : self.roc_curve
        }
    def get_value(self):
        return self.sub_dict
        
# Test 
y_true = np.array([randint(0,1) for x in range(200)])
y_pred = np.array([randint(0,1) for x in range(200)])
confusion_matrix_test = confusion_matrix(y_true, y_pred)
base_utils = utils(confusion_matrix_test, y_true, y_pred)
base_utils.get_value()

{'Confusion Matrix': {'TP': 49,
  'TN': 49,
  'FN': 61,
  'FP': 61,
  'precision': 0.44545454545454544,
  'recall': 0.44545454545454544,
  'f1_score': 0.44545454545454544,
  'sensitivity': 0.44545454545454544,
  'specificity': 0.44545454545454544,
  'negative_predictive_value': 0.44545454545454544,
  'false_negative_rate': 0.5545454545454546,
  'false_positive_rate': 0.5545454545454546,
  'false_discovery_rate': 0.5545454545454546,
  'false_omission_rate': 0.5545454545454546,
  'Positive_likelihood_ratio': 0.8032786885245901,
  'Negative_likelihood_ratio': 1.2448979591836735,
  'prevalence_threshold': 0.5273543557211182,
  'threat_score': 0.28654970760233917,
  'Prevalence': 0.5,
  'Matthews_correlation_coefficient': -9.015777610818933e-06,
  'Fowlkes_Mallows_index': 0.9438798074485389,
  'informedness': -0.10909090909090913,
  'markedness': -0.10909090909090913,
  'Diagnostic_odds_ratio': 0.6452566514377854,
  'accuracy': 0.44545454545454544,
  'balanced_accuracy': 0.44545454545454544

In [10]:
class Training:
    def __init__(self, fold_count, X, y):
        self.fold_count = fold_count
        self.param_grid = {
            "n_estimators" : [50, 100, 150, 200],
            "criterion" : ["gini", "entropy", "log_loss"],
            "max_features" : ["sqrt", "log2", None],
            "bootstrap" : [True, False],
            "class_weight" : ["balanced", "balanced_subsample", None]
        }
        self.kf = KFold(n_splits=fold_count)
        self.history = {}
        self.X = X
        self.y = y
    
    def get_fold_value(self):
        return self.kf        
        
    def training(self, n_estimators, 
                 criterion, 
                 max_features, 
                 bootstrap, 
                 class_weight, 
                 njob, train_case):
    
        # output_dict initilize
        output_dict = {}
    
        # poiter track the index of fold
        fold_index = 0
    
        for train_index, test_index in zip(self.X, self.y):
            train_df = pd.read_csv(train_index)
            test_df = pd.read_csv(test_index)
            
            print("\tFold: {}".format(fold_index))
            print("\tTRAIN:", train_index, "\n\tTEST:", test_index)
        
            # folding data
            x_train, y_train = train_df.iloc[:, 2:-1], train_df.iloc[:, -1]
            x_test, y_test = test_df.iloc[:, 2:-1], test_df.iloc[:, -1]
    
            # Training
            print("\t\tTraining : {}".format(fold_index), end = " -- ")
            print("Start: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")), end=" --- ")
            model = ExtraTreesClassifier(n_estimators = n_estimators, 
                                              criterion = criterion, 
                                              max_features = max_features, 
                                              bootstrap = bootstrap,
                                              class_weight = class_weight, 
                                              n_jobs = njob)
            model.fit(x_train,y_train)
            print("End: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")))
        
            # Testing
            print("\t\tValidation: {}".format(fold_index), end = " -- ")
            print("Start: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")), end="---")
            y_pred = model.predict(x_test)
            print("End: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")))
        
            # Evaluation
            cm = confusion_matrix(y_test, y_pred)
            current_utils = utils(cm, y_test, y_pred)
            output_dict["fold_{}".format(fold_index)] = current_utils.get_value()
            
            fold_index += 1
        print("\n")
        return output_dict 
    
    def train(self, path):
        count = 0
        for x in self.param_grid["n_estimators"]:
            for i in self.param_grid["criterion"]:
                for j in self.param_grid["max_features"]:
                    for k in self.param_grid["bootstrap"]:
                        for l in self.param_grid["class_weight"]:
                            print("Traning Case: {}".format(count))
                            self.history["train_{}".format(count)] = {
                                "param" : {
                                "n_estimators" : x,
                                "criterion" : i,
                                "max_features" : j,
                                "bootstrap" : k,
                                "class_weight" : l
                                },
                                "train_fold" : self.training(x, i, j, k, l, 5, count)
                            }
                            count += 1
        with open(path.format(self.fold_count), 'wb') as f:
            pickle.dump(self.history, f)
        
        return self.history

# Training

In [11]:
training = Training(10, kmean_train_paths, kmean_test_paths)
approach_dir = os.getcwd() + "\\Experiment\\Approach"
svm_dir = approach_dir + "\\ExtraTreeClassifier"
save_result_path = svm_dir + '\\etc_km512_orb100000_result_fold{0}_0.9.pkl'
training.train(path = save_result_path)

Traning Case: 0


Traning Case: 1


Traning Case: 2


Traning Case: 3


Traning Case: 4


Traning Case: 5


Traning Case: 6


Traning Case: 7


Traning Case: 8


Traning Case: 9


Traning Case: 10


Traning Case: 11


Traning Case: 12


Traning Case: 13


Traning Case: 14


Traning Case: 15


Traning Case: 16


Traning Case: 17


Traning Case: 18


Traning Case: 19


Traning Case: 20


Traning Case: 21


Traning Case: 22


Traning Case: 23


Traning Case: 24


Traning Case: 25


Traning Case: 26


Traning Case: 27


Traning Case: 28


Traning Case: 29


Traning Case: 30


Traning Case: 31


Traning Case: 32


Traning Case: 33


Traning Case: 34


Traning Case: 35


Traning Case: 36


Traning Case: 37


Traning Case: 38


Traning Case: 39


Traning Case: 40


Traning Case: 41


Traning Case: 42


Traning Case: 43


Traning Case: 44


Traning Case: 45


Traning Case: 46


Traning Case: 47


Traning Case: 48


Traning Case: 49


Traning Case: 50


Traning Case: 51


Traning Case: 52


Tra

{'train_0': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': True,
   'class_weight': 'balanced'},
  'train_fold': {}},
 'train_1': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': True,
   'class_weight': 'balanced_subsample'},
  'train_fold': {}},
 'train_2': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': True,
   'class_weight': None},
  'train_fold': {}},
 'train_3': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': False,
   'class_weight': 'balanced'},
  'train_fold': {}},
 'train_4': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': False,
   'class_weight': 'balanced_subsample'},
  'train_fold': {}},
 'train_5': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': False,
   'class_weight': Non

# Result Analyzing

In [12]:
file = open(save_result_path.format(10), 'rb')
result = pickle.load(file)
result

{'train_0': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': True,
   'class_weight': 'balanced'},
  'train_fold': {}},
 'train_1': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': True,
   'class_weight': 'balanced_subsample'},
  'train_fold': {}},
 'train_2': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': True,
   'class_weight': None},
  'train_fold': {}},
 'train_3': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': False,
   'class_weight': 'balanced'},
  'train_fold': {}},
 'train_4': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': False,
   'class_weight': 'balanced_subsample'},
  'train_fold': {}},
 'train_5': {'param': {'n_estimators': 50,
   'criterion': 'gini',
   'max_features': 'sqrt',
   'bootstrap': False,
   'class_weight': Non

In [13]:
print("Number of Training: {}".format(len(list(result.keys()))))

Number of Training: 216


In [14]:
training_case_lst = list(result.keys())
print(training_case_lst[0])

train_0


In [15]:
train_sample = result[training_case_lst[0]]
print(train_sample['param'])
print("Number of Fold: {}".format(len(train_sample['train_fold'])))

{'n_estimators': 50, 'criterion': 'gini', 'max_features': 'sqrt', 'bootstrap': True, 'class_weight': 'balanced'}
Number of Fold: 0


In [16]:
fold_sample = train_sample['train_fold']['fold_0']
print(fold_sample['Classification Report'])

KeyError: 'fold_0'

In [None]:
result_analyze_dict = {
    'Train' : [],
    'MACC' : [],
    'MAP' : [],
    'MASens' : [],
    'MASpec' : [],
    'MAF1' : [],
    'AUC' : []
}

for result_key in result:
    result_analyze_dict['Train'].append(result_key)
    train_base = result[result_key]
    fold_base = train_base['train_fold']
    acc_lst, pre_lst, re_lst, spec_lst, f1_lst, auc_lst = [], [], [], [], [], []
    for fold_key in fold_base:
        current_fold = fold_base[fold_key]
        current_fold_cls = current_fold['Classification Report']
        acc_lst.append(current_fold_cls['accuracy'])
        pre_lst.append(current_fold_cls['macro avg']['precision'])
        re_lst.append(current_fold_cls['macro avg']['recall'])
        spec_lst.append(current_fold['Confusion Matrix']['specificity'])
        auc_lst.append(current_fold['ROC_AUC_SCORE']['Macro'])
        f1_lst.append(current_fold_cls['macro avg']['f1-score'])
    result_analyze_dict['MACC'].append(sum(acc_lst)/len(acc_lst))
    result_analyze_dict['MAP'].append(sum(pre_lst)/len(pre_lst))
    result_analyze_dict['MASens'].append(sum(re_lst)/len(re_lst))
    result_analyze_dict['MASpec'].append(sum(spec_lst)/len(spec_lst))
    result_analyze_dict['MAF1'].append(sum(f1_lst)/len(f1_lst))
    result_analyze_dict['AUC'].append(sum(auc_lst)/len(auc_lst))

result_df = pd.DataFrame(result_analyze_dict)
result_df.head()

In [None]:
result_max = result_df.max()
result_max

In [None]:
id_result_max = result_df.iloc[:, 1:].idxmax()
id_result_max

In [None]:
id_max = 'train_{}'.format(id_result_max.mode()[0])
print(id_max)

In [None]:
result_df.loc[result_df['Train'] == id_max]

In [None]:
train_max = result[id_max]
train_max['param']

In [None]:
train_max_fold = train_max['train_fold']
train_max_analyze = {
    'Fold' : [],
    'ACC' : [],
    'AP' : [],
    'ASens' : [],
    'ASpec' : [],
    'AF1' : [],
    'AUC' : []
}

for fold_key in train_max_fold:
    train_max_analyze['Fold'].append(fold_key)
    current_fold = train_max_fold[fold_key]
    current_fold_cls = current_fold['Classification Report']
    train_max_analyze['ACC'].append(current_fold_cls['accuracy'])
    train_max_analyze['AP'].append(current_fold_cls['macro avg']['precision'])
    train_max_analyze['ASens'].append(current_fold_cls['macro avg']['recall'])
    train_max_analyze['ASpec'].append(current_fold['Confusion Matrix']['specificity'])
    train_max_analyze['AF1'].append(current_fold_cls['macro avg']['f1-score'])
    train_max_analyze['AUC'].append(current_fold['ROC_AUC_SCORE']['Macro'])

train_max_analyze_df = pd.DataFrame(train_max_analyze)
train_max_analyze_df

# N_times_K_folds

In [None]:
# Setup

train_max_param = train_max['param']
n_times = 100

nt_kf_output_dict = {
    'Train' : [], # <index>_<fold>
    'ACC' : [],
    'AP' : [],
    'ASens' : [],
    'ASpec' : [],
    'AF1' : [],
    'AUC' : []
}

for n in range(n_times):
    print("Training {}".format(n))
    fold_index = 0
    for train_index, test_index in zip(kmean_train_paths, kmean_test_paths):
        train_df = pd.read_csv(train_index)
        test_df = pd.read_csv(test_index)      
    
        print("\tFold: {}".format(fold_index))
        print("\tTRAIN:", train_index, "\n\tTEST:", test_index)

        # folding data
        x_train, y_train = train_df.iloc[:, 2:-1], train_df.iloc[:, -1]
        x_test, y_test = test_df.iloc[:, 2:-1], test_df.iloc[:, -1]
        
        # Training
        print("\t\tTraining : {}".format(fold_index), end = " -- ")
        print("Start: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")), end=" --- ")
        model_RF = ExtraTreesClassifier(n_estimators = train_max_param['n_estimators'], 
                                              criterion = train_max_param['criterion'], 
                                              max_features = train_max_param['max_features'], 
                                              bootstrap = train_max_param['bootstrap'],
                                              class_weight = train_max_param['class_weight'], 
                                              n_jobs = 3)
        model_RF.fit(x_train, y_train)
        print("End: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")))

        # Testing
        print("\t\tValidation: {}".format(fold_index), end = " -- ")
        print("Start: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")), end="---")
        y_pred = model_RF.predict(x_test)
        print("End: {}".format(datetime.now().strftime("%m/%d/%Y, %H:%M:%S")))
        
        # Evaluation
        cm = confusion_matrix(y_test, y_pred)
        base_result = utils(cm, y_test, y_pred).get_value()
        
        # Result Storing
        nt_kf_output_dict['Train'].append('{0}_{1}'.format(n, fold_index))
        cls = base_result['Classification Report']
        nt_kf_output_dict['ACC'].append(cls['accuracy'])
        nt_kf_output_dict['AP'].append(cls['macro avg']['precision'])
        nt_kf_output_dict['ASens'].append(cls['macro avg']['recall'])
        nt_kf_output_dict['ASpec'].append(base_result['Confusion Matrix']['specificity'])
        nt_kf_output_dict['AF1'].append(cls['macro avg']['f1-score'])
        nt_kf_output_dict['AUC'].append(base_result['ROC_AUC_SCORE']['Macro'])
        
        # Fold index increment
        fold_index += 1

In [None]:
nt_kf_output_df = pd.DataFrame(nt_kf_output_dict)
nt_kf_output_df.head(10)

In [None]:
nt_kf_output_df.mean()

In [None]:
nt_kf_output_df.std(numeric_only=True)

In [None]:
acc_values = nt_kf_output_df['ACC'].values.tolist()
ap_values = nt_kf_output_df['AP'].values.tolist()
asens_values = nt_kf_output_df['ASens'].values.tolist()
aspec_values = nt_kf_output_df['ASpec'].values.tolist()
af1_values = nt_kf_output_df['AF1'].values.tolist()
auc_values = nt_kf_output_df['AUC'].values.tolist()
print(len(acc_values), len(ap_values), len(asens_values), len(aspec_values), len(af1_values), len(auc_values))

In [None]:
import scipy.stats as st

def CI(data, confidence_level, dis_type = 't'):
    if dis_type == 't':
        return st.t.interval(confidence=confidence_level, df=len(data)-1, 
                             loc=np.mean(data), scale=st.sem(data))
    elif dis_type == 'g':
        return st.norm.interval(confidence=confidence_level, loc=np.mean(data), scale=st.sem(data))

def CI_calculator(confidence_level, dis_type = 't'):
    for x in nt_kf_output_df.columns[1:]:
        base = nt_kf_output_df[x].values.tolist()
        print("CI of {}".format(x), CI(base, confidence_level, dis_type))

In [None]:
cl = 0.95

CI_calculator(confidence_level = cl, dis_type = 't')

In [None]:
cl = 0.95

CI_calculator(confidence_level = cl, dis_type = 'g')

In [None]:
cl = 0.99

CI_calculator(confidence_level = cl, dis_type = 'g')