In [2]:
import sys
import random
import numpy as np
from numpy import interp
from collections import defaultdict
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import plot_precision_recall_curve
from sklearn.utils import resample
import keras
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasClassifier
from keras import optimizers
from keras.models import model_from_json
import matplotlib.pyplot as plt

In [3]:
##################
# User configuration: modify the file names and paths below as you need.
# Tab-separated data table; it is read line by line and its first row is discarded.
path_to_data_file = 'PATH/TO/DATA/FILE'
# Tab-separated index file for the test splits: column 0 is a key, column 1 a
# comma-separated list of integer row indices.
test_index_file = 'PATH/TO/TEST/INDEX'
# Same layout as the test index file, for the training splits.
train_index_file = 'PATH/TO/TRAIN/INDEX'
# Folder the saved models are loaded from, as '<seed>_<k>_noca.h5'.
saved_model_folder = 'PATH/TO/MODEL/SAVED'
# Result file; performance rows are appended to it (opened in 'a' mode).
performance_file = 'PATH/TO/PERFORMANCE/FILE'
# Seed identifier used to build the index keys and model file names.
n_seed = 1 # Insert the seed of your best model
##################

# Load the tab-separated data table. The first row is skipped (it was deleted
# right after loading in the previous version of this cell). The context
# manager guarantees the file handle is closed once reading is done.
with open(path_to_data_file) as data_file:
    whole_data = [line.strip().split('\t') for line in data_file][1:]

featDic = {}       # key = value of column 0, value = feature values followed by the label
data_list_x = []   # one feature vector per row
data_list_y = []   # one integer label per row

for line in whole_data:
    # Columns 1-10 and 13-14 are parsed as floats, columns 11-12 as ints.
    features = (list(map(float, line[1:11]))
                + list(map(int, line[11:13]))
                + list(map(float, line[13:15])))
    # Column 15 is the label. Parse the whole field as one integer: mapping
    # int() over the string would split a multi-character label into digits,
    # silently adding an extra feature and using the wrong label.
    label = int(line[15])

    featDic[line[0]] = features + [label]
    data_list_x.append(features)
    data_list_y.append(label)

# Standardise every feature column to zero mean and unit variance.
data_x_bf_st = np.array(data_list_x)
feature_mean = np.mean(data_x_bf_st, axis=0)
feature_std = np.std(data_x_bf_st, axis=0)
# A constant column has zero standard deviation; divide by 1 instead so the
# column becomes all zeros rather than NaN.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
data_x = (data_x_bf_st - feature_mean) / feature_std
data_y = np.array(data_list_y)

In [None]:
seed = n_seed


def _read_index_rows(path):
    """Read a tab-separated index file and return its rows as lists of fields.

    Blank lines are skipped so that a trailing empty line does not break the
    parsing of the index column. The file is closed as soon as it is read.
    """
    with open(path) as index_file:
        return [line.strip().split('\t') for line in index_file if line.strip()]


def _build_index_map(rows):
    """Map the key in column 0 to the comma-separated integer indices in column 1."""
    return {row[0]: [int(x) for x in row[1].split(',')] for row in rows}


train_index_data = _read_index_rows(train_index_file)
test_index_data = _read_index_rows(test_index_file)

dict_train = _build_index_map(train_index_data)  # key = model name, value = index
dict_test = _build_index_map(test_index_data)    # key = model name, value = index

In [5]:
mean_fpr = np.linspace(0, 1, 100)

n_iterations = 1000  # number of bootstrap resamples drawn per fold

# Percentile bounds of the two-sided confidence interval
# (roughly the 2.5th and 97.5th percentiles for alpha = 0.95).
alpha = 0.95
p_l = ((1.0 - alpha) / 2) * 100
p_u = (alpha + ((1.0 - alpha) / 2.0)) * 100

# Metrics reported for every fold, in output order.
perf_list = ['acc', 'roc', 'f1', 'rp']

# Seeded generator shared by all resampling calls so the reported intervals
# are reproducible from one run of this cell to the next.
rng = np.random.RandomState(seed)

for k in range(1, 4):

    test = dict_test['%i_%i' % (seed, k)]

    model = keras.models.load_model(saved_model_folder + '/%i_%i_noca.h5' % (seed, k))

    # Slice the test split once per fold instead of on every iteration.
    test_x = data_x[test]
    test_y = data_y[test]
    n_size_test = len(test_y)

    stats = defaultdict(list)
    for i in range(n_iterations):
        # Only the test split is bootstrapped: the model is already trained,
        # so resampling the training split would be wasted work.
        test_bootstrap_x, test_bootstrap_y = resample(
            test_x, test_y, n_samples=n_size_test, random_state=rng)

        # verbose=0 keeps thousands of progress bars out of the cell output.
        # The unpacking expects evaluate() to return exactly (loss, metric).
        _, acc_b = model.evaluate(test_bootstrap_x, test_bootstrap_y, verbose=0)
        predictions_bootstrap = model.predict(test_bootstrap_x, verbose=0)

        fpr_b, tpr_b, _ = metrics.roc_curve(test_bootstrap_y, predictions_bootstrap)
        roc_b = metrics.auc(fpr_b, tpr_b)

        precision_b, recall_b, _ = precision_recall_curve(test_bootstrap_y, predictions_bootstrap)
        # Rounded predictions serve as hard class labels for F1.
        f1_b = f1_score(test_bootstrap_y, predictions_bootstrap.round())

        rp_b = metrics.auc(recall_b, precision_b)

        stats['acc'].append(acc_b)
        stats['roc'].append(roc_b)
        stats['f1'].append(f1_b)
        stats['rp'].append(rp_b)

    mean_perf = dict()
    lower = dict()
    upper = dict()

    # Append one tab-separated row per metric: name, mean, lower, upper, fold.
    with open(performance_file, 'a') as result:
        for perf in perf_list:
            mean_perf[perf] = sum(stats[perf]) / len(stats[perf])
            # Clamp the interval bounds to the [0, 1] range.
            lower[perf] = max(0.0, np.percentile(stats[perf], p_l))
            upper[perf] = min(1.0, np.percentile(stats[perf], p_u))
            result.write('\t'.join([perf, str(mean_perf[perf]), str(lower[perf]), str(upper[perf]), str(k)]) + '\n')