# Importing Library

In [19]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from keras import backend as K
from keras.layers import Input, Dense, Dropout, Input, Activation, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.models import Model, load_model, Sequential 
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
K.set_session(tf.Session(config=config))

early_stopping = EarlyStopping(patience=10)

# Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
import os
from os import listdir

np.random.seed(777)

print(tf.__version__)

1.10.0


##  Functions library

In [20]:
# prediction
def check_correct(predict, y):
    result = {}
    result['resistant-correct'] = 0
    result['resistant-wrong'] = 0
    result['sensitive-correct'] = 0
    result['sensitive-wrong'] = 0

    for i in range(len(predict)) :
        if predict[i] == y[i] :
            if y[i] == 0 :
                result['sensitive-correct'] += 1
            else :
                result['resistant-correct'] += 1
        else :
            if y[i] == 0 :
                result['sensitive-wrong'] += 1
            else :
                result['resistant-wrong'] += 1

    #for result_k, result_v in result.items():
    #    print(result_k +" : "+ str(result_v))
    sensitivity=result['resistant-correct']/(result['resistant-correct']+result['resistant-wrong'])
    specificity=result['sensitive-correct']/(result['sensitive-correct']+result['sensitive-wrong'])
    #print("Sensitivity :", sensitivity)
    #print("Specificity :", specificity)
    return sensitivity, specificity

In [21]:
# devide raw data into train / test & x_val / y_val
def data_split(raw_data, index_col, test_index):
    
    train_data = raw_data.iloc[list(raw_data.iloc[:,index_col]!=test_index)]
    test_data = raw_data.iloc[list(raw_data.iloc[:,index_col]==test_index)]
    
    y_val = train_data.Platinum_Status
    x_val = train_data.drop(["Platinum_Status","index"],axis=1)
    test_y_val = test_data.Platinum_Status
    test_x_val = test_data.drop(["Platinum_Status","index"],axis=1)
    
    return train_data, test_data, y_val, x_val, test_y_val, test_x_val

    # raw_data: have gene_expressions(maybe multiple columns), index column, Platinum_Status column.


In [22]:
# calculate all of model performance 
# - predictions(probability) / labeled predictions(0/1) / Loss / Accuracy / Sensitivity / Specificity / AUC values of Train / Test dataset.
# using trained models, or you can put predictions(probability) passively(in this case, Loss & Accuracy do not provided.)
def model_performance(information=False, Input_Prediction_Passively=False, using_model=None, tr_predictions=None, ts_predictions=None, tr_x_val=None, tr_y_val=None, ts_x_val=None, ts_y_val=None, output_list=None):
    
    if information == True:            
        print("options model_performance:\n1) using_model: keras models that you want to check performance. \"Input_Prediction_Passive\" option for input prediction list instead using models.\n3) tr_predictions & ts_predictions: prediction input passively. put this data only when not using keras model.\n4) tr_x_val & ts_x_val: input samples of train/test samples.\n4) tr_y_val & ts_y_val: results of train/test samples.\n5) output_list: return values that you want to recieve.\n CAUTION: Essential variable.\n\t tr_loss, tr_accuracy, tr_sensitivity, tr_specificity, tr_predictions, labeled_tr_predictions, tr_predictions_flat, roc_auc_tr,\nts_loss, ts_accuracy, ts_sensitivity, ts_specificity, ts_predictions, labeled_ts_predictions, ts_predictions_flat, roc_auc_ts,\nroc_auc_total\n\n* CAUTION: if 'None' value is returned, please check your input tr inputs(None value for tr outputs) or ts inputs(None value for ts outputs).") 
        return 0
    elif information != False:
        print("for using information options, please set 'information' variable for 'True'")
        return -1
    
    if using_model is None:
        if Input_Prediction_Passively == False:
            print("ERROR: There are no models for using.\nusing \"model_performance(information = True)\" for getting informations of this function.") 
            return -1
        elif (tr_predictions is None) and (ts_predictions is None): # No model/prediction input. no performance should be calculated.
                print("ERROR: Input prediction list instead using saved model.")
                return -1
        else: # No model input, but Input_Prediction_Passively is True & input prediction is valid.
            tr_loss,tr_accuracy= None, None
            ts_loss,ts_accuracy= None, None
            
    elif Input_Prediction_Passively == True: # both of model/prediction putted, could cause confusing.
        ch = input("You put both model and prediction. Select one method:\n'p' for using prediction only, 'm' using models only, 'n' for quit the function.")
        while 1:
            if ch == 'p':
                using_model = None
                break
            elif ch == 'm':
                tr_predictions = None
                ts_predictions = None
                break
            elif ch == 'e':
                return 0
            else:
                print("you put worng option: "+str(ch))
            ch = input("Select one method:\n'p' for using prediction only, 'm' using models only, 'n' for quit the function.")
                
    if output_list is None:
        print("ERROR: There are no output_list for return.\nusing \"model_performance(information = True)\" for getting informations of this function.")
        return -1
    
    if not(tr_x_val is None) and not(tr_y_val is None):
        # predict tr result only when no tr_prediction input
        if tr_predictions is None:
            tr_loss,tr_accuracy= using_model.evaluate(tr_x_val,tr_y_val)
            tr_predictions = using_model.predict(tr_x_val)
        # tr sensitivity / specificity
        labeled_tr_predictions = np.where(tr_predictions > 0.5, 1, 0).flatten()
        tr_sensitivity, tr_specificity = check_correct(labeled_tr_predictions, tr_y_val)
        tr_predictions_flat = tr_predictions[:,0]   
        # roc(tr)
        fpr_tr, tpr_tr, threshold_tr = metrics.roc_curve(tr_y_val, tr_predictions)
        roc_auc_tr = metrics.auc(fpr_tr, tpr_tr)
    
    if not(ts_x_val is None) and not(ts_y_val is None):
        # predict ts result only when no ts_prediction input
        if ts_predictions is None:
            ts_loss,ts_accuracy= using_model.evaluate(ts_x_val,ts_y_val)
            ts_predictions = using_model.predict(ts_x_val)
        labeled_ts_predictions = np.where(ts_predictions > 0.5, 1, 0).flatten()
        ts_sensitivity, ts_specificity = check_correct(labeled_ts_predictions, ts_y_val)
        ts_predictions_flat = ts_predictions[:,0]   
        # roc(ts)
        fpr_ts, tpr_ts, threshold_ts = metrics.roc_curve(ts_y_val, ts_predictions)
        roc_auc_ts = metrics.auc(fpr_ts, tpr_ts)    
    
    if (not(tr_x_val is None) and not(tr_y_val is None)) and (not(ts_x_val is None) and not(ts_y_val is None)):
        y_true = np.append(tr_y_val, ts_y_val)
        y_pred = np.append(tr_predictions, ts_predictions)
        fpr_total, tpr_total, threshold_total = metrics.roc_curve(y_true, y_pred)
        roc_auc_total = metrics.auc(fpr_total, tpr_total)
        
        
    return_list = []
    
    for output in output_list:
        
        if(output == "tr_loss"):
            return_list.append(tr_loss)
                               
        elif(output == "tr_accuracy"):
            return_list.append(tr_accuracy)
                               
        elif(output == "tr_sensitivity"):
            return_list.append(tr_sensitivity)
                               
        elif(output == "tr_specificity"):
            return_list.append(tr_specificity)
                               
        elif(output == "tr_predictions"):
            return_list.append(tr_predictions)
                               
        elif(output == "labeled_tr_predictions"):
            return_list.append(labeled_tr_predictions)
                               
        elif(output == "tr_predictions_flat"):
            return_list.append(tr_predictions_flat)
            
        elif(output == "roc_auc_tr"):
            return_list.append(roc_auc_tr)

        elif(output == "ts_loss"):
            return_list.append(ts_loss)
                               
        elif(output == "ts_accuracy"):
            return_list.append(ts_accuracy)
                               
        elif(output == "ts_sensitivity"):
            return_list.append(ts_sensitivity)
                               
        elif(output == "ts_specificity"):
            return_list.append(ts_specificity)
                               
        elif(output == "ts_predictions"):
            return_list.append(ts_predictions)
                               
        elif(output == "labeled_ts_predictions"):
            return_list.append(labeled_ts_predictions)
                               
        elif(output == "ts_predictions_flat"):
            return_list.append(ts_predictions_flat)
        
        elif(output == "roc_auc_ts"):
            return_list.append(roc_auc_ts)
            
        elif(output == "roc_auc_total"):
            return_list.append(roc_auc_total)
                               
        else:
            print("There are no options <"+str(output)+">. Please refer these output options:\ntr_loss, tr_accuracy, tr_sensitivity, tr_specificity, tr_predictions, labeled_tr_predictions, tr_predictions_flat, roc_auc_tr,\nts_loss, ts_accuracy, ts_sensitivity, ts_specificity, ts_predictions, labeled_ts_predictions, ts_predictions_flat, roc_auc_ts,\nroc_auc_total")
    
    return return_list

In [60]:
# Coverage algorithm
def ensemble_coverage(inputModels,x,y):
    
    outputModels = []
    modelInfo = []
    coverageTotal= [False]*len(y)
    
    for i in range(len(inputModels)):
        m = inputModels[i]
        yHat = m.predict(x[i])
        yHat = [round(i) for [i] in yHat]
        
        loss, acc = m.evaluate(x[i],y[i])
        modelInfo.append((m,yHat,acc))
        print(yHat[0:10])
        print(acc)
    
    modelInfo.sort(key=lambda x : x[2],reverse=True)
    print(modelInfo)
    
    i = 0
    y = y[i]
    for m,yHat,acc in modelInfo:
        beforeCoverage = sum(coverageTotal)
        coverage = [a == b for a,b in zip(y,yHat)]
        coverageTotal = [a or b for a,b in zip(coverageTotal,coverage)]
        afterCoverage = sum(coverageTotal)
        
        print(afterCoverage/len(y))
        
        if afterCoverage > beforeCoverage:
            outputModels.append(m)
            print("Increased Coverage : model added!")
        else:
            print("Same Coverage : model not added")
        if afterCoverage == len(y):
            print("Fully Covered!")
            break
        
        i=i+1
                
    return outputModels

In [61]:
#e_models_select = ensemble_coverage(model_list,inter_dataset["tr_x_val"],inter_dataset["tr_y_val"])

[0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
1.0
[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]
0.9672131147540983
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0]
0.9262295033110947
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
0.8852459035935949
[(<keras.engine.training.Model object at 0x00000236BEE59278>, [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0], 1.0), (<keras.engine.training.Model object at 0x00000236BEE59

# 1. Preparation: import & preprocessing data + import module

## Input path & name of models / raw data for ensemble

In [191]:
# change model_path & each model_name.
# Caution: If you want to change input models, you also have to change selected data types.

# ex) if you want to put these models: two CV, one Annot_3000,  one Var, one new_Diff, one Clin.

'''

m_1_name = CV_400_1.h5
m_2_name = CV_400_2.h5
m_3_name = Annot_3000_400_1.h5
m_4_name = Var_400_0.h5
m_5_name = new_Diff_400_2.h5
m_6_name = Clin_400_1.h5
--> if you change this part,

select_types = [types[1], # "inter_by_names_CV_400"
                types[1], # "inter_by_names_CV_400"
                types[0], # "inter_by_names_Annotation3000_400"
                types[2], # "inter_by_names_Var_400"
                types[3], # "inter_by_names_new_Diff_400"
                types[4]] # "inter_by_names_Clin"
--> you also have to change this part.

'''

types = ["OV_six_fold_Annotation3000_400", 
         "OV_six_fold_CV_400", 
         "OV_six_fold_Var_400", "OV_six_fold_new_Diff_400",
         "OV_six_fold_Clin", 
         "OV_six_fold_SNV" 
         ]

ch = input("test index: ")
ts_i = int(ch)

# input model path & ensemble data(Transcriptome, Cinical Information, Somatic Mutation data)
# data path(server): /home/tjahn/TCGA_Ovary/01.Data/DNN/TC_intersect_subsamples_by_names 
model_path = "C:/test/temp/test_"+str(ts_i)+"/"
path = "C:/test/TC_six_fold_subsamples/"
save_model_path = "C:/test/temp/model/"
save_prediction_path = "C:/test/temp/predictions/"

model_names = []
model_index = []
files = os.listdir(model_path)

for f in files:
    ext= os.path.splitext(f)[-1]
    if ext == ".h5":
        model_names.append(f)
        ind = int(f.split("_")[1].split("-")[0])
        model_index.append(ind)

test index: 2


In [192]:
t = 0
for model_i in model_index:
    print(model_names[t])
    print(types[model_i]+"\n")
    t = t+1

m_1-2_16.h5
OV_six_fold_CV_400

m_1-2_6.h5
OV_six_fold_CV_400

m_3-2.h5
OV_six_fold_new_Diff_400

m_4-2_56.h5
OV_six_fold_Clin

m_4-2_58.h5
OV_six_fold_Clin



#### model_names

## Import Data

In [193]:
file_1 = path+types[0]+".csv"
file_2 = path+types[1]+".csv"
file_3 = path+types[2]+".csv"
file_4 = path+types[3]+".csv"
file_5 = path+types[4]+".csv"
file_6 = path+types[5]+".csv"

idx_col = 0

full_data_1 = pd.read_csv(file_1,index_col=idx_col)
full_data_2 = pd.read_csv(file_2,index_col=idx_col)
full_data_3 = pd.read_csv(file_3,index_col=idx_col)
full_data_4 = pd.read_csv(file_4,index_col=idx_col)
full_data_5 = pd.read_csv(file_5,index_col=idx_col)
full_data_6 = pd.read_csv(file_6,index_col=idx_col)

inter_data_1 = full_data_1.iloc[list(full_data_1.iloc[:,-1]!=6)]
inter_data_2 = full_data_2.iloc[list(full_data_2.iloc[:,-1]!=6)]
inter_data_3 = full_data_3.iloc[list(full_data_3.iloc[:,-1]!=6)]
inter_data_4 = full_data_4.iloc[list(full_data_4.iloc[:,-1]!=6)]
inter_data_5 = full_data_5.iloc[list(full_data_5.iloc[:,-1]!=6)]
inter_data_6 = full_data_6.iloc[list(full_data_6.iloc[:,-1]!=6)]

full_ds_list = [full_data_1, full_data_2, full_data_3, full_data_4, full_data_5, full_data_6]
inter_ds_list = [inter_data_1, inter_data_2, inter_data_3, inter_data_4, inter_data_5, inter_data_6]

# Split Train Test Data & Make full & inter dataset

full_dataset = {"tr_data":[], "ts_data":[], "tr_y_val":[], "tr_x_val":[], "ts_y_val":[], "ts_x_val":[]}
inter_dataset = {"tr_data":[], "ts_data":[], "tr_y_val":[], "tr_x_val":[], "ts_y_val":[], "ts_x_val":[]}

print("############### test index is ["+str(ts_i)+"] ###############\n\n")
for m in range(len(model_index)):
    print(model_index[m])
    full_tr_data, full_ts_data, full_tr_y_val, full_tr_x_val, full_ts_y_val, full_ts_x_val = data_split(raw_data = full_ds_list[model_index[m]], index_col = -1, test_index = ts_i)
    print("["+str(m)+"]: "+model_names[m]+" for type: "+types[model_index[m]]+".\n full tr & ts: "+str(full_tr_x_val.shape)+", "+str(full_ts_x_val.shape)+"\n")
    full_dataset['tr_data'].append(full_tr_data)
    full_dataset['ts_data'].append(full_ts_data)
    full_dataset['tr_x_val'].append(full_tr_x_val)
    full_dataset['tr_y_val'].append(full_tr_y_val)
    full_dataset['ts_x_val'].append(full_ts_x_val)
    full_dataset['ts_y_val'].append(full_ts_y_val)  
    inter_tr_data, inter_ts_data, inter_tr_y_val, inter_tr_x_val, inter_ts_y_val, inter_ts_x_val = data_split(raw_data = inter_ds_list[model_index[m]], index_col = -1, test_index = ts_i)
    inter_dataset['tr_data'].append(inter_tr_data)
    inter_dataset['ts_data'].append(inter_ts_data)
    inter_dataset['tr_x_val'].append(inter_tr_x_val)
    inter_dataset['tr_y_val'].append(inter_tr_y_val)
    inter_dataset['ts_x_val'].append(inter_ts_x_val)
    inter_dataset['ts_y_val'].append(inter_ts_y_val)     

############### test index is [2] ###############


1
[0]: m_1-2_16.h5 for type: OV_six_fold_CV_400.
 full tr & ts: (186, 400), (31, 400)

1
[1]: m_1-2_6.h5 for type: OV_six_fold_CV_400.
 full tr & ts: (186, 400), (31, 400)

3
[2]: m_3-2.h5 for type: OV_six_fold_new_Diff_400.
 full tr & ts: (186, 400), (31, 400)

4
[3]: m_4-2_56.h5 for type: OV_six_fold_Clin.
 full tr & ts: (256, 35), (31, 35)

4
[4]: m_4-2_58.h5 for type: OV_six_fold_Clin.
 full tr & ts: (256, 35), (31, 35)



## Import separate models & evaluation

In [194]:
# model load & evaluation. <model_n_l> is full-layer model, <model_n_l_new> is without-sigmoid-layer model.
'''
Each model's tr_accuracy can be differ to original model, but ts_accuracy should be same to original tested models.
Because we using full-size data(about 200 patients data used Transcriptome, Clinical, SNV models.) for train each models.
In contrast, in this code, we using ensemble-input data(intersected 153 patients).
For-training-patients may be different in ensemble data and whole size data, but for-test-patients are the same.
'''

model_list = []
model_output_list = {"tr_accuracy":[], "tr_sensitivity":[], "tr_specificity":[], "tr_predictions":[],
                 "labeled_tr_predictions":[], "tr_predictions_flat":[], "roc_auc_tr":[], 
                 "ts_accuracy":[], "ts_sensitivity":[], "ts_specificity":[], "ts_predictions":[],
                 "labeled_ts_predictions":[], "ts_predictions_flat":[], "roc_auc_ts":[], 
                 "roc_auc_total":[], "tr_result":[], "ts_result":[]}
tr_predictions = []
ts_predictions = []

for m in range(len(model_names)):
    
    model_l = load_model(model_path+model_names[m])
    model_list.append(model_l)
    output_list = output_list = model_performance(
        information = False, using_model=model_l,Input_Prediction_Passively = False, 
        tr_x_val=inter_dataset['tr_x_val'][m], tr_y_val=inter_dataset['tr_y_val'][m], ts_x_val=inter_dataset['ts_x_val'][m], ts_y_val=inter_dataset['ts_y_val'][m],
        output_list=["tr_accuracy", "tr_sensitivity", "tr_specificity", "tr_predictions",
                     "labeled_tr_predictions", "tr_predictions_flat", "roc_auc_tr", 
                     "ts_accuracy", "ts_sensitivity", "ts_specificity", "ts_predictions",
                     "labeled_ts_predictions", "ts_predictions_flat", "roc_auc_ts", 
                     "roc_auc_total"])
    m_tr_accuracy, m_tr_sensitivity, m_tr_specificity, m_tr_predictions, m_labeled_tr_predictions, m_tr_predictions_flat, m_roc_auc_tr, m_ts_accuracy, m_ts_sensitivity, m_ts_specificity, m_ts_predictions,m_labeled_ts_predictions, m_ts_predictions_flat, m_roc_auc_ts, m_roc_auc_total = output_list
    print("\nmodel: "+model_names[m])
    print("tr & ts for inter data: "+str(m_tr_accuracy)+", "+str(m_ts_accuracy)+"\n")
    
    model_l_new = Model(inputs = model_l.input, outputs=model_l.get_layer(model_l.layers[-2].name).output)
    m_tr_result = model_l_new.predict([inter_dataset['tr_x_val'][m]])
    m_ts_result = model_l_new.predict([inter_dataset['ts_x_val'][m]])
    
    model_output_list["tr_accuracy"].append(m_tr_accuracy)
    model_output_list["tr_sensitivity"].append(m_tr_sensitivity)
    model_output_list["tr_specificity"].append(m_tr_specificity)
    model_output_list["ts_accuracy"].append(m_ts_accuracy)
    model_output_list["ts_sensitivity"].append(m_ts_sensitivity)
    model_output_list["ts_specificity"].append(m_ts_specificity)
    model_output_list["tr_result"].append(m_tr_result)
    
    model_output_list["tr_predictions"].append(m_tr_predictions)
    model_output_list["labeled_tr_predictions"].append(m_labeled_tr_predictions)
    model_output_list["tr_predictions_flat"].append(m_tr_predictions_flat)
    model_output_list["roc_auc_tr"].append(m_roc_auc_tr)
    model_output_list["ts_predictions"].append(m_ts_predictions)
    model_output_list["labeled_ts_predictions"].append(m_labeled_ts_predictions)
    model_output_list["ts_predictions_flat"].append(m_ts_predictions_flat)
    model_output_list["roc_auc_ts"].append(m_roc_auc_ts)
    model_output_list["ts_result"].append(m_ts_result)
    
    model_output_list["roc_auc_total"].append(m_roc_auc_total)  
    
    tr_predictions.append(m_tr_predictions)
    ts_predictions.append(m_ts_predictions)


model: m_1-2_16.h5
tr & ts for inter data: 1.0, 0.8064516186714172


model: m_1-2_6.h5
tr & ts for inter data: 0.9918032786885246, 0.774193525314331


model: m_3-2.h5
tr & ts for inter data: 1.0, 0.8709677457809448


model: m_4-2_56.h5
tr & ts for inter data: 0.9836065573770492, 0.774193525314331


model: m_4-2_58.h5
tr & ts for inter data: 1.0, 0.774193525314331



### Evaluating seperate model's performance

In [195]:
for m in range(len(model_names)):
    print("#### "+model_names[m]+" ####")
    print("types: "+types[model_index[m]])
    print("tr: "+str(model_output_list["tr_accuracy"][m])+", ts: "+str(model_output_list["ts_accuracy"][m])+"\n")

#### m_1-2_16.h5 ####
types: OV_six_fold_CV_400
tr: 1.0, ts: 0.8064516186714172

#### m_1-2_6.h5 ####
types: OV_six_fold_CV_400
tr: 0.9918032786885246, ts: 0.774193525314331

#### m_3-2.h5 ####
types: OV_six_fold_new_Diff_400
tr: 1.0, ts: 0.8709677457809448

#### m_4-2_56.h5 ####
types: OV_six_fold_Clin
tr: 0.9836065573770492, ts: 0.774193525314331

#### m_4-2_58.h5 ####
types: OV_six_fold_Clin
tr: 1.0, ts: 0.774193525314331



# 2. Modeling Ensemble model

### Select models

In [196]:
# select models for ensemble among loaded models.
# CAUTION: Duplication(ex: select = [1, 1, 1, 3, 5]) is allowed, but it is same models, and have same predictions. They have same opinions.

select = [1, 2, 2, 2, 4]
print(select)

[1, 2, 2, 2, 4]


## 1) DNN-Combiner Ensmeble

### Ensemble Input listup

In [197]:
m_tr_predictions_select = []
m_ts_predictions_select = []   

for i in range(len(select)):
    m_tr_predictions_select.append(model_output_list["tr_predictions"][select[i]-1])
    m_ts_predictions_select.append(model_output_list["ts_predictions"][select[i]-1])
    #print(m_tr_predictions[select[i]-1].shape)
    
em_tr_x_val = np.concatenate(m_tr_predictions_select, axis=1)
em_ts_x_val = np.concatenate(m_ts_predictions_select, axis=1)

tr_y_val = inter_dataset["tr_y_val"][0]
ts_y_val = inter_dataset["ts_y_val"][0]

In [198]:
print(em_tr_x_val.shape)
print(em_ts_x_val.shape)

(122, 5)
(31, 5)


In [199]:
print("################################## DNN em ##################################")
print("select: "+str(select))
for select_i in select:
    print("\n"+types[model_index[select_i-1]])
    print(model_names[select_i-1])

    
print("#############################################################################################")

# 1) parameter setting
em_adam = optimizers.Adam(lr=0.05)                                   
em_input_drop_out = 0.3
em_drop_out = 0
em_batch_size = 5
em_BN = True                           

em_layers = [10]
em_tr_loss_best = 100 # for saving best loss value 
em_best_model=[] #for saving best model
count=0 # for early stopping

# 2) model build
em_input = Input(shape=(len(select),))
em_dp = Dropout(em_input_drop_out)(em_input)
for l in layers:
    if em_BN == True:
        em_m = Dense(l)(em_dp)
        em_bn = BatchNormalization(axis=1, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones')(em_m)
        em_dp = Activation("relu")(em_bn)
    else:
        em_m = Dense(l,activation='relu')(em_dp)
        em_dp = Dropout(drop_out_m)(em_m)

em_final = em_dp
em_output = Dense(1, activation="sigmoid")(em_final)
em_model = Model(inputs=em_input,outputs=em_output)
em_model.compile(optimizer=em_adam, 
                loss='binary_crossentropy',
                metrics=['accuracy'])

# 3) Training: if no increase of tr_loss three times, stop training.
while 1:
    em_model.fit(em_tr_x_val, tr_y_val, batch_size=em_batch_size, nb_epoch=1, verbose = 0)
    em_tr_loss=em_model.evaluate( em_tr_x_val, tr_y_val)[0]
    if em_tr_loss < em_tr_loss_best: # new best model. count reset.
        em_tr_loss_best = em_tr_loss
        count=0
        em_best_model = em_model
    if count>10: # no increase three time. stop.
        em_model = em_best_model
        break
    else: count=count+1
print("Model em" +"-"+str(ts_i)+" trained.")

# 4) save model
em_model.save(save_model_path+"/m_em-"+str(ts_i)+".h5")

################################## DNN em ##################################
select: [1, 2, 2, 2, 4]

OV_six_fold_CV_400
m_1-2_16.h5

OV_six_fold_CV_400
m_1-2_6.h5

OV_six_fold_CV_400
m_1-2_6.h5

OV_six_fold_CV_400
m_1-2_6.h5

OV_six_fold_Clin
m_4-2_56.h5
#############################################################################################




Model em-2 trained.


### Evaluating _DNN Combiner_ ensemble model

In [200]:
em_output_list = model_performance(
    information = False, using_model=em_model,Input_Prediction_Passively = False, 
    tr_x_val=em_tr_x_val, tr_y_val=tr_y_val, ts_x_val=em_ts_x_val, ts_y_val=ts_y_val,
    output_list=["tr_loss", "tr_accuracy", "tr_sensitivity", "tr_specificity", "tr_predictions",
                 "labeled_tr_predictions", "tr_predictions_flat", "roc_auc_tr", 
                 "ts_loss", "ts_accuracy", "ts_sensitivity", "ts_specificity", "ts_predictions",
                 "labeled_ts_predictions", "ts_predictions_flat", "roc_auc_ts", 
                 "roc_auc_total"])

em_tr_loss, em_tr_accuracy, em_tr_sensitivity, em_tr_specificity, em_tr_predictions, em_labeled_tr_predictions, em_tr_predictions_flat, em_roc_auc_tr, em_ts_loss, em_ts_accuracy, em_ts_sensitivity, em_ts_specificity, em_ts_predictions,em_labeled_ts_predictions, em_ts_predictions_flat, em_roc_auc_ts, em_roc_auc_total = em_output_list

print("Overall AUC: ", em_roc_auc_total)
print("Train AUC: ", em_roc_auc_tr)
print("Test AUC: ", em_roc_auc_ts)

print("Train Accuracy: {}".format(em_tr_accuracy))
print("Train Sensitivities & Specificities : "+str(em_tr_sensitivity)+", "+str(em_tr_specificity))
print("Test Accuracy: {}".format(em_ts_accuracy))
print("Test Sensitivities & Specificities : "+str(em_ts_sensitivity)+", "+str(em_ts_specificity))

Overall AUC:  0.9879492600422832
Train AUC:  1.0
Test AUC:  0.8383838383838383
Train Accuracy: 0.9918032786885246
Train Sensitivities & Specificities : 1.0, 0.9886363636363636
Test Accuracy: 0.7419354915618896
Test Sensitivities & Specificities : 0.7777777777777778, 0.7272727272727273


In [201]:
# save prediction result.

tr_df_em = pd.DataFrame(data={"patient":list(inter_dataset["tr_data"][0].index), "hypothesis 1": list(em_tr_predictions_flat), 
                        "prediction":list(em_labeled_tr_predictions), "Platinum_Status":list(tr_y_val)})
tr_df_em.to_csv(save_prediction_path+"m_em-"+str(ts_i)+"_tr.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

ts_df_em = pd.DataFrame(data={"patient":list(inter_dataset["ts_data"][0].index), "hypothesis 1": list(em_ts_predictions_flat), 
                        "prediction":list(em_labeled_ts_predictions), "Platinum_Status":list(ts_y_val)})
ts_df_em.to_csv(save_prediction_path+"m_em-"+str(ts_i)+"_ts.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])


## 2) Mean Ensemble

### Evaluating _mean_ ensemble model

In [202]:
mean_em_tr_predictions=sum(m_tr_predictions_select)/len(select)
mean_em_ts_predictions=sum(m_ts_predictions_select)/len(select)

mean_em_output_list = model_performance(
    information = False, using_model=None,Input_Prediction_Passively = True, 
    tr_predictions=mean_em_tr_predictions, ts_predictions=mean_em_ts_predictions, 
    tr_x_val=em_tr_x_val, tr_y_val=tr_y_val, ts_x_val=em_ts_x_val, ts_y_val=ts_y_val,
    output_list=["tr_sensitivity", "tr_specificity",
                 "labeled_tr_predictions", "tr_predictions_flat", "roc_auc_tr", 
                 "ts_sensitivity", "ts_specificity",
                 "labeled_ts_predictions", "ts_predictions_flat", "roc_auc_ts", 
                 "roc_auc_total"])
mean_em_tr_sensitivity, mean_em_tr_specificity,  mean_em_labeled_tr_predictions, mean_em_tr_predictions_flat, mean_em_roc_auc_tr, mean_em_ts_sensitivity, mean_em_ts_specificity, mean_em_labeled_ts_predictions, mean_em_ts_predictions_flat, mean_em_roc_auc_ts, mean_em_roc_auc_total = mean_em_output_list

mean_em_tr_accuracy = sum(mean_em_labeled_tr_predictions==tr_y_val.values)/len(tr_y_val)
mean_em_ts_accuracy = sum(mean_em_labeled_ts_predictions==ts_y_val.values)/len(ts_y_val)

print("Overall AUC: ", mean_em_roc_auc_total)
print("Train AUC: ", mean_em_roc_auc_tr)
print("Test AUC: ", mean_em_roc_auc_ts)

print("Train Accuracy: {}".format(mean_em_tr_accuracy))
print("Train Sensitivities & Specificities : "+str(mean_em_tr_sensitivity)+", "+str(mean_em_tr_specificity))
print("Test Accuracy: {}".format(mean_em_ts_accuracy))
print("Test Sensitivities & Specificities : "+str(mean_em_ts_sensitivity)+", "+str(mean_em_ts_specificity))

Overall AUC:  0.9826638477801268
Train AUC:  1.0
Test AUC:  0.8080808080808081
Train Accuracy: 1.0
Train Sensitivities & Specificities : 1.0, 1.0
Test Accuracy: 0.7419354838709677
Test Sensitivities & Specificities : 0.6666666666666666, 0.7727272727272727


In [203]:
# save prediction result.

tr_df_em = pd.DataFrame(data={"patient":list(inter_dataset["tr_data"][0].index), "hypothesis 1": list(mean_em_tr_predictions_flat), 
                        "prediction":list(mean_em_labeled_tr_predictions), "Platinum_Status":list(tr_y_val)})
tr_df_em.to_csv(save_prediction_path+"m_mean-"+str(ts_i)+"_tr.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

ts_df_em = pd.DataFrame(data={"patient":list(inter_dataset["ts_data"][0].index), "hypothesis 1": list(mean_em_ts_predictions_flat), 
                        "prediction":list(mean_em_labeled_ts_predictions), "Platinum_Status":list(ts_y_val)})
ts_df_em.to_csv(save_prediction_path+"m_mean-"+str(ts_i)+"_ts.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

## 3) Transferred Ensemble Modeling 

### Making new input data for t-ensemble

In [204]:
m_tr_result_select = []
m_ts_result_select = []

for i in range(len(select)):
    m_tr_result_select.append(model_output_list["tr_result"][select[i]-1])
    m_ts_result_select.append(model_output_list["ts_result"][select[i]-1])

t_em_tr_x_val = np.concatenate(m_tr_result_select, axis=1)
t_em_ts_x_val = np.concatenate(m_ts_result_select, axis=1)
print("\n############################################### t-em x val merged. ###############################################\n")
print(t_em_tr_x_val.shape)
print(t_em_ts_x_val.shape)


############################################### t-em x val merged. ###############################################

(122, 450)
(31, 450)


### Modeling t-ensemble  

In [205]:
print("################################## Transferred em ##################################")
print("select: "+str(select))
for select_i in select:
    print("\n"+types[model_index[select_i-1]])
    print(model_names[select_i-1])

    
print("#############################################################################################")

# 1) parameter setting
t_em_adam = optimizers.Adam(lr=0.05)                                   
t_em_input_drop_out = 0.3
t_em_drop_out = 0
t_em_batch_size = 5
t_em_BN = True                           

t_em_layers = [10]
t_em_tr_loss_best = 100 # for saving best loss value 
t_em_best_model=[] #for saving best model
count=0 # for early stopping

# 2) model build
t_em_input = Input(shape=(t_em_ts_x_val.shape[1],))
t_em_dp = Dropout(t_em_input_drop_out)(t_em_input)
for l in layers:
    if t_em_BN == True:
        t_em_m = Dense(l)(t_em_dp)
        t_em_bn = BatchNormalization(axis=1, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones')(t_em_m)
        t_em_dp = Activation("relu")(t_em_bn)
    else:
        t_em_m = Dense(l,activation='relu')(t_em_dp)
        t_em_dp = Dropout(drop_out_m)(t_em_m)

t_em_final = t_em_dp
t_em_output = Dense(1, activation="sigmoid")(t_em_final)
t_em_model = Model(inputs=t_em_input,outputs=t_em_output)
t_em_model.compile(optimizer=t_em_adam, 
                loss='binary_crossentropy',
                metrics=['accuracy'])

# 3) Training: if no increase of tr_loss three times, stop training.
while 1:
    t_em_model.fit(t_em_tr_x_val, tr_y_val, batch_size=t_em_batch_size, nb_epoch=1, verbose = 0)
    t_em_tr_loss=t_em_model.evaluate( t_em_tr_x_val, tr_y_val)[0]
    if t_em_tr_loss < t_em_tr_loss_best: # new best model. count reset.
        t_em_tr_loss_best = t_em_tr_loss
        count=0
        t_em_best_model = t_em_model
    if count>10: # no increase three time. stop.
        t_em_model = t_em_best_model
        break
    else: count=count+1
        
print("Model t-em" +"-"+str(ts_i)+" trained.")

# 4) save model
em_model.save(save_model_path+"/m_t-em-"+str(ts_i)+".h5")

################################## Transferred em ##################################
select: [1, 2, 2, 2, 4]

OV_six_fold_CV_400
m_1-2_16.h5

OV_six_fold_CV_400
m_1-2_6.h5

OV_six_fold_CV_400
m_1-2_6.h5

OV_six_fold_CV_400
m_1-2_6.h5

OV_six_fold_Clin
m_4-2_56.h5
#############################################################################################




Model t-em-2 trained.


### Evaluating t-ensemble

In [206]:
t_em_output_list = model_performance(
    information = False, using_model=t_em_model,Input_Prediction_Passively = False, 
    tr_x_val=t_em_tr_x_val, tr_y_val=tr_y_val, ts_x_val=t_em_ts_x_val, ts_y_val=ts_y_val,
    output_list=["tr_loss", "tr_accuracy", "tr_sensitivity", "tr_specificity", "tr_predictions",
                 "labeled_tr_predictions", "tr_predictions_flat", "roc_auc_tr", 
                 "ts_loss", "ts_accuracy", "ts_sensitivity", "ts_specificity", "ts_predictions",
                 "labeled_ts_predictions", "ts_predictions_flat", "roc_auc_ts", 
                 "roc_auc_total"])

t_em_tr_loss, t_em_tr_accuracy, t_em_tr_sensitivity, t_em_tr_specificity, t_em_tr_predictions, t_em_labeled_tr_predictions, t_em_tr_predictions_flat, t_em_roc_auc_tr, t_em_ts_loss, t_em_ts_accuracy, t_em_ts_sensitivity, t_em_ts_specificity, t_em_ts_predictions,t_em_labeled_ts_predictions, t_em_ts_predictions_flat, t_em_roc_auc_ts, t_em_roc_auc_total = t_em_output_list

print("Overall AUC: ", t_em_roc_auc_total)
print("Train AUC: ", t_em_roc_auc_tr)
print("Test AUC: ", t_em_roc_auc_ts)

print("Train Accuracy: {}".format(t_em_tr_accuracy))
print("Train Sensitivities & Specificities : "+str(t_em_tr_sensitivity)+", "+str(t_em_tr_specificity))
print("Test Accuracy: {}".format(t_em_ts_accuracy))
print("Test Sensitivities & Specificities : "+str(t_em_ts_sensitivity)+", "+str(t_em_ts_specificity))

Overall AUC:  0.9808668076109937
Train AUC:  1.0
Test AUC:  0.8207070707070707
Train Accuracy: 1.0
Train Sensitivities & Specificities : 1.0, 1.0
Test Accuracy: 0.7419354915618896
Test Sensitivities & Specificities : 0.6666666666666666, 0.7727272727272727


In [207]:
# save prediction result.

tr_df_t_em = pd.DataFrame(data={"patient":list(inter_dataset["tr_data"][0].index), "hypothesis 1": list(t_em_tr_predictions_flat), 
                        "prediction":list(t_em_labeled_tr_predictions), "Platinum_Status":list(tr_y_val)})
tr_df_t_em.to_csv(save_prediction_path+"m_t-em-"+str(ts_i)+"_tr.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

ts_df_t_em = pd.DataFrame(data={"patient":list(inter_dataset["ts_data"][0].index), "hypothesis 1": list(t_em_ts_predictions_flat), 
                        "prediction":list(t_em_labeled_ts_predictions), "Platinum_Status":list(ts_y_val)})
ts_df_t_em.to_csv(save_prediction_path+"m_t-em-"+str(ts_i)+"_ts.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

## Transferred Ensemble(Modified)

### mo_transferred ensemble input dataset

In [208]:
# dataset : raw data + prediction results
mo_em_tr_x_val = np.concatenate([inter_dataset["tr_x_val"][1], em_tr_x_val], axis = 1)
mo_em_ts_x_val = np.concatenate([inter_dataset["ts_x_val"][1], em_ts_x_val], axis = 1)

In [209]:
print(mo_em_tr_x_val.shape)
print(mo_em_ts_x_val.shape)

(122, 405)
(31, 405)


In [210]:
#full_em_matrix = np.concatenate([full_em_ts_x_val, full_em_tr_x_val], axis = 0)
#df_full_dataset = pd.DataFrame(full_em_matrix)
#df_full_dataset.to_csv(index=False, )

In [211]:
'''
df_full_dataset = pd.DataFrame(full_em_matrix)
df_full_dataset.to_csv("C:/test/merge_newDiff_400_with_predictions.csv",index=False)

'''
#df_full_dataset.loc[df_full_dataset.shape[1]] = patient

'\ndf_full_dataset = pd.DataFrame(full_em_matrix)\ndf_full_dataset.to_csv("C:/test/merge_newDiff_400_with_predictions.csv",index=False)\n\n'

In [212]:
print("modified t-ensemble model")

# 1) parameter setting
mo_em_adam = optimizers.Adam(lr=0.05)                                   
mo_em_input_drop_out = 0.3
mo_em_drop_out = 0
mo_em_batch_size = 5
mo_em_BN = True                           

mo_em_layers = [100]
mo_em_tr_loss_best = 100 # for saving best loss value 
mo_em_best_model=[] #for saving best model
count=0 # for early stopping

# 2) model build
mo_em_input = Input(shape=(mo_em_ts_x_val.shape[1],))
mo_em_dp = Dropout(mo_em_input_drop_out)(mo_em_input)
for l in layers:
    if mo_em_BN == True:
        mo_em_m = Dense(l)(mo_em_dp)
        mo_em_bn = BatchNormalization(axis=1, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones')(mo_em_m)
        mo_em_dp = Activation("relu")(mo_em_bn)
    else:
        mo_em_m = Dense(l,activation='relu')(mo_em_dp)
        mo_em_dp = Dropout(drop_out_m)(mo_em_m)

mo_em_final = mo_em_dp
mo_em_output = Dense(1, activation="sigmoid")(mo_em_final)
mo_em_model = Model(inputs=mo_em_input,outputs=mo_em_output)
mo_em_model.compile(optimizer=mo_em_adam, 
                loss='binary_crossentropy',
                metrics=['accuracy'])

# 3) Training: if no increase of tr_loss three times, stop training.
while 1:
    mo_em_model.fit(mo_em_tr_x_val, tr_y_val, batch_size=mo_em_batch_size, nb_epoch=1, verbose = 0)
    mo_em_tr_loss=mo_em_model.evaluate( mo_em_tr_x_val, tr_y_val)[0]
    if mo_em_tr_loss < mo_em_tr_loss_best: # new best model. count reset.
        mo_em_tr_loss_best = mo_em_tr_loss
        count=0
        mo_em_best_model = mo_em_model
    if count>10: # no increase three time. stop.
        mo_em_model = mo_em_best_model
        break
    else: count=count+1
        
print("Model mo-em" +"-"+str(ts_i)+" trained.")

# 4) save model
em_model.save(save_model_path+"/m_mo-em-"+str(ts_i)+".h5")

# 5) evaluate model
mo_em_output_list = model_performance(
    information = False, using_model=mo_em_model,Input_Prediction_Passively = False, 
    tr_x_val=mo_em_tr_x_val, tr_y_val=tr_y_val, ts_x_val=mo_em_ts_x_val, ts_y_val=ts_y_val,
    output_list=["tr_loss", "tr_accuracy", "tr_sensitivity", "tr_specificity", "tr_predictions",
                 "labeled_tr_predictions", "tr_predictions_flat", "roc_auc_tr", 
                 "ts_loss", "ts_accuracy", "ts_sensitivity", "ts_specificity", "ts_predictions",
                 "labeled_ts_predictions", "ts_predictions_flat", "roc_auc_ts", 
                 "roc_auc_total"])

mo_em_tr_loss, mo_em_tr_accuracy, mo_em_tr_sensitivity, mo_em_tr_specificity, mo_em_tr_predictions, mo_em_labeled_tr_predictions, mo_em_tr_predictions_flat, mo_em_roc_auc_tr, mo_em_ts_loss, mo_em_ts_accuracy, mo_em_ts_sensitivity, mo_em_ts_specificity, mo_em_ts_predictions,mo_em_labeled_ts_predictions, mo_em_ts_predictions_flat, mo_em_roc_auc_ts, mo_em_roc_auc_total = mo_em_output_list

print("Overall AUC: ", mo_em_roc_auc_total)
print("Train AUC: ", mo_em_roc_auc_tr)
print("Test AUC: ", mo_em_roc_auc_ts)

print("Train Accuracy: {}".format(mo_em_tr_accuracy))
print("Train Sensitivities & Specificities : "+str(mo_em_tr_sensitivity)+", "+str(mo_em_tr_specificity))
print("Test Accuracy: {}".format(mo_em_ts_accuracy))
print("Test Sensitivities & Specificities : "+str(mo_em_ts_sensitivity)+", "+str(mo_em_ts_specificity))

modified t-ensemble model




Model mo-em-2 trained.
Overall AUC:  0.9788583509513742
Train AUC:  1.0
Test AUC:  0.797979797979798
Train Accuracy: 1.0
Train Sensitivities & Specificities : 1.0, 1.0
Test Accuracy: 0.8064516186714172
Test Sensitivities & Specificities : 0.5555555555555556, 0.9090909090909091


In [214]:
# save prediction result.

tr_df_mo_em = pd.DataFrame(data={"patient":list(inter_dataset["tr_data"][0].index), "hypothesis 1": list(mo_em_tr_predictions_flat), 
                        "prediction":list(mo_em_labeled_tr_predictions), "Platinum_Status":list(tr_y_val)})
tr_df_mo_em.to_csv(save_prediction_path+"m_mo-em-"+str(ts_i)+"_tr.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

ts_df_mo_em = pd.DataFrame(data={"patient":list(inter_dataset["ts_data"][0].index), "hypothesis 1": list(mo_em_ts_predictions_flat), 
                        "prediction":list(mo_em_labeled_ts_predictions), "Platinum_Status":list(ts_y_val)})
ts_df_mo_em.to_csv(save_prediction_path+"m_mo-em-"+str(ts_i)+"_ts.csv", index=False, header=True, columns = ["patient", "hypothesis 1", "prediction", "Platinum_Status"])

## Performance Comparison

In [215]:
t_em_tr_accuracy, t_em_tr_sensitivity, t_em_tr_specificity,t_em_roc_auc_tr, 
t_em_ts_accuracy, t_em_ts_sensitivity, t_em_ts_specificity, t_em_roc_auc_ts, 
t_em_roc_auc_total
tr_accuracy_list = [m_1_l_tr_accuracy, m_2_l_tr_accuracy, m_3_l_tr_accuracy, m_4_l_tr_accuracy, m_5_l_tr_accuracy, m_6_l_tr_accuracy]
ts_accuracy_list = [m_1_l_accuracy, m_2_l_accuracy, m_3_l_accuracy, m_4_l_accuracy, m_5_l_accuracy, m_6_l_accuracy]
tr_accuracy_select = []
ts_accuracy_select = []

for i in select:
    label.append("model"+str(i))
    tr_accuracy_select.append(tr_accuracy_list[i-1])
    ts_accuracy_select.append(ts_accuracy_list[i-1])

label = label+["mean-em","d-comb em","t-em"]
tr_accuracy_select= tr_accuracy_select + [mean_em_tr_accuracy, em_tr_accuracy, t_em_tr_accuracy]
ts_accuracy_select= ts_accuracy_select + [mean_em_ts_accuracy, em_ts_accuracy, t_em_ts_accuracy]

for model_num in range(len(label)):
    print("< "+label[model_num]+" > tr: "+str(tr_accuracy_select[model_num])+", ts: "+str(ts_accuracy_select[model_num]))

#label = ["model1","model2","model3","mean-em","d-comb em","t-em"]
#accuracy = [m1_accuracy,m2_accuracy,m3_accuracy,mean_em_accuracy,em_accuracy,t_em_accuracy ]
#print("model1: "+str(accuracy[0])+"\nmodel2: "+str(accuracy[1])+"\nmodel3: "+str(accuracy[2])+"\nmean-em: "+str(accuracy[3])+"\nd-comb em: "+str(accuracy[4])+"\nt-em: "+str(accuracy[5]))

NameError: name 'm_1_l_tr_accuracy' is not defined