In [4]:
import sys
import random
import numpy as np
from numpy import interp
import keras
from keras import optimizers
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
import sklearn.metrics as metrics
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import plot_precision_recall_curve

In [5]:

# Hyper-parameters are read positionally from the command line.
# NOTE(review): inside a live Jupyter kernel sys.argv holds the kernel's own
# launch arguments, so this cell presumably expects the notebook to be run as
# an exported script -- confirm how it is invoked.
n_nodes = int(sys.argv[1])    # units in each hidden Dense layer
n_layers = int(sys.argv[2])   # number of hidden Dense layers
lr = float(sys.argv[3])       # learning rate handed to the Adam optimizer
n_batch = int(sys.argv[4])    # batch size for model.fit
n_epochs = int(sys.argv[5])   # training epochs per fold
n_cv = int(sys.argv[6])       # number of stratified cross-validation folds
iteration = int(sys.argv[7])  # run counter; only used to tag output names

##################
# You have to modify file names and path to files as you need.
path_to_data_file = 'PATH/TO/DATA/FILE'
test_index_save_file = 'PATH/TO/TEST/INDEX'
train_index_save_file = 'PATH/TO/TRAIN/INDEX'
save_model_folder = 'PATH/TO/MODEL/SAVED'
performance_file = 'PATH/TO/PERFORMANCE/FILE'
##################

seed_1 = 1209 # random number
np.random.seed(seed_1)

seed_2 = 1234 # random number

# Common prefix for every artefact written by this run (index files, saved
# models, performance rows).
fn = 'model_%inodes_%ilayers_%slr_%ibatch_%iepochs_%icv_%i' % (n_nodes, n_layers, str(lr), n_batch, n_epochs, n_cv, iteration)
print(fn)

# Our dataset has a header row and is a tab-delimited file.
# The file is read inside a `with` block so the handle is closed as soon as
# the rows are loaded instead of being left to the garbage collector.
with open(path_to_data_file) as data_file:
    next(data_file, None)  # skip the header row
    whole_data = [line.strip().split('\t') for line in data_file]

model_10nodes_2layers_3e-05lr_10batch_50epochs_test_3cv_1


In [6]:
# featDic maps the row identifier (first column) to its parsed values:
# the feature values followed by the class label as the last element.
featDic = {}
data_list_x = []
data_list_y = []

# The number of index has to be changed according to the number of features in your data.
for line in whole_data:
    featDic[line[0]] = (
        list(map(float, line[1:11]))
        + list(map(int, line[11:13]))
        + list(map(float, line[13:15]))
        # Parse the label column as ONE integer. Mapping int() over the string
        # itself would iterate its characters, which breaks for any label that
        # is not a single digit (e.g. "10" or "-1").
        + [int(line[15])]
    )
    data_list_x.append(featDic[line[0]][:-1])
    data_list_y.append(featDic[line[0]][-1])

# Standardise every feature column to zero mean / unit variance.
data_x_bf_st = np.array(data_list_x)
feature_mean = np.mean(data_x_bf_st, axis=0)
feature_std = np.std(data_x_bf_st, axis=0)
# A constant column has std == 0 and would turn into NaN (0/0); dividing by 1
# instead leaves such a column at exactly 0 after centring.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
data_x = (data_x_bf_st - feature_mean) / feature_std
data_y = np.array(data_list_y)

In [8]:
# Re-seed NumPy's global RNG with the second seed before building the splitter.
np.random.seed(seed_2)

# Per-fold results are accumulated in these lists by the cross-validation loop.
accs, f1s = [], []
ROC_aucs, RP_aucs = [], []
tprs = []

# Common false-positive-rate grid (100 points on [0, 1]) onto which each
# fold's ROC curve is interpolated.
mean_fpr = np.linspace(0, 1, num=100)

# Shuffled, stratified splitter with a fixed random_state.
cv = StratifiedKFold(random_state=seed_2, shuffle=True, n_splits=n_cv)

In [9]:
# Stratified k-fold cross-validation. For every fold: record the split, train
# a fresh network, save it, and collect accuracy / ROC / F1 / precision-recall
# metrics on the held-out part.
# `enumerate` provides the fold counter `i`; it was previously never defined,
# so the fold-numbered file names below could not be built.
for i, (train, test) in enumerate(cv.split(data_x, data_y)):

    # Append the row indices of this fold's test and train parts to their
    # index files, tagged with the run name and the 1-based fold number.
    with open(test_index_save_file, 'a') as index_file:
        index_file.write(fn + '_%i\t' % (i + 1))
        index_file.write(','.join(str(x) for x in test) + '\n')
    with open(train_index_save_file, 'a') as train_index_file:
        train_index_file.write(fn + '_%i\t' % (i + 1))
        train_index_file.write(','.join(str(x) for x in train) + '\n')

    # n_layers hidden ReLU layers of n_nodes units, then one sigmoid output
    # unit for binary classification.
    model = keras.Sequential()
    for i_layer in range(n_layers):
        model.add(keras.layers.Dense(n_nodes, activation=tf.nn.relu))
    model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))

    # `learning_rate` is the current keyword; `lr` is a deprecated alias that
    # newer Keras releases no longer accept.
    optimizer = optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999)
    model.compile(loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'])

    model.fit(data_x[train], data_y[train], batch_size=n_batch, epochs=n_epochs)

    # One saved model per fold: <save_model_folder>/<fn>_<fold>.h5
    model.save(save_model_folder + '/%s_%i.h5' % (fn, i + 1))
    print('model saved')

    test_loss, test_acc = model.evaluate(data_x[test], data_y[test])
    # Flatten the (n_samples, 1) network output to a 1-D score vector, the
    # shape the sklearn metric functions expect.
    predictions = model.predict(data_x[test]).ravel()

    accs.append(test_acc)

    # ROC curve and its area for this fold.
    fpr, tpr, threshold = metrics.roc_curve(data_y[test], predictions)
    roc_auc = metrics.auc(fpr, tpr)

    # Interpolate the fold's TPR onto the shared FPR grid so the curves can be
    # averaged point-wise; the first point is pinned to 0.
    tprs.append(interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0
    ROC_aucs.append(roc_auc)

    # Precision-recall curve area, and F1 on the scores rounded to 0/1.
    precision, recall, thresholds = precision_recall_curve(data_y[test], predictions)
    f1 = f1_score(data_y[test], predictions.round())
    f1s.append(f1)
    rp_auc = metrics.auc(recall, precision)
    RP_aucs.append(rp_auc)

# Mean ROC curve across folds (last point pinned to 1) and its AUC.
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = metrics.auc(mean_fpr, mean_tpr)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
model saved
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/5

In [None]:
# Append one tab-separated row for this run to the performance file:
#   run name, mean accuracy, per-fold accuracies, AUC of the mean ROC curve,
#   per-fold ROC AUCs, mean F1, per-fold F1s, mean PR AUC, per-fold PR AUCs.
# Means are divided by the actual number of collected folds rather than a
# hard-coded 3, so they stay correct for any number of CV splits.
with open(performance_file, 'a') as perf_file:
    perf_file.write(fn + '\t' + '\t'.join([str(sum(accs) / len(accs)), '\t'.join(str(x) for x in accs),
                               str(mean_auc), '\t'.join(str(x) for x in ROC_aucs),
                               str(sum(f1s) / len(f1s)), '\t'.join(str(x) for x in f1s),
                               str(sum(RP_aucs) / len(RP_aucs)), '\t'.join(str(x) for x in RP_aucs)]) + '\n')