In [1]:
import pandas as pd
from keras.callbacks import History, ReduceLROnPlateau,EarlyStopping,ModelCheckpoint
import os
import numpy as np
from data_analysis import calculate_metrics, load_weights_and_evaluate
from model_builders import GCN_pretraining
from hyperparameter_tuning_GCN import objective
from functools import partial
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import pickle
import dill

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Training callbacks, both watching the 'loss' quantity: stop after 8 epochs
# without improvement, and halve the learning rate after 4 such epochs.
es = EarlyStopping(monitor='loss', patience=8, min_delta=0)
rlr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=4,
    verbose=1,
    min_lr=1e-7,
)

# Baseline (un-tuned) GCN configuration.  The three-element lists line up with
# the three layers declared by "num_layers" / "enc_layer_names".
model_params = dict(
    # graph / featurisation sizes
    num_layers=3,
    max_atoms=70,
    num_atom_features=62,
    num_atom_features_original=62,
    num_bond_features=6,
    max_degree=5,
    # convolution widths and fingerprint lengths, one entry per layer
    conv_width=[32, 64, 96],
    fp_length=[96, 96, 96],
    activ_enc="selu",
    activ_dec="selu",
    learning_rates=[0.001, 0.001, 0.001],
    learning_rates_fp=[0.005, 0.005, 0.005],
    # losses and weights for the two named outputs
    losses_conv={
        "neighbor_output": "mean_squared_error",
        "self_output": "mean_squared_error",
    },
    lossWeights={"neighbor_output": 1.0, "self_output": 1.0},
    metrics="mse",
    loss_fp="mean_squared_error",
    enc_layer_names=["enc_1", "enc_2", "enc_3"],
    callbacks=[es, rlr],
    # optimiser / regularisation constants
    adam_decay=0.0005329142291371636,
    beta=5,
    p=0.004465204118126482,
    # dense head and training schedule
    dense_size=[96, 96, 48],
    dropout_rate=[0.1, 0.1],
    lr=0.001,
    batch_size=int(64),
    n_epochs=int(5),
)


In [2]:
# Hyperopt search space.  Each row is (label, low, high, step) for a
# quantised-uniform dimension; hyperopt reports these as floats, so consumers
# cast them to int where a layer size is needed.
_QUNIFORM_LAYER_DIMS = (
    ('conv1', 32, 96, 8),
    ('conv2', 48, 128, 8),
    ('conv3', 64, 168, 8),
    ('fp', 64, 196, 8),
    ('dense1', 96, 256, 32),
    ('dense2', 96, 256, 32),
    ('dense3', 48, 128, 32),
)

fspace = {
    label: hp.quniform(label, low, high, step)
    for label, low, high, step in _QUNIFORM_LAYER_DIMS
}
# Continuous dimensions, followed by the epoch budget (multiples of 5).
fspace['dropout_rate'] = hp.uniform('dropout_rate', 0.1, 0.5)
fspace['lr'] = hp.uniform('lr', 0.0005, 0.01)
fspace['n_epochs'] = hp.quniform('n_epochs', 15, 40, 5)

In [3]:
# Target whose data set is loaded, and the repository root that contains
# `data/<target>/`.  The root defaults to the original developer path but can
# be overridden with the KINASE_BINDING_DIR environment variable so the
# notebook also runs on other machines.
target_1 = 'p38'
base_path_1 = os.environ.get('KINASE_BINDING_DIR',
                             'C:/Users/tomas/Documents/GitHub/kinase_binding')
data_dir_1 = base_path_1 + f'/data/{target_1}'

# Full data table, indexed by 'biolab_index' so the fold label lists below can
# be used directly with `.loc`.
data_fpath_1 = data_dir_1 + '/data.csv'
df_p38 = pd.read_csv(data_fpath_1).set_index('biolab_index')

# NOTE(review): dill.load can execute arbitrary code while unpickling; only
# load fold files that come from a trusted source.
with open(data_dir_1 + '/train_val_folds.pkl', "rb") as in_f:
    train_val_folds_p38 = dill.load(in_f)

with open(data_dir_1 + '/train_test_folds.pkl', "rb") as in_f:
    train_test_folds_p38 = dill.load(in_f)

# Number of cross-validation folds read from train_val_folds_p38.
n_cv_folds_p38 = 6

# Entries 0..5 are the cross-validation folds (element 0 of each fold selects
# the training rows, element 1 the validation rows); the final entry is the
# train/test split, so both lists end up with 7 data frames.
training_p38 = [df_p38.loc[train_val_folds_p38[k][0]]
                for k in range(n_cv_folds_p38)]
training_p38.append(df_p38.loc[train_test_folds_p38[0]])

validation_p38 = [df_p38.loc[train_val_folds_p38[k][1]]
                  for k in range(n_cv_folds_p38)]
validation_p38.append(df_p38.loc[train_test_folds_p38[1]])

In [4]:
# Pre-bind the p38 training/validation splits to the imported `objective`;
# the resulting callable is what run_trials() hands to hyperopt's fmin as `fn`.
fmin_objective = partial(objective, train_sets = training_p38, val_sets = validation_p38)

In [5]:
def run_trials(trials_path="gcn.hyperopt", trials_step=0, max_trials=0):
    """Run (or resume) the hyperopt search and persist its Trials object.

    If ``trials_path`` holds a previously pickled Trials object it is loaded
    and the evaluation budget becomes ``len(trials.trials) + trials_step``;
    otherwise a fresh ``Trials()`` is created and ``max_trials`` is used as
    the budget.  The search is driven by the notebook-level ``fmin_objective``
    and ``fspace``.

    Parameters
    ----------
    trials_path : str
        File the Trials object is loaded from and saved back to.
    trials_step : int
        Number of additional trials to run on top of the loaded ones
        (0 = only reload and report the stored result).
    max_trials : int
        Evaluation budget used when no saved trials could be loaded.

    Returns
    -------
    The Trials object after the search, as written to ``trials_path``.
    """
    try:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at trials files produced by this notebook.
        with open(trials_path, "rb") as in_f:
            trials = pickle.load(in_f)
        n_done = len(trials.trials)
    except FileNotFoundError:
        # Nothing saved yet: start a new search.
        trials = Trials()
    except Exception as exc:
        # Unreadable/incompatible file: keep the best-effort behaviour of
        # starting over, but say why instead of hiding the error.
        print("Could not load saved trials ({!r}); starting a new search.".format(exc))
        trials = Trials()
    else:
        print("Found saved Trials! Loading...")
        max_trials = n_done + trials_step
        print("Rerunning from {} trials to {} (+{}) trials".format(n_done, max_trials, trials_step))

    best = fmin(fn=fmin_objective, space=fspace, algo=tpe.suggest,
                max_evals=max_trials, trials=trials)

    print("Best:", best)

    # Save the trials object so the next call can resume from here.
    with open(trials_path, "wb") as f:
        pickle.dump(trials, f)
    return trials

In [6]:
# Load the saved search (or start one) and keep the resulting Trials object;
# with the default step of 0 this only reloads and reports the stored result.
trials = run_trials()

Found saved Trials! Loading...
Rerunning from 555 trials to 555 (+0) trials
100%|████████████████████████████████████████████████████████████████████████| 555/555 [00:00<?, ?trial/s, best loss=?]
Best: {'conv1': 96.0, 'conv2': 104.0, 'conv3': 120.0, 'dense1': 256.0, 'dense2': 192.0, 'dense3': 96.0, 'dropout_rate': 0.3554537312557061, 'fp': 160.0, 'lr': 0.007037117031430456, 'n_epochs': 35.0}


In [7]:
# Find the trial with the lowest loss (ties resolve to the latest trial) and
# keep its sampled hyperparameters.  `index` starts at 0 so it is defined even
# when the first trial is the best one or there is only a single trial.
index = 0
best_loss = trials.trials[0]['result']['loss']
for i, trial in enumerate(trials.trials[1:], start=1):
    loss = trial['result']['loss']
    if loss <= best_loss:
        best_loss, index = loss, i

# 'vals' maps each hyperparameter label to a list; the consumers below read
# element [0] of each list.
best_params = trials.trials[index]['misc']['vals']

In [8]:
from hyperparameter_tuning_GCN import GCN_hyper

# Fresh callbacks for the final model (same settings as during tuning).
es = EarlyStopping(monitor='loss', patience=8, min_delta=0)
rlr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=4,
    verbose=1,
    min_lr=1e-7,
)

# Pull the tuned values out of `best_params`; each entry is a list whose first
# element is the sampled value, and layer sizes are cast to int.
tuned_conv_widths = [int(best_params[key][0]) for key in ('conv1', 'conv2', 'conv3')]
tuned_fp_length = int(best_params['fp'][0])
tuned_dense_sizes = [int(best_params[key][0]) for key in ('dense1', 'dense2', 'dense3')]
tuned_dropout = best_params['dropout_rate'][0]

# Final GCN configuration: fixed architecture constants plus the tuned values.
gcn_best = {
    "num_layers": 3,
    "max_atoms": 70,
    "num_atom_features": 62,
    "num_atom_features_original": 62,
    "num_bond_features": 6,
    "max_degree": 5,
    "conv_width": tuned_conv_widths,
    # the same fingerprint length is used for all three layers
    "fp_length": [tuned_fp_length, tuned_fp_length, tuned_fp_length],
    "activ_enc": "selu",
    "activ_dec": "selu",
    "learning_rates": [0.001, 0.001, 0.001],
    "learning_rates_fp": [0.005, 0.005, 0.005],
    "losses_conv": {
        "neighbor_output": "mean_squared_error",
        "self_output": "mean_squared_error",
    },
    "lossWeights": {"neighbor_output": 1.0, "self_output": 1.0},
    "metrics": "mse",
    "loss_fp": "mean_squared_error",
    "enc_layer_names": ["enc_1", "enc_2", "enc_3"],
    'callbacks': [es, rlr],
    'adam_decay': 0.0005329142291371636,
    'beta': 5,
    'p': 0.004465204118126482,
    'dense_size': tuned_dense_sizes,
    # one tuned dropout rate, applied twice
    'dropout_rate': [tuned_dropout, tuned_dropout],
    'lr': best_params['lr'][0],
    'batch_size': int(64),
    'n_epochs': int(best_params['n_epochs'][0]),
}

gcn = GCN_hyper(gcn_best)

In [9]:
# Train one model per split with the tuned configuration and collect metrics.
# The last entry of training_p38 / validation_p38 is the train/test split, so
# it is reported as 'Test'; every earlier entry is a cross-validation fold.
# Note that results_train['Test'] therefore holds the metrics on the training
# part of the train/test split.
results_val = {}
results_train = {}
last_split = len(training_p38) - 1
for i in range(len(training_p38)):
    # Held-out ("cold") inputs and labels for this split.
    X_atoms_cold, X_bonds_cold, X_edges_cold = gcn.dataframe_to_gcn_input(validation_p38[i])
    Y_cold = validation_p38[i].Binary
    # Training inputs and labels for this split.
    X_atoms_train, X_bonds_train, X_edges_train = gcn.dataframe_to_gcn_input(training_p38[i])
    Y = training_p38[i].Binary

    # Build a new encoder and model for every split.
    gcn_encoder = gcn.build_encoder()
    gcn_model = gcn.build_model(gcn_encoder)
    gcn_model.fit([X_atoms_train, X_bonds_train, X_edges_train], Y,
                  batch_size=gcn_best['batch_size'],
                  epochs=gcn_best['n_epochs'],
                  verbose=1,
                  shuffle=True,
                  validation_data=([X_atoms_cold, X_bonds_cold, X_edges_cold], Y_cold))

    y_pred_val = gcn_model.predict([X_atoms_cold, X_bonds_cold, X_edges_cold])
    y_pred_train = gcn_model.predict([X_atoms_train, X_bonds_train, X_edges_train])

    # Label derived from the position in the list instead of a hard-coded
    # fold count, so no split is silently dropped if the count changes.
    split_name = 'Test' if i == last_split else 'Fold %s' % i
    results_val[split_name] = calculate_metrics(np.array(Y_cold), y_pred_val.squeeze())
    results_train[split_name] = calculate_metrics(np.array(Y), y_pred_train.squeeze())
        


LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 2541 samples, validate on 509 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 2541 samples, validate on 509 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 2541 samples, validate on 509 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 2541 samples, validate on 509 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 2541 samples, validate on 509 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 2545 samples, validate on 505 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 3050 samples, validate on 509 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


In [12]:
# Metrics on the training portion of each split, one row per split.
pd.DataFrame(results_train).T

Unnamed: 0,roc_auc,tn,fp,fn,tp,map,precision,recall,accuracy
Fold 0,0.950408,1060.0,232.0,91.0,1158.0,0.945362,0.833094,0.927142,0.872885
Fold 1,0.947933,1053.0,218.0,109.0,1161.0,0.945379,0.841914,0.914173,0.871311
Fold 2,0.95721,1125.0,178.0,130.0,1108.0,0.953127,0.861586,0.894992,0.878788
Fold 3,0.953121,1176.0,128.0,196.0,1041.0,0.94935,0.890505,0.841552,0.872491
Fold 4,0.94983,1114.0,224.0,93.0,1110.0,0.941381,0.832084,0.922693,0.875246
Fold 5,0.95054,1080.0,242.0,90.0,1133.0,0.946065,0.824,0.92641,0.869548
Test,0.9404,1375.0,191.0,267.0,1217.0,0.933095,0.864347,0.820081,0.849836


In [13]:
# Metrics on the held-out portion of each split (validation folds, then the test set).
pd.DataFrame(results_val).T

Unnamed: 0,roc_auc,tn,fp,fn,tp,map,precision,recall,accuracy
Fold 0,0.895147,205.0,69.0,25.0,210.0,0.84754,0.752688,0.893617,0.815324
Fold 1,0.906811,248.0,47.0,35.0,179.0,0.870437,0.792035,0.836449,0.8389
Fold 2,0.882748,190.0,73.0,44.0,202.0,0.87395,0.734545,0.821138,0.770138
Fold 3,0.866428,213.0,49.0,60.0,187.0,0.833247,0.792373,0.757085,0.785855
Fold 4,0.897102,171.0,57.0,42.0,239.0,0.906437,0.807432,0.850534,0.805501
Fold 5,0.845817,166.0,78.0,48.0,213.0,0.852169,0.731959,0.816092,0.750495
Test,0.785513,258.0,48.0,96.0,107.0,0.700677,0.690323,0.527094,0.717092


In [15]:
from keras.utils.vis_utils import plot_model

# Render the architecture of the most recently trained model (the one left in
# `gcn_model` by the training loop) to a PNG used in the report.
architecture_png = '../../../../Desktop/latex report intern/DeepGCNN.png'
plot_model(
    gcn_model,
    to_file=architecture_png,
    show_shapes=True,
    show_layer_names=True,
)