In [1]:
import pandas as pd
from keras.callbacks import History, ReduceLROnPlateau,EarlyStopping,ModelCheckpoint
import os
import numpy as np
from data_analysis import calculate_metrics
from functools import partial
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import pickle
import dill
from hyper_mining import objective_fn
import xgboost as xgb
from sklearn.model_selection import train_test_split

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
fspace = {
    'conv1' : hp.quniform('conv1', 32, 64, 8),
    'conv2' : hp.quniform('conv2', 64, 128, 8),
    'conv3' : hp.quniform('conv3', 128, 168, 8),
    'fp' : hp.quniform('fp', 96, 196, 8),
    'dense1' : hp.quniform('dense1',96,512,32),
    'dense2' : hp.quniform('dense2',96,512,32),
    'dense3' : hp.quniform('dense3',64,512,32),
    'dropout_rate' : hp.uniform('dropout_rate',0.1,0.5),
    'lr' : hp.uniform('lr',0.000001,0.01),
    'n_epochs' : hp.quniform('n_epochs',15,60,5),
    'batch_size' : hp.quniform('batch_size',64,256,16),
    'colsample_bylevel' : hp.uniform('colsample_bylevel', 0.1, 1), 
    'colsample_bytree' : hp.uniform('colsample_bytree', 0.1, 1), 
    'gamma' : hp.uniform('gamma', 0.1, 1), 
    'learning_rate' : hp.uniform('learning_rate', 0.1, 1),
    'max_delta_step' : hp.quniform('max_delta_step',1,10,1),
    'max_depth' : hp.quniform('max_depth',6, 12, 1),
    'min_child_weight' : hp.quniform('min_child_weight',10 ,500 ,5),
    'reg_alpha' : hp.uniform('reg_alpha',0.1,100),
    'reg_lambda' : hp.uniform('reg_lambda',0.1,100),
    'subsample' : hp.uniform('subsample',0.1,1.0),
    'max_bin' : hp.quniform('max_bin',16,256,16)
    #'margin' : hp.uniform('margin',0.2,2)
}

In [3]:
target_1 = 'pi3k'
base_path_1 = f'C:/Users/tomas/Documents/GitHub/kinase_binding'

data_fpath_1 = base_path_1+f'/data/{target_1}/data.csv'
df_p38=pd.read_csv(data_fpath_1).set_index('biolab_index')

with open(base_path_1+f'/data/{target_1}/train_val_folds.pkl', "rb") as in_f:
    train_val_folds_p38 = dill.load(in_f)

with open(base_path_1+f'/data/{target_1}/train_test_folds.pkl', "rb") as in_f:
    train_test_folds_p38 = dill.load(in_f)
    
target_2 = 'akt1'
base_path_2 = f'C:/Users/tomas/Documents/GitHub/kinase_binding'

data_fpath_2 = base_path_2+f'/data/{target_2}/data.csv'
df_akt1 = pd.read_csv(data_fpath_2).set_index('biolab_index')

with open(base_path_2+f'/data/{target_2}/train_val_folds.pkl', "rb") as in_f:
    train_val_folds_akt1 = dill.load(in_f)
with open(base_path_2+f'/data/{target_2}/train_test_folds.pkl', "rb") as in_f:
    train_test_folds_akt1 = dill.load(in_f)
    
target_3 = 'pi3k'
base_path_3 = f'C:/Users/tomas/Documents/GitHub/kinase_binding'

data_fpath_3 = base_path_3+f'/data/{target_3}/data.csv'
df_pi3k = pd.read_csv(data_fpath_3).set_index('biolab_index')

with open(base_path_3+f'/data/{target_3}/train_val_folds.pkl', "rb") as in_f:
    train_val_folds_pi3k = dill.load(in_f)
with open(base_path_3+f'/data/{target_3}/train_test_folds.pkl', "rb") as in_f:
    train_test_folds_pi3k = dill.load(in_f)

In [4]:
#Evaluation Splits (our test set)
training_p38 = df_p38.loc[train_test_folds_p38[0]]
validation_p38 = df_p38.loc[train_test_folds_p38[1]]


training_akt1 = df_akt1.loc[train_test_folds_akt1[0]]
validation_akt1 = df_akt1.loc[train_test_folds_akt1[1]]
               

training_pi3k = df_pi3k.loc[train_test_folds_pi3k[0]]
validation_pi3k = df_pi3k.loc[train_test_folds_pi3k[1]]

In [5]:
#AVE Bias splits (test) only p38
ave_p38_train = pd.read_csv('data/p38/split_aveb/train_all.csv', index_col=0)
ave_p38_val = pd.read_csv('data/p38/split_aveb/test.csv', index_col = 0)
print(len(ave_p38_train),len(ave_p38_val))

3186 371


In [6]:
#Random splits with sklearn (on our test set)
df_p38 = df_p38.reset_index(drop=True)
X_train_p38, X_val_p38, Y_train_p38, Y_val_p38 = train_test_split(df_p38.rdkit,
                                                                  df_p38.Binary,
                                                                  test_size = 0.15,
                                                                  train_size = 0.85,
                                                                  shuffle = True)
X_train_p38 = pd.DataFrame(X_train_p38)
X_val_p38 = pd.DataFrame(X_val_p38)
print(len(X_train_p38),len(X_val_p38))

3190 564


In [7]:
df_akt1 = df_akt1.reset_index(drop=True)
X_train_akt1, X_val_akt1, Y_train_akt1, Y_val_akt1 = train_test_split(df_akt1.rdkit,
                                                                     df_akt1.Binary,
                                                                     test_size = 0.15,
                                                                     train_size = 0.85,
                                                                     shuffle = True)
X_train_akt1 = pd.DataFrame(X_train_akt1)
X_val_akt1 = pd.DataFrame(X_val_akt1)
print(len(X_train_akt1),len(X_val_akt1))

1819 321


In [8]:
df_pi3k = df_pi3k.reset_index(drop=True)
X_train_pi3k, X_val_pi3k, Y_train_pi3k, Y_val_pi3k = train_test_split(df_pi3k.rdkit,
                                                                      df_pi3k.Binary,
                                                                      test_size = 0.15,
                                                                      train_size = 0.85,
                                                                      shuffle = True)
X_train_pi3k = pd.DataFrame(X_train_pi3k)
X_val_pi3k = pd.DataFrame(X_val_pi3k)
print(len(X_train_pi3k),len(X_val_pi3k))

3190 564


In [10]:
fmin_objective = partial(objective_fn, train_sets = training_p38, val_sets = validation_p38)

In [11]:
def run_trials():

    trials_step = 0  # how many additional trials to do after loading saved trials. 1 = save after iteration
    max_trials = 1  # initial max_trials. put something small to not have to wait

    
    try:  # try to load an already saved trials object, and increase the max
        trials = pickle.load(open("gcn_xgb.hyperopt", "rb"))
        print("Found saved Trials! Loading...")
        max_trials = len(trials.trials) + trials_step
        print("Rerunning from {} trials to {} (+{}) trials".format(len(trials.trials), max_trials, trials_step))
    except:  # create a new trials object and start searching
        trials = Trials()

    best = fmin(fn = fmin_objective, space = fspace, algo=tpe.suggest, max_evals=max_trials, trials=trials)

    print("Best:", best)
    
    # save the trials object
    with open("gcn_xgb.hyperopt", "wb") as f:
        pickle.dump(trials, f)
    return(trials)

In [12]:
trials = run_trials()

Found saved Trials! Loading...
Rerunning from 154 trials to 154 (+0) trials
100%|████████████████████████████████████████████████████████████████████████| 154/154 [00:00<?, ?trial/s, best loss=?]
Best: {'batch_size': 288.0, 'colsample_bylevel': 0.4371082812232264, 'colsample_bytree': 0.4179415558635843, 'conv1': 56.0, 'conv2': 88.0, 'conv3': 136.0, 'dense1': 384.0, 'dense2': 288.0, 'dense3': 224.0, 'dropout_rate': 0.27225175676555935, 'fp': 152.0, 'gamma': 0.919836526180396, 'learning_rate': 0.41409388868400826, 'lr': 0.0008110012706176706, 'max_bin': 48.0, 'max_delta_step': 2.0, 'max_depth': 7.0, 'min_child_weight': 20.0, 'n_epochs': 25.0, 'reg_alpha': 42.8887552483495, 'reg_lambda': 12.306130216692438, 'subsample': 0.6038298323514097}


In [13]:
best_loss = trials.trials[0]['result']['loss']
for i in range(1,len(trials.trials)):
    if (trials.trials[i]['result']['loss'] <=  best_loss):
        best_loss = trials.trials[i]['result']['loss']
        index = i
best_params = trials.trials[index]['misc']['vals']

In [14]:
from hyper_mining import XGB_predictor,GCN_online_mining_test
from data_analysis import calculate_metrics
es = EarlyStopping(monitor='loss',patience=8, min_delta=0)
rlr = ReduceLROnPlateau(monitor='loss',factor=0.5, patience=4, verbose=1, min_lr=0.0000001)
gcn_best = {
        "num_layers" : 3,
        "max_atoms" : 70,
        "num_atom_features" : 62,
        "num_atom_features_original" : 62,
        "num_bond_features" : 6,
        "max_degree" : 5,
        "conv_width" : [int(best_params['conv1'][0]), int(best_params['conv2'][0]), int(best_params['conv3'][0])],
        "fp_length" : [int(best_params['fp'][0]), int(best_params['fp'][0]), int(best_params['fp'][0])],
        "activ_enc" : "selu",
        "activ_dec" : "selu",
        "learning_rates" : [0.001,0.001,0.001],
        "learning_rates_fp": [0.005,0.005,0.005],
        "losses_conv" : {
                    "neighbor_output": "mean_squared_error",
                    "self_output": "mean_squared_error",
                    },
        "lossWeights" : {"neighbor_output": 1.0, "self_output": 1.0},
        "metrics" : "mse",
        "loss_fp" : "mean_squared_error",
        "enc_layer_names" : ["enc_1", "enc_2", "enc_3"],
        'callbacks' : [es,rlr],
        'adam_decay': 0.0005329142291371636,
        'beta': 5,
        'p': 0.004465204118126482,
        'dense_size' : [int(best_params['dense1'][0]), int(best_params['dense2'][0]), int(best_params['dense3'][0])],
        'dropout_rate' : [best_params['dropout_rate'][0], best_params['dropout_rate'][0]],
        'lr' : best_params['lr'][0],
        'batch_size' : int(best_params['batch_size'][0]),
        'n_epochs' : int(best_params['n_epochs'][0])
        #'margin' : best_params['margin'][0]
        }
xgb_best = {
        "colsample_bylevel" : best_params['colsample_bylevel'][0],
        "colsample_bytree" : best_params['colsample_bytree'][0],
        "gamma" : best_params['gamma'][0],
        "eta" : best_params['learning_rate'][0],
        "max_delta_step" : int(best_params['max_delta_step'][0]),
        "max_depth" : int(best_params['max_depth'][0]),
        "min_child_weight" : int(best_params['min_child_weight'][0]),
        "alpha" : best_params['reg_alpha'][0],
        "lambda" : best_params['reg_lambda'][0],
        "subsample" : best_params['subsample'][0],
        "max_bin" : int(best_params['max_bin'][0]),
        "eval_metric":'auc',
        "objective":'binary:logistic',
        "booster":'gbtree'
        #"single_precision_histogram" : True
        }
class_XGB = XGB_predictor(xgb_best)
class_GCN = GCN_online_mining_test(gcn_best)

In [15]:
train_list_p38 = [training_p38, ave_p38_train, X_train_p38]
val_list_p38 = [validation_p38, ave_p38_val, X_val_p38]

train_list_akt1 = [training_akt1, X_train_akt1]
val_list_akt1 = [validation_akt1, X_val_akt1]

train_list_pi3k = [training_pi3k, X_train_pi3k]
val_list_pi3k = [validation_pi3k, X_val_pi3k]

In [17]:
eval_p38 = {}
es2 = EarlyStopping(monitor='loss',patience=15, min_delta=0)
rlr2 = ReduceLROnPlateau(monitor='loss',factor=0.5, patience=2, verbose=1, min_lr=0.000000001)
for i in range(len(train_list_p38)):
    if i == 2:
        X_atoms_cold,X_bonds_cold,X_edges_cold = class_GCN.dataframe_to_gcn_input(val_list_p38[i])
        Y_cold = Y_val_p38
        Y_dummy_cold = np.empty((X_atoms_cold.shape[0],gcn_best['dense_size'][2]+1))
        X_atoms_train, X_bonds_train, X_edges_train = class_GCN.dataframe_to_gcn_input(train_list_p38[i])
        Y = Y_train_p38
        Y_dummy_train = np.empty((X_atoms_train.shape[0],gcn_best['dense_size'][2]+1))
    else:
        X_atoms_cold,X_bonds_cold,X_edges_cold = class_GCN.dataframe_to_gcn_input(val_list_p38[i])
        Y_cold = val_list_p38[i].Binary
        Y_dummy_cold = np.empty((X_atoms_cold.shape[0],gcn_best['dense_size'][2]+1))
        X_atoms_train, X_bonds_train, X_edges_train = class_GCN.dataframe_to_gcn_input(train_list_p38[i])
        Y = train_list_p38[i].Binary
        Y_dummy_train = np.empty((X_atoms_train.shape[0],gcn_best['dense_size'][2]+1))
    
    gcn_encoder = class_GCN.build_encoder()
    gcn_model = class_GCN.build_model(gcn_encoder)
    gcn_mining = class_GCN.build_mining(gcn_model)
    gcn_mining.fit([X_atoms_train,X_bonds_train,X_edges_train,Y],
                   Y_dummy_train,
                   epochs = gcn_best['n_epochs'],
                   batch_size = gcn_best['batch_size'],
                   shuffle = True,
                   validation_data = ([X_atoms_cold,X_bonds_cold,X_edges_cold,Y_cold],Y_dummy_cold),
                   callbacks=[es2,rlr2]
                  )
    #Predict Embeddings
    embeddings_cold = gcn_model.predict([X_atoms_cold,X_bonds_cold,X_edges_cold])
    embeddings_train = gcn_model.predict([X_atoms_train, X_bonds_train, X_edges_train])
        
    #Prepare data for XGBoost
    dmatrix_train = class_XGB.to_xgb_input(Y,embeddings_train)
    dmatrix_cold = class_XGB.to_xgb_input(Y_cold,embeddings_cold)
    evalist = [(dmatrix_train,'train'),(dmatrix_cold,'eval')]
    xgb_model = class_XGB.build_model(dmatrix_train,evalist,300)
    xgb_pred_train = xgb_model.predict(dmatrix_train)
    xgb_pred_cold = xgb_model.predict(dmatrix_cold)
    
    if i == 0:
        eval_p38['Test'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)
    elif i == 1:
        eval_p38['Ave'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)
    elif i == 2:
        eval_p38['Random'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)

LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 3217 samples, validate on 537 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 17/25
Epoch 18/25

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25

Epoch 00023: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 24/25
Epoch 25/25

Epoch 00025: ReduceLROnPlateau reducing learning rate to 5.06875803694129e-05.
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 3186 samples, validate on 371 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 8/25
Epoch 9/25

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 10/25
Epoch 11/25

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 12/25
Epoch 13/25

Epoch 00013: ReduceLROnPlateau reducing learning rate to 5.06875803694129e-05.
Epoch 14/25
Epoch 15/25

Epoch 00015: ReduceLROnPlateau reducing learning rate to 2.534379018470645e-05.
Epoch 16/25
Epoch 17/25

Epoch 00017: ReduceLROnPlateau reducing learning rate to 1.2671895092353225e-05.
Epoch 18/25
Epoch 19/25
Epoch 20/25

Epoch 00020: ReduceLROnPlateau reducing learning rate to 6.335947546176612e-06.
Epoch 21/25
Epoch 22/25

Epoch 00022: ReduceLROnPlateau reducing learning rate to 3.167973773088306e-06.
Epoch 23/25
Epoch 24/25

Epoch 00024: ReduceLROnPl

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 3190 samples, validate on 564 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 13/25
Epoch 14/25

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 15/25
Epoch 16/25

Epoch 00016: ReduceLROnPlateau reducing learning rate to 5.06875803694129e-05.
Epoch 17/25
Epoch 18/25

Epoch 00018: ReduceLROnPlateau reducing learning rate to 2.534379018470645e-05.
Epoch 19/25
Epoch 20/25

Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.2671895092353225e-05.
Epoch 21/25
Epoch 22/25

Epoch 00022: ReduceLROnPlateau reducing learning rate to 6.335947546176612e-06.
Epoch 23/25
Epoch 24/25

Epoch 00024: ReduceLROnPlateau reducing learning rate to 3.167973773088306e-06.
Epoch 25/25


In [18]:
eval_p38 = pd.DataFrame(eval_p38).T
eval_p38.to_csv('../../../../Desktop/binding/thesis english/Results/3-One-Shot/Online/p38.csv')
eval_p38

Unnamed: 0,roc_auc,tn,fp,fn,tp,map,precision,recall,accuracy
Test,0.866008,293.0,67.0,48.0,129.0,0.726899,0.658163,0.728814,0.785847
Ave,0.729158,197.0,66.0,40.0,68.0,0.498475,0.507463,0.62963,0.714286
Random,0.845682,280.0,65.0,61.0,158.0,0.784768,0.70852,0.721461,0.776596


In [19]:
eval_akt1 = {}
es2 = EarlyStopping(monitor='loss',patience=15, min_delta=0)
rlr2 = ReduceLROnPlateau(monitor='loss',factor=0.5, patience=2, verbose=1, min_lr=0.000000001)
for i in range(len(train_list_akt1)):
    if i == 1:
        X_atoms_cold,X_bonds_cold,X_edges_cold = class_GCN.dataframe_to_gcn_input(val_list_akt1[i])
        Y_cold = Y_val_akt1
        Y_dummy_cold = np.empty((X_atoms_cold.shape[0],gcn_best['dense_size'][2]+1))
        X_atoms_train, X_bonds_train, X_edges_train = class_GCN.dataframe_to_gcn_input(train_list_akt1[i])
        Y = Y_train_akt1
        Y_dummy_train = np.empty((X_atoms_train.shape[0],gcn_best['dense_size'][2]+1))
    else:
        X_atoms_cold,X_bonds_cold,X_edges_cold = class_GCN.dataframe_to_gcn_input(val_list_akt1[i])
        Y_cold = val_list_akt1[i].Binary
        Y_dummy_cold = np.empty((X_atoms_cold.shape[0],gcn_best['dense_size'][2]+1))
        X_atoms_train, X_bonds_train, X_edges_train = class_GCN.dataframe_to_gcn_input(train_list_akt1[i])
        Y = train_list_akt1[i].Binary
        Y_dummy_train = np.empty((X_atoms_train.shape[0],gcn_best['dense_size'][2]+1))
    
    gcn_encoder = class_GCN.build_encoder()
    gcn_model = class_GCN.build_model(gcn_encoder)
    gcn_mining = class_GCN.build_mining(gcn_model)
    gcn_mining.fit([X_atoms_train,X_bonds_train,X_edges_train,Y],
                   Y_dummy_train,
                   epochs = gcn_best['n_epochs'],
                   batch_size = gcn_best['batch_size'],
                   shuffle = True,
                   validation_data = ([X_atoms_cold,X_bonds_cold,X_edges_cold,Y_cold],Y_dummy_cold),
                   callbacks=[es2,rlr2]
                  )
    #Predict Embeddings
    embeddings_cold = gcn_model.predict([X_atoms_cold,X_bonds_cold,X_edges_cold])
    embeddings_train = gcn_model.predict([X_atoms_train, X_bonds_train, X_edges_train])
        
    #Prepare data for XGBoost
    dmatrix_train = class_XGB.to_xgb_input(Y,embeddings_train)
    dmatrix_cold = class_XGB.to_xgb_input(Y_cold,embeddings_cold)
    evalist = [(dmatrix_train,'train'),(dmatrix_cold,'eval')]
    xgb_model = class_XGB.build_model(dmatrix_train,evalist,300)
    xgb_pred_train = xgb_model.predict(dmatrix_train)
    xgb_pred_cold = xgb_model.predict(dmatrix_cold)
    
    if i == 0:
        eval_akt1['Test'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)
    elif i == 1:
        eval_akt1['Random'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)

LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 1834 samples, validate on 306 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 20/25
Epoch 21/25
Epoch 22/25

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 23/25
Epoch 24/25
Epoch 25/25
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 1819 samples, validate on 321 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 22/25
Epoch 23/25

Epoch 00023: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 24/25
Epoch 25/25

Epoch 00025: ReduceLROnPlateau reducing learning rate to 5.06875803694129e-05.


In [20]:
eval_akt1 = pd.DataFrame(eval_akt1).T
eval_akt1.to_csv('../../../../Desktop/binding/thesis english/Results/3-One-Shot/Online/akt1.csv')
eval_akt1

Unnamed: 0,roc_auc,tn,fp,fn,tp,map,precision,recall,accuracy
Test,0.874115,168.0,22.0,39.0,77.0,0.795934,0.777778,0.663793,0.800654
Random,0.881458,149.0,41.0,28.0,103.0,0.818932,0.715278,0.78626,0.785047


In [21]:
eval_pi3k = {}
es2 = EarlyStopping(monitor='loss',patience=15, min_delta=0)
rlr2 = ReduceLROnPlateau(monitor='loss',factor=0.5, patience=2, verbose=1, min_lr=0.000000001)
for i in range(len(train_list_pi3k)):
    if i == 1:
        X_atoms_cold,X_bonds_cold,X_edges_cold = class_GCN.dataframe_to_gcn_input(val_list_pi3k[i])
        Y_cold = Y_val_pi3k
        Y_dummy_cold = np.empty((X_atoms_cold.shape[0],gcn_best['dense_size'][2]+1))
        X_atoms_train, X_bonds_train, X_edges_train = class_GCN.dataframe_to_gcn_input(train_list_pi3k[i])
        Y = Y_train_pi3k
        Y_dummy_train = np.empty((X_atoms_train.shape[0],gcn_best['dense_size'][2]+1))
    else:
        X_atoms_cold,X_bonds_cold,X_edges_cold = class_GCN.dataframe_to_gcn_input(val_list_pi3k[i])
        Y_cold = val_list_pi3k[i].Binary
        Y_dummy_cold = np.empty((X_atoms_cold.shape[0],gcn_best['dense_size'][2]+1))
        X_atoms_train, X_bonds_train, X_edges_train = class_GCN.dataframe_to_gcn_input(train_list_pi3k[i])
        Y = train_list_pi3k[i].Binary
        Y_dummy_train = np.empty((X_atoms_train.shape[0],gcn_best['dense_size'][2]+1))
    
    gcn_encoder = class_GCN.build_encoder()
    gcn_model = class_GCN.build_model(gcn_encoder)
    gcn_mining = class_GCN.build_mining(gcn_model)
    gcn_mining.fit([X_atoms_train,X_bonds_train,X_edges_train,Y],
                   Y_dummy_train,
                   epochs = gcn_best['n_epochs'],
                   batch_size = gcn_best['batch_size'],
                   shuffle = True,
                   validation_data = ([X_atoms_cold,X_bonds_cold,X_edges_cold,Y_cold],Y_dummy_cold),
                   callbacks=[es2,rlr2]
                  )
    #Predict Embeddings
    embeddings_cold = gcn_model.predict([X_atoms_cold,X_bonds_cold,X_edges_cold])
    embeddings_train = gcn_model.predict([X_atoms_train, X_bonds_train, X_edges_train])
        
    #Prepare data for XGBoost
    dmatrix_train = class_XGB.to_xgb_input(Y,embeddings_train)
    dmatrix_cold = class_XGB.to_xgb_input(Y_cold,embeddings_cold)
    evalist = [(dmatrix_train,'train'),(dmatrix_cold,'eval')]
    xgb_model = class_XGB.build_model(dmatrix_train,evalist,300)
    xgb_pred_train = xgb_model.predict(dmatrix_train)
    xgb_pred_cold = xgb_model.predict(dmatrix_cold)
    
    if i == 0:
        eval_pi3k['Test'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)
    elif i == 1:
        eval_pi3k['Random'] = calculate_metrics(np.array(Y_cold),xgb_pred_cold)

LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 3217 samples, validate on 537 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 20/25
Epoch 21/25

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 22/25
Epoch 23/25

Epoch 00023: ReduceLROnPlateau reducing learning rate to 5.06875803694129e-05.
Epoch 24/25
Epoch 25/25
LAYER 0
LAYER 1
LAYER 2


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 3190 samples, validate on 564 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0004055006429553032.
Epoch 12/25
Epoch 13/25
Epoch 14/25

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0002027503214776516.
Epoch 15/25
Epoch 16/25
Epoch 17/25

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001013751607388258.
Epoch 18/25
Epoch 19/25

Epoch 00019: ReduceLROnPlateau reducing learning rate to 5.06875803694129e-05.
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25

Epoch 00024: ReduceLROnPlateau reducing learning rate to 2.534379018470645e-05.
Epoch 25/25


In [22]:
eval_pi3k = pd.DataFrame(eval_pi3k).T
eval_pi3k.to_csv('../../../../Desktop/binding/thesis english/Results/3-One-Shot/Online/pi3k.csv')
eval_pi3k

Unnamed: 0,roc_auc,tn,fp,fn,tp,map,precision,recall,accuracy
Test,0.872332,310.0,50.0,61.0,116.0,0.732184,0.698795,0.655367,0.793296
Random,0.87648,291.0,42.0,71.0,160.0,0.840004,0.792079,0.692641,0.799645
