In [None]:
import matplotlib.pyplot as plt
import os
import glob
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

from utils.model_builders import GCN
from utils.model_builders import calculate_metrics

# Build/Load model

In [None]:
# Training callbacks. Both monitor the training loss ('loss').
es = EarlyStopping(
    monitor='loss',
    min_delta=0,
    patience=8,
)
rlr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=4,
    min_lr=1e-7,
    verbose=1,
)

# Configuration dict handed to GCN(...). The meaning of each key is defined by
# utils.model_builders.GCN, which is not shown in this notebook.
gcn_model_params = {
    # --- architecture / input dimensions ---
    "num_layers": 3,
    "max_atoms": 70,
    "num_atom_features": 62,
    "num_atom_features_original": 62,
    "num_bond_features": 6,
    "max_degree": 5,
    # The lists below have three entries each, presumably one per layer
    # (num_layers == 3) -- TODO confirm against GCN.
    "conv_width": [32, 64, 96],
    "fp_length": [96] * 3,
    "activ_enc": "selu",
    "activ_dec": "selu",
    # --- optimisation ---
    "learning_rates": [0.001] * 3,
    "learning_rates_fp": [0.005] * 3,
    # Same loss and unit weight for both named outputs.
    "losses_conv": dict.fromkeys(
        ("neighbor_output", "self_output"), "mean_squared_error"
    ),
    "lossWeights": dict.fromkeys(("neighbor_output", "self_output"), 1.0),
    "metrics": "mse",
    "loss_fp": "mean_squared_error",
    "enc_layer_names": ["enc_1", "enc_2", "enc_3"],
    "callbacks": [es, rlr],
    # NOTE(review): the three values below look like tuned hyperparameters;
    # their exact role is defined inside GCN -- document the source if known.
    "adam_decay": 0.0005329142291371636,
    "beta": 5,
    "p": 0.004465204118126482,
}

In [None]:
# Instantiate the GCN builder from gcn_model_params and build the model through
# build_combined_onehot(). No weights are loaded here; that happens per fold in
# the prediction cell. Both `gcn` and `model` are reused by later cells.
gcn = GCN(gcn_model_params)
model = gcn.build_combined_onehot()

# Load Data

In [None]:
# Root directory of the project checkout that contains the `data/` folder.
# Set the KINASE_BINDING_BASE_PATH environment variable to run this notebook on
# another machine; when it is unset, the original author's local path is used.
base_path = os.environ.get(
    'KINASE_BINDING_BASE_PATH',
    '/Users/panos/Desktop/biolab/kinase_binding/learning',
)
# Name of the per-target sub-directory under `data/` (also used as a prefix of
# the weights directory name).
target = 'p38'

In [None]:
# Path templates derived from base_path and target; nothing to edit by hand here.
# Every '{}' left in a template is filled in later with a fold number via str.format().
_target_root = f'data/{target}'
train_files = os.path.join(
    base_path, _target_root + '/data_crossval/fold_{}/train_{}.csv'
)
val_files = os.path.join(
    base_path, _target_root + '/data_crossval/fold_{}/val_cold_{}.csv'
)
# Unlike the two CSV templates, the weights template has a single placeholder.
weight_files = os.path.join(
    base_path,
    _target_root + f'/{target}_AutoEnc_Model/First Run/Weights/' + 'weights_model_{}.h5',
)

In [None]:
def _read_fold_csv(path):
    """Load one fold CSV into a DataFrame.

    If the header contains an 'Unnamed: 0' column, the first column is used as
    the index (index_col=0); otherwise the file is read with the default index.
    Only the header row is parsed for the check, so each file is fully read
    once instead of twice.
    """
    header = pd.read_csv(path, nrows=0)
    if 'Unnamed: 0' in header:
        return pd.read_csv(path, index_col=0)
    return pd.read_csv(path)


# Fold numbers as they appear in the file names (fold_1 ... fold_7).
fold_ids = range(1, 8)
train_sets = [_read_fold_csv(train_files.format(i, i)) for i in fold_ids]
val_sets = [_read_fold_csv(val_files.format(i, i)) for i in fold_ids]

# Predict

In [None]:
# For every validation fold: convert the DataFrame to model input, load the
# weights file selected by the loop index, and store the model's predictions.
# NOTE(review): `i` from enumerate() is 0-based, while the fold CSVs are numbered
# from 1. Confirm that the weights files are really named weights_model_0.h5 ...
# weights_model_6.h5; if they are 1-based, use enumerate(val_sets, start=1).
preds = []
for i, df_val in enumerate(val_sets):
    input_data = gcn.dataframe_to_gcn_input(df_val)
    # The weights template has a single '{}' placeholder, so one argument suffices.
    model.load_weights(weight_files.format(i))
    preds.append(model.predict(input_data))

In [None]:
# Score each fold's predictions against the `Binary` column of its validation
# set. `dfs` collects whatever calculate_metrics returns for each fold
# (presumably a dict or Series of metric values -- confirm in utils.model_builders).
dfs = []
for i, (y_pred, df_val) in enumerate(zip(preds, val_sets)):
    print(f'fold {i}\n')
    y_true = df_val.Binary
    dfs.append(calculate_metrics(y_true.values, y_pred.squeeze(), plots=True))

# Build the summary table once, after the loop: one row per fold. Doing it here
# also keeps `metrics` defined when there are no folds.
metrics = pd.DataFrame(dfs)
metrics