In [None]:
import tensorflow as tf
from ccgnet import experiment as exp
from ccgnet.finetune import *
from ccgnet import layers
from ccgnet.layers import *
import numpy as np
import time
from sklearn.metrics import balanced_accuracy_score
from ccgnet.Dataset import Dataset, DataLoader
from Featurize.Coformer import Coformer
from Featurize.Cocrystal import Cocrystal

In [None]:
class CCGNet(object):
    """Network definition handed to ``exp.Model``; the graph is wired in ``build_model``."""

    def build_model(self, inputs, is_training, global_step):
        """Wire up four CCGBlocks, a readout and a three-stage fully connected head.

        Args:
            inputs: indexable sequence. Positions 0-4 are read as V, A, labels,
                mask and graph_size; positions 6 and 7 as global_state and
                subgraph_size. Position 5 is not consumed by the network.
            is_training: forwarded to every CCGBlock, the readout, batch-norm
                and dropout call.
            global_step: forwarded as ``num_updates`` to CCGBlock and
                ``layers.make_bn``.

        Returns:
            Tuple ``(out, labels)``: ``out`` is the result of the final
            2-unit embedding layer named ``'final'``; ``labels`` is
            ``inputs[2]`` passed through untouched.
        """
        node_feats = inputs[0]
        adjacency = inputs[1]
        labels = inputs[2]
        mask = inputs[3]
        graph_size = inputs[4]
        # inputs[5] is skipped on purpose: nothing below reads it.
        global_state = inputs[6]
        subgraph_size = inputs[7]

        # message passing: four stacked blocks, filter count alternating 64/16
        for n_filters in (64, 16, 64, 16):
            node_feats, global_state = CCGBlock(
                node_feats, adjacency, global_state, subgraph_size,
                no_filters=n_filters, mask=mask,
                num_updates=global_step, is_training=is_training)

        # readout
        hidden = ReadoutFunction(node_feats, global_state, graph_size,
                                 num_head=2, is_training=is_training)

        # predict: embedding -> batch norm -> ReLU -> dropout, once per scope
        fc_stages = (
            ('Predictive_FC_1', 256),
            ('Predictive_FC_2', 1024),
            ('Predictive_FC_3', 256),
        )
        for scope_name, units in fc_stages:
            with tf.compat.v1.variable_scope(scope_name):
                hidden = layers.make_embedding_layer(hidden, units)
                hidden = layers.make_bn(hidden, is_training, mask=None, num_updates=global_step)
                hidden = tf.nn.relu(hidden)
                hidden = tf.compat.v1.layers.dropout(hidden, 0.457, training=is_training)

        out = layers.make_embedding_layer(hidden, 2, name='final')
        return out, labels

In [None]:
# First dataset: built from the ECC&CC table, using the shared mol-block directory.
data1 = Dataset(
    'data/CC_Table/ECC&CC_Table.tab',
    mol_blocks_dir='data/Mol_Blocks.dir',
)
# Featurize with covalent-bond-only adjacency; hbond / pi-pi stacking / contact
# options are all passed as 0.
data1.make_graph_dataset(
    Desc=1,
    A_type='OnlyCovalentBond',
    hbond=0,
    pipi_stack=0,
    contact=0,
    make_dataframe=True,
)

In [None]:
# Second dataset: built from the data-augmentation table, with the same
# mol-block directory and the same featurization options as data1.
data2 = Dataset(
    'data/CC_Table/CC_Table-DataAug.tab',
    mol_blocks_dir='data/Mol_Blocks.dir',
)
data2.make_graph_dataset(
    Desc=1,
    A_type='OnlyCovalentBond',
    hbond=0,
    pipi_stack=0,
    contact=0,
    make_dataframe=True,
)

In [None]:
def _read_sample_file(path):
    """Return the Python object obtained by evaluating the text stored in ``path``.

    The file is read inside a ``with`` block so the handle is closed as soon
    as the text has been loaded.

    Raises:
        IOError/OSError: if ``path`` cannot be opened.
    """
    with open(path) as handle:
        text = handle.read()
    # SECURITY NOTE(review): eval() executes whatever expression the file
    # contains. Only use this on trusted, repository-owned data files; if the
    # files hold plain literals, consider switching to ast.literal_eval.
    return eval(text)


# Fold definition; indexed below as fold_10['fold-N']['train' / 'valid'] and fold_10['test'].
fold_10 = _read_sample_file('data/Fold_10.dir')

# Per-compound held-out sample lists.
nico = _read_sample_file('data/Test/Test_Samples/Nicotinamide_Test.list')
carb = _read_sample_file('data/Test/Test_Samples/Carbamazepine_Test.list')
indo = _read_sample_file('data/Test/Test_Samples/Indomethacin_Test.list')
para = _read_sample_file('data/Test/Test_Samples/Paracetamol_Test.list')
pyre = _read_sample_file('data/Test/Test_Samples/Pyrene_Test.list')

# De-duplicated unions: `test` covers all five lists, `apis` leaves out pyrene.
test = list(set(nico + carb + indo + para + pyre))
apis = list(set(nico + indo + para + carb))

# Fixed path: the original had a doubled 'Test/' segment
# ('data/Test/Test/Test_Samples/...'), unlike every sibling file in this cell.
cl20 = _read_sample_file('data/Test/Test_Samples/CL-20_Test.list')
tnt = _read_sample_file('data/Test/Test_Samples/TNT_Test.list')

In [None]:
# Train one model per fold and report the total wall-clock time.
start = time.time()
snapshot_path = './snapshot/'
model_name = 'CCGNet'
dataset_name = 'CC_Dataset'
for fold in ['fold-{}'.format(i) for i in range(10)]:
    print('\n################ {} ################'.format(fold))
    # data1 supplies train / valid / test splits; data2 is split with the same
    # sample lists but only its training part is used below.
    train_data1, valid_data1, test_data1 = data1.split(train_samples=fold_10[fold]['train'], valid_samples=fold_10[fold]['valid'], with_test=True, test_samples=fold_10['test'])
    train_data2, valid_data2 = data2.split(train_samples=fold_10[fold]['train'], valid_samples=fold_10[fold]['valid'], with_test=False)
    # Concatenate the two training sets component by component.
    train_data = []
    for ix, i in enumerate(train_data1):
        train_data.append(np.concatenate([i, train_data2[ix]]))
    # Drop the un-merged copies before building the next graph.
    del train_data2
    del train_data1
    # Use the compat.v1 entry point, consistent with the tf.compat.v1 calls in
    # CCGNet.build_model; the bare tf.reset_default_graph() is not available
    # at top level in TensorFlow 2.x.
    tf.compat.v1.reset_default_graph()
    model = CCGNet()
    # fold[-1] is the last character of the fold label (the digit in 'fold-0'..'fold-9').
    model = exp.Model(model, train_data, valid_data1, with_test=True, test_data=test_data1, snapshot_path=snapshot_path, use_subgraph=True,
                      model_name=model_name, dataset_name=dataset_name+'/time_{}'.format(fold[-1]))
    history = model.fit(num_epoch=100, save_info=True, save_att=True, silence=0, train_batch_size=128,
                        metric='acc')
end = time.time()
# Break the elapsed seconds into hours / minutes / seconds for display.
time_gap = end-start
h, h_m = divmod(time_gap, 3600)
m, s = divmod(h_m, 60)
print('{}h {}m {}s'.format(int(h),int(m),round(s,2)))

In [None]:
from Featurize.MetricsReport import model_metrics_report

# Run the metrics report on '<snapshot_path>/<model_name>/', passing tofixed=2.
model_metrics_report(
    '{}/{}/'.format(snapshot_path, model_name),
    tofixed=2,
)

In [None]:
from ccgnet.parselog import ParseTestLog, ParseTestLogEnsemble
import glob

# Find every file whose name contains 'val' one directory level below
# <snapshot_path>/<model_name>/<dataset_name>/ and parse each with ParseTestLog.
val_list = glob.glob(
    '{}/{}/{}/*/*val*'.format(snapshot_path, model_name, dataset_name)
)
l = [ParseTestLog(log_path) for log_path in val_list]

In [None]:
# Combine the per-log parsers collected in `l` into a single ensemble object.
ens = ParseTestLogEnsemble(l)
print('####### Mean ########')
# NOTE(review): a bare expression in the middle of a cell is not auto-displayed
# by Jupyter, so this line only shows something if accessing `Reports` prints
# as a side effect — confirm against ccgnet.parselog.
ens.Reports
print('####### Bagging ########')
# NOTE(review): the value is only stored here; any visible output must come from
# the `Bagging` attribute access itself — confirm.
ens_bagging = ens.Bagging