In [1]:
# Tox21 task (label column) names, in the order used throughout this notebook:
# the seven 'NR-*' tasks come first, followed by the five 'SR-*' tasks.
tox21_tasks = [
    # NR-* tasks
    'NR-AR',
    'NR-AR-LBD',
    'NR-AhR',
    'NR-Aromatase',
    'NR-ER',
    'NR-ER-LBD',
    'NR-PPAR-gamma',
    # SR-* tasks
    'SR-ARE',
    'SR-ATAD5',
    'SR-HSE',
    'SR-MMP',
    'SR-p53',
]

In [2]:
# load the pre-split CSV data
import deepchem as dc

# CSV loader that reads the task columns listed in tox21_tasks, takes the
# "ID" column as identifier and featurizes the "SMILES" column with
# DeepChem's CircularFingerprint featurizer.
loader = dc.data.CSVLoader(
    tox21_tasks,
    id_field=["ID"],
    feature_field="SMILES",
    featurizer=dc.feat.CircularFingerprint(),
)

# One dataset per split, loaded in train / valid / test order.
ecfp_train, ecfp_valid, ecfp_test = (
    loader.create_dataset(csv_path)
    for csv_path in (
        '../data/split_data/tox21_train.csv',
        '../data/split_data/tox21_valid.csv',
        '../data/split_data/tox21_test.csv',
    )
)



In [3]:
# ID: keep the `.ids` of every loaded split.
id_train, id_valid, id_test = ecfp_train.ids, ecfp_valid.ids, ecfp_test.ids

# ECFP: rebind the ecfp_* names from the loaded dataset objects to their `.X`
# arrays. NOTE(review): after this line the dataset objects are no longer
# reachable through these names, so this cell cannot be re-run without first
# re-running the loading cell.
ecfp_train, ecfp_valid, ecfp_test = ecfp_train.X, ecfp_valid.X, ecfp_test.X

In [4]:
# tensorflow gpu 
import os
import sys
# Select the GPU through environment variables; this happens before TensorFlow
# is imported further down in this cell.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})
# Make the project modules under ../code/ importable.
sys.path.append('../code/')

import tensorflow as tf
# Cap the memory TensorFlow may use on the first visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf_experimental = tf.config.experimental
    try:
        # memory_limit is given in MB according to the TensorFlow docs,
        # i.e. 1024*10 corresponds to 10 GB.
        tf_experimental.set_virtual_device_configuration(
            gpus[0],
            [tf_experimental.VirtualDeviceConfiguration(memory_limit=1024*10)],
        )
    except RuntimeError as err:
        # Best effort: report the problem and carry on with default settings.
        print(err)
from tensorflow.keras import backend as K
from tensorflow.keras.backend import clear_session

In [5]:
# load preprocessed numpy data
import model_util

# feature, label
# load_data is called once per split, in train / valid / test order, and each
# call yields a (feature, label) pair. The *_feature objects are later handed
# to custom_run_batch, which treats them as a sequence of input arrays.
(
    (train_feature, label_train),
    (valid_feature, label_valid),
    (test_feature, label_test),
) = (model_util.load_data('tox21', split) for split in ('train', 'valid', 'test'))

In [6]:
import numpy as np

def custom_run_batch(data, size, func):
    """Run ``func`` over ``data`` in consecutive mini-batches and join the results.

    Parameters
    ----------
    data : sequence of array-likes
        Inputs that are sliced in parallel along their first axis. The number
        of samples is taken from ``len(data[0])``.
    size : int
        Maximum number of samples per call to ``func``; must be positive.
    func : callable
        Called with a list holding one slice per entry of ``data``. Only the
        first element of its return value is collected.

    Returns
    -------
    numpy.ndarray
        The per-batch outputs concatenated along the first axis.

    Raises
    ------
    ValueError
        If ``size`` is not positive or ``data[0]`` contains no samples.
    """
    if size <= 0:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    n_samples = len(data[0])
    if n_samples == 0:
        raise ValueError("data contains no samples to run")

    # Stepping over the start offsets handles every case uniformly: inputs
    # shorter than one batch, a shorter final batch, and lengths that are an
    # exact multiple of `size` (no empty trailing batch is sent to `func`).
    outputs = [
        func([arr[start:start + size] for arr in data])[0]
        for start in range(0, n_samples, size)
    ]
    return np.concatenate(outputs)

In [7]:
# Single
import model_frame
import data_config as cf

# One entry per Tox21 task: the 'i_out0' layer output of that task's
# single-task model, computed for the train / valid / test features.
single_train_list, single_valid_list, single_test_list = [], [], []

for task_idx in range(len(tox21_tasks)):
    # Build a fresh one-output model and restore the weights saved for this task.
    model = model_frame.make_model(1)
    model_name = 'tox21_single_' + str(task_idx)
    weights_path = os.path.join(cf.result_dir, model_name + '.h5')
    model.load_weights(weights_path)

    # Callable mapping the model input to the output of layer 'i_out0'.
    get_layer_output = K.function([model.input],
                                  [model.get_layer('i_out0').output])

    for collected, features, batch_size in (
        (single_train_list, train_feature, 2048),
        (single_valid_list, valid_feature, 512),
        (single_test_list, test_feature, 512),
    ):
        collected.append(custom_run_batch(features, batch_size, get_layer_output))

    # This model is finished; reset Keras state before the next one is built.
    clear_session()

In [8]:
# Multi: per-task outputs ('i_out<k>' layers) of the 12-task model.
multi_train_list = []
multi_valid_list = []
multi_test_list = []

# The multi-task model is built and loaded once and shared by every iteration.
model = model_frame.make_model(len(tox21_tasks))
model_name = 'tox21_multi'
model.load_weights(os.path.join(cf.result_dir, model_name+'.h5'))

for i in range(len(tox21_tasks)):
    # Callable mapping the model input to the output of layer 'i_out<i>'.
    get_layer_output = K.function([model.input],
                                  [model.get_layer('i_out'+str(i)).output])

    multi_train_list.append(custom_run_batch(train_feature, 2048, get_layer_output))
    multi_valid_list.append(custom_run_batch(valid_feature, 512, get_layer_output))
    multi_test_list.append(custom_run_batch(test_feature, 512, get_layer_output))

# Reset Keras global state only after the last layer has been read: the same
# `model` is reused by every iteration, so clearing the session inside the
# loop would reset state that the remaining iterations still rely on.
clear_session()

In [9]:
# Multi: output of the single layer 'dense_5' of the 12-task model.
model_name = 'tox21_multi'
model = model_frame.make_model(len(tox21_tasks))
model.load_weights(os.path.join(cf.result_dir, model_name + '.h5'))

# NOTE(review): 'dense_5' looks like an auto-generated Keras layer name, which
# would depend on how many layers were created since the last clear_session();
# confirm against model_frame.make_model.
get_layer_output = K.function([model.input],
                              [model.get_layer('dense_5').output])

# Same callable for all three splits, evaluated in train / valid / test order.
multi_train, multi_valid, multi_test = (
    custom_run_batch(features, batch_size, get_layer_output)
    for features, batch_size in (
        (train_feature, 2048),
        (valid_feature, 512),
        (test_feature, 512),
    )
)

clear_session()

In [10]:
# NR: per-task outputs ('i_out<k>' layers) of the 7-output 'tox21_NR' model.
NR_train_list = []
NR_valid_list = []
NR_test_list = []

# Built and loaded once; every iteration below reads a different layer of it.
model = model_frame.make_model(7)
model_name = 'tox21_NR'
model.load_weights(os.path.join(cf.result_dir, model_name+'.h5'))

for i in range(7):
    # Callable mapping the model input to the output of layer 'i_out<i>'.
    get_layer_output = K.function([model.input],
                                  [model.get_layer('i_out'+str(i)).output])

    NR_train_list.append(custom_run_batch(train_feature, 2048, get_layer_output))
    NR_valid_list.append(custom_run_batch(valid_feature, 512, get_layer_output))
    NR_test_list.append(custom_run_batch(test_feature, 512, get_layer_output))

# Reset Keras global state only once the shared model is no longer needed;
# clearing the session inside the loop would reset state that the remaining
# iterations still rely on.
clear_session()

In [11]:
# SR: per-task outputs ('i_out<k>' layers) of the 5-output 'tox21_SR' model.
SR_train_list = []
SR_valid_list = []
SR_test_list = []

# Built and loaded once; every iteration below reads a different layer of it.
model = model_frame.make_model(5)
model_name = 'tox21_SR'
model.load_weights(os.path.join(cf.result_dir, model_name+'.h5'))

for i in range(5):
    # Callable mapping the model input to the output of layer 'i_out<i>'.
    get_layer_output = K.function([model.input],
                                  [model.get_layer('i_out'+str(i)).output])

    SR_train_list.append(custom_run_batch(train_feature, 2048, get_layer_output))
    SR_valid_list.append(custom_run_batch(valid_feature, 512, get_layer_output))
    SR_test_list.append(custom_run_batch(test_feature, 512, get_layer_output))

# Reset Keras global state only once the shared model is no longer needed;
# clearing the session inside the loop would reset state that the remaining
# iterations still rely on.
clear_session()

In [12]:
# Persist everything collected above as compressed NumPy archives, one archive
# per name, each holding the keys 'train', 'valid' and 'test'.
embed_dir = './tox21_embed_data/'
# Create the target folder up front so a missing directory does not abort the
# export after all the model passes have already been computed.
os.makedirs(embed_dir, exist_ok=True)


def _save_splits(name, train, valid, test):
    """Save one train/valid/test triple to the archive ``embed_dir + name``."""
    np.savez_compressed(embed_dir + name, train=train, valid=valid, test=test)


_save_splits('id', id_train, id_valid, id_test)
_save_splits('label', label_train, label_valid, label_test)
_save_splits('ecfp', ecfp_train, ecfp_valid, ecfp_test)

# Per-task outputs of the single-task models and of the multi-task model.
for i in range(len(tox21_tasks)):
    _save_splits('single' + str(i),
                 single_train_list[i], single_valid_list[i], single_test_list[i])
    _save_splits('multi' + str(i),
                 multi_train_list[i], multi_valid_list[i], multi_test_list[i])

# Per-task outputs of the 7-output NR model.
for i in range(7):
    _save_splits('NR' + str(i),
                 NR_train_list[i], NR_valid_list[i], NR_test_list[i])

# Per-task outputs of the 5-output SR model.
for i in range(5):
    _save_splits('SR' + str(i),
                 SR_train_list[i], SR_valid_list[i], SR_test_list[i])

# 'dense_5' output of the multi-task model.
_save_splits('multi', multi_train, multi_valid, multi_test)

# NOTE(review): exit() ends the interpreter/kernel session once the export is
# done, presumably to release the GPU; confirm this is still wanted.
exit()