### Imports

In [9]:
import importlib
from matplotlib.pyplot import figure
from AIBind.import_modules import *
from AIBind import AIBind

In [10]:
# Re-import the AIBind module so that changes saved to AIBind.py take effect
# in this kernel without a restart.
importlib.reload(AIBind)

<module 'AIBind.AIBind' from '/home/sars-busters-consolidated/GitCode/AIBind/AIBind.py'>

### GPU Settings

In [11]:
# Snapshot of GPU utilisation: one list entry per line of `nvidia-smi` output.
# The captured bytes are decoded instead of being passed through str(), which
# returns the bytes repr (leading "b'" and escaped newline sequences). No shell
# is needed to launch a single executable, so the command is given as a list.
subprocess.check_output(['nvidia-smi']).decode('utf-8').splitlines()

["b'Wed Jul 14 03:31:31 2021       ",
 '+-----------------------------------------------------------------------------+',
 '| NVIDIA-SMI 418.87.01    Driver Version: 418.87.01    CUDA Version: 10.1     |',
 '|-------------------------------+----------------------+----------------------+',
 '| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |',
 '| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |',
 '|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |',
 '| N/A   61C    P0    31W /  70W |      0MiB / 15079MiB |      0%      Default |',
 '+-------------------------------+----------------------+----------------------+',
 '|   1  Tesla T4            Off  | 00000000:00:05.0 Off |                    0 |',
 '| N/A   76C    P0    33W /  70W |  14387MiB / 15079MiB |      0%      Default |',
 '+-------------------------------+----------------------+----------------------+',
 '|   2  Tesla T4            Off  | 00

In [12]:
# Expose only the GPU with index 1 to CUDA-based libraries in this process.
# NOTE(review): check a fresh `nvidia-smi` listing to confirm GPU 1 has free
# memory before running the cells below.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

### VecNet Object Definition

In [13]:
# Load the pickled drug and target tables and rename their 'Label' column to
# the column names that are handed to AIBind further down
# ('InChiKey' for drugs, 'target_aa_code' for targets).
# NOTE(review): pickle can execute arbitrary code on load - only open trusted files.
with open('/data/sars-busters/Mol2Vec/chemicals_01_w_embed.pkl', 'rb') as drugs_handle:
    drugs = pkl.load(drugs_handle).rename(columns={'Label': 'InChiKey'})

with open('/data/sars-busters/Mol2Vec/amino_01_w_embed.pkl', 'rb') as targets_handle:
    targets = pkl.load(targets_handle).rename(columns={'Label': 'target_aa_code'})

In [None]:
# Per-run splits for the unseen-nodes experiment (runs 0-4). The node and edge
# lists are passed to the AIBind constructor below, so this cell has to be run
# before that one.
unseen_nodes_dir = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/'

targets_test, targets_validation = [], []
edges_test, edges_validation = [], []
train_sets = []

# (destination list, file-name stem), in the order the files are read for each run.
split_files = (
    (targets_test, 'test_unseen_nodes_'),
    (edges_test, 'test_unseen_edges_'),
    (targets_validation, 'validation_unseen_nodes_'),
    (edges_validation, 'validation_unseen_edges_'),
    (train_sets, 'train_'),
)

for run_number in tqdm(range(5)):
    for destination, stem in split_files:
        destination.append(pd.read_csv(unseen_nodes_dir + stem + str(run_number) + '.csv'))

In [14]:
# Build the AIBind object from the in-memory drug/target tables and the
# per-run unseen-node node/edge lists prepared in the cells above.
vecnet_object = AIBind.AIBind(
    # Interactions: only a file location is supplied.
    interactions_location='/data/sars-busters-consolidated/GitData/interactions/Network_Derived_Negatives.csv',
    interactions=None,
    interaction_y_name='Y',

    # No absolute negatives are supplied.
    absolute_negatives_location=None,
    absolute_negatives=None,

    # Drugs: passed as a dataframe rather than a location.
    drugs_location=None,
    drugs_dataframe=drugs,
    drug_inchi_name='InChiKey',
    drug_smile_name='SMILE',

    # Targets: passed as a dataframe rather than a location.
    targets_location=None,
    targets_dataframe=targets,
    target_seq_name='target_aa_code',

    # No Mol2Vec / ProtVec model or location is supplied.
    mol2vec_location=None,
    mol2vec_model=None,
    protvec_location=None,
    protvec_model=None,

    # Per-run node and edge splits (one list entry per run).
    nodes_test=targets_test,
    nodes_validation=targets_validation,
    edges_test=edges_test,
    edges_validation=edges_validation,

    model_out_dir='/data/sars-busters-consolidated/siamese/KF-Final/',

    debug=False,
)

### Prediction on Unseen Nodes

In [8]:
# Rebind `vecnet_object` to a previously pickled instance read from disk.
# NOTE(review): pickle can execute arbitrary code on load - only open trusted files.
with open('/data/sars-busters-consolidated/GitData/VecNet_unseen_nodes.pickle', 'rb') as pickled_vecnet:
    vecnet_object = pkl.load(pickled_vecnet)

In [24]:
## Input table of drug/target entries to score.
## Expected to have 'InChiKey', 'SMILE', and 'target_aa_code' columns.

# 'csv_file_path' is a placeholder: replace it with the path to your CSV.
nodes_df = pd.read_csv('csv_file_path')

# Fail fast with a readable message if the CSV does not carry the expected
# columns, instead of failing later inside the prediction call.
required_columns = {'InChiKey', 'SMILE', 'target_aa_code'}
missing_columns = required_columns.difference(nodes_df.columns)
if missing_columns:
    raise ValueError(
        'nodes_df is missing required column(s): ' + ', '.join(sorted(missing_columns))
    )

# Example entries
#nodes_df['InChiKey'] = ['HUMNYLRZRPPJDN-UHFFFAOYSA-N']
#nodes_df['SMILE'] = ['C1=CC=C(C=C1)C=O']
#nodes_df['target_aa_code'] = sars_targets['Sequence'].tolist()[0]

In [26]:
# Run the fold-averaged prediction on `nodes_df`. No model name, version,
# explicit model paths or filter lists are given; drug and target embeddings
# are requested, and dataframes are returned (element 0 is exported below).
unseen_nodes_example_5fold_average = vecnet_object.get_fold_averaged_prediction_results(
    model_name=None,
    version_number=None,
    model_paths=[],
    optimal_validation_model=None,
    test_sets=[nodes_df],
    get_drug_embed=True,
    get_target_embed=True,
    drug_filter_list=[],
    target_filter_list=[],
    return_dataframes=True,
)


Testing on model :  ./VecNet_Unseen_Nodes/Run_0/vecnet_ds2_5_fold_unseen_nodes_v00_run0_06-15_04h42_epoch_19_idx_0.model
filtered_nodes_test :  (1, 3)
Drugs :  1
Targets :  1


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

X0, X1 :  (1, 100) (1, 300)
Testing on model :  ./VecNet_Unseen_Nodes/Run_1/vecnet_ds2_5_fold_unseen_nodes_v00_run1_06-15_04h46_epoch_19_idx_0.model
filtered_nodes_test :  (1, 3)
Drugs :  1
Targets :  1


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

X0, X1 :  (1, 100) (1, 300)
Testing on model :  ./VecNet_Unseen_Nodes/Run_2/vecnet_ds2_5_fold_unseen_nodes_v00_run2_06-15_04h50_epoch_19_idx_0.model
filtered_nodes_test :  (1, 3)
Drugs :  1
Targets :  1


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

X0, X1 :  (1, 100) (1, 300)
Testing on model :  ./VecNet_Unseen_Nodes/Run_3/vecnet_ds2_5_fold_unseen_nodes_v00_run3_06-15_04h54_epoch_19_idx_0.model
filtered_nodes_test :  (1, 3)
Drugs :  1
Targets :  1


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

X0, X1 :  (1, 100) (1, 300)
Testing on model :  ./VecNet_Unseen_Nodes/Run_4/vecnet_ds2_5_fold_unseen_nodes_v00_run4_06-15_04h56_epoch_19_idx_0.model
filtered_nodes_test :  (1, 3)
Drugs :  1
Targets :  1


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

X0, X1 :  (1, 100) (1, 300)
unseen_targets_pred :  (1,)
list :  1


In [3]:
# Write the first element of the fold-averaged results to a CSV file.
# 'dump_file_csv' is the output path as given; adjust as needed.
averaged_predictions = unseen_nodes_example_5fold_average[0]
averaged_predictions.to_csv('dump_file_csv')