In [77]:
import pandas as pd
from bicm import BipartiteGraph
import numpy as np
from tqdm import tqdm
import csv
import itertools 
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score, classification_report
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, average_precision_score
from sklearn.metrics import confusion_matrix, f1_score, classification_report
import math

### Functions

In [85]:
def dump_degree_sequences(train,test,fold=0,unseen_folder='VecNet_Unseen_Nodes'):
    """Write the per-node degree sequences of the training set to text files.

    For every distinct ligand (``InChiKey``) and every distinct target
    (``target_aa_code``) in ``train`` the number of rows with ``Y == 1`` and
    the number of rows with ``Y == 0`` are counted. Six files are written to
    ``../data/sars-busters-consolidated/GitData/<unseen_folder>/Degree_Sequences/``:

    * ``degreetrain10ligands_<fold>.txt`` / ``degreetrain01ligands_<fold>.txt``:
      counts of ``Y == 1`` / ``Y == 0`` rows per ligand, one integer per line.
    * ``degreetrain10targets_<fold>.txt`` / ``degreetrain01targets_<fold>.txt``:
      the same counts per target.
    * ``ligands_<fold>.txt`` / ``targets_<fold>.txt``: the node identifiers,
      one per line, in the same order as the corresponding degree files.

    Nodes are written in sorted order so that repeated runs produce identical
    files. Rows whose node identifier is missing (NaN) are ignored.

    Parameters
    ----------
    train : pandas.DataFrame
        Must provide the columns ``InChiKey``, ``target_aa_code`` and ``Y``.
    test : object
        Not used by this function; kept so existing calls keep working.
    fold : int, optional
        Fold number used as the file-name suffix.
    unseen_folder : str, optional
        Sub-folder of ``GitData`` that holds the ``Degree_Sequences`` directory.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If the ``Degree_Sequences`` output directory does not exist.
    """
    out_prefix = '../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/'
    suffix = '_' + str(fold) + '.txt'

    def _degree_table(column):
        # One grouped pass over the training frame instead of re-filtering the
        # whole frame once per node. Values are taken positionally so that a
        # non-unique DataFrame index cannot affect the grouping.
        table = pd.DataFrame({
            'node': train[column].to_numpy(),
            'deg_0': (train['Y'] == 0).to_numpy().astype(int),
            'deg_1': (train['Y'] == 1).to_numpy().astype(int),
        })
        # sort=True gives a deterministic node order shared by all files.
        return table.groupby('node', sort=True)[['deg_0', 'deg_1']].sum()

    def _write_lines(name, template, values):
        # The context manager closes the file even if a write fails.
        with open(out_prefix + name + suffix, 'w') as handle:
            for value in values:
                handle.write(template % value)

    for column, node_kind in (('InChiKey', 'ligands'), ('target_aa_code', 'targets')):
        degrees = _degree_table(column)
        # "10" files hold the Y == 1 counts, "01" files hold the Y == 0 counts.
        _write_lines('degreetrain10' + node_kind, "%i\n", degrees['deg_1'].tolist())
        _write_lines('degreetrain01' + node_kind, "%i\n", degrees['deg_0'].tolist())
        _write_lines(node_kind, "%s\n", degrees.index.tolist())

    return

In [81]:
def get_configuration_model_performance(train,test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path):
    """Score a test set with the configuration-model matrices and report AUC / AUP.

    The predicted probability of a test pair ``(InChiKey, target_aa_code)`` is

    * ``p10 / (p10 + p01)`` taken from the two matrices when both the ligand
      and the target have at least one ``Y == 1`` row in ``train``;
    * the ligand's ``k_+ / (k_+ + k_-)`` (counts of ``Y == 1`` and ``Y == 0``
      training rows) when only the ligand has a ``Y == 1`` row;
    * the target's ``k_+ / (k_+ + k_-)`` when only the target has one;
    * the mean of all non-NaN entries of ``summat10 / (summat10 + summat01)``
      otherwise.

    Test rows whose prediction is NaN are left out of the evaluation.

    Parameters
    ----------
    train : pandas.DataFrame
        Training edges with columns ``InChiKey``, ``target_aa_code`` and ``Y``.
    test : pandas.DataFrame
        Test edges with the same three columns.
    ligand_file_path : str
        Text file with one ligand per line; line ``i`` names row ``i`` of the matrices.
    target_file_path : str
        Text file with one target per line; line ``j`` names column ``j`` of the matrices.
    summat10_file_path, summat01_file_path : str
        Comma-separated matrices (output of the MATLAB run).

    Returns
    -------
    tuple
        ``(auc, aup)`` as returned by ``roc_auc_score`` and
        ``average_precision_score`` on the evaluated rows.
    """

    def _read_names(path):
        # Context manager so the handle is closed once the names are read.
        with open(path, "r") as handle:
            return [line.replace('\n', '') for line in handle.readlines()]

    def _first_position(names):
        # Same result as list.index (first occurrence) with O(1) lookups.
        positions = dict()
        for position, name in enumerate(names):
            positions.setdefault(name, position)
        return positions

    ligands = _read_names(ligand_file_path)  # Rows of the adjacency matrix in order
    targets = _read_names(target_file_path)  # Columns of the adjacency matrix in order

    train_pos = train[train['Y'] == 1]
    train_neg = train[train['Y'] == 0]

    def _degree_stats(names, column):
        # Count every node once instead of filtering the frame per node.
        pos_counts = train_pos[column].value_counts().to_dict()
        neg_counts = train_neg[column].value_counts().to_dict()
        ratio = dict()
        pos_deg_0 = []
        neg_deg_0 = []
        for name in names:
            pos_deg = pos_counts.get(name, 0)
            neg_deg = neg_counts.get(name, 0)
            if pos_deg == 0:
                pos_deg_0.append(name)
            else:
                # The ratio is only ever looked up for nodes with pos_deg > 0,
                # so the denominator cannot be zero here.
                ratio[name] = pos_deg / (pos_deg + neg_deg)  ## k_+ / (k_+ + k_-)
            if neg_deg == 0:
                neg_deg_0.append(name)
        return ratio, pos_deg_0, neg_deg_0

    # The keys of each ratio dict are exactly the nodes with positive degree > 0.
    ligand_degree_ratio, pos_deg_0_ligands, neg_deg_0_ligands = _degree_stats(ligands, 'InChiKey')
    targets_degree_ratio, pos_deg_0_targets, neg_deg_0_targets = _degree_stats(targets, 'target_aa_code')

    print('Ligands with positive degree 0: ',len(pos_deg_0_ligands))
    print('Ligands with negative degree 0: ',len(neg_deg_0_ligands))
    print('Targets with positive degree 0: ',len(pos_deg_0_targets))
    print('Targets with negative degree 0: ',len(neg_deg_0_targets))

    ligand_position = _first_position(ligands)
    target_position = _first_position(targets)

    summat10 = np.loadtxt(summat10_file_path, delimiter=",")  # Output of MATLAB run
    summat01 = np.loadtxt(summat01_file_path, delimiter=",")  # Output of MATLAB run

    # Elementwise p10 / (p10 + p01); 0/0 entries become NaN and are filtered below.
    with np.errstate(divide='ignore', invalid='ignore'):
        conditioned_summat = np.divide(summat10, np.add(summat10, summat01))

    ## Average conditional probability over the valid (non-NaN) entries
    valid_entries = conditioned_summat[~np.isnan(conditioned_summat)]
    if valid_entries.size:
        average_conditional_probability = valid_entries.mean()
    else:
        # No valid entry: fallback rows get NaN and are dropped from evaluation.
        average_conditional_probability = float('nan')

    # Labels are collected next to their predictions so the two lists stay
    # aligned even when the index of `test` contains duplicate labels.
    test_labels = []
    test_probabilty_predicted_conditioned = []

    test_rows = zip(test['InChiKey'], test['target_aa_code'], test['Y'])
    for ligand, target, label in tqdm(test_rows, total=len(test)):

        ligand_annotated = ligand in ligand_degree_ratio
        target_annotated = target in targets_degree_ratio

        if ligand_annotated and target_annotated:
            p10_conditioned = conditioned_summat[ligand_position[ligand], target_position[target]]
        elif ligand_annotated:
            p10_conditioned = ligand_degree_ratio[ligand]
        elif target_annotated:
            p10_conditioned = targets_degree_ratio[target]
        else:
            p10_conditioned = average_conditional_probability

        if math.isnan(p10_conditioned):
            continue

        test_labels.append(label)
        test_probabilty_predicted_conditioned.append(p10_conditioned)

    ## Performance on the test dataset (each metric is computed once)
    auc = roc_auc_score(test_labels, test_probabilty_predicted_conditioned)
    aup = average_precision_score(test_labels, test_probabilty_predicted_conditioned)

    print('AUC: ', auc)
    print('AUP: ', aup)

    return auc, aup

### Generate the degree files - Unseen Nodes and Edges

In [19]:
# For each of the five folds, load the train / test CSV splits and dump the
# training degree sequences into the VecNet_Unseen_Nodes folder.
for fold in tqdm(range(5)):
    train = pd.read_csv(f'../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/train_{fold}.csv')
    edges_test = pd.read_csv(f'../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_edges_{fold}.csv')
    nodes_test = pd.read_csv(f'../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_nodes_{fold}.csv')

    dump_degree_sequences(
        train,
        nodes_test,
        fold=fold,
        unseen_folder='VecNet_Unseen_Nodes',
    )

  0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/2705 [00:00<?, ?it/s][A
  1%|          | 33/2705 [00:00<00:08, 328.57it/s][A
  2%|▏         | 66/2705 [00:00<00:08, 324.92it/s][A
  4%|▎         | 99/2705 [00:00<00:08, 319.51it/s][A
  5%|▍         | 131/2705 [00:00<00:08, 319.10it/s][A
  6%|▌         | 164/2705 [00:00<00:07, 320.32it/s][A
  7%|▋         | 197/2705 [00:00<00:07, 321.93it/s][A
  9%|▊         | 230/2705 [00:00<00:07, 320.68it/s][A
 10%|▉         | 263/2705 [00:00<00:07, 321.78it/s][A
 11%|█         | 296/2705 [00:00<00:07, 321.18it/s][A
 12%|█▏        | 329/2705 [00:01<00:07, 320.56it/s][A
 13%|█▎        | 362/2705 [00:01<00:07, 321.41it/s][A
 15%|█▍        | 395/2705 [00:01<00:07, 320.90it/s][A
 16%|█▌        | 428/2705 [00:01<00:07, 319.24it/s][A
 17%|█▋        | 462/2705 [00:01<00:06, 322.53it/s][A
 18%|█▊        | 495/2705 [00:01<00:06, 321.25it/s][A
 20%|█▉        | 528/2705 [00:01<00:06, 318.47it/s][A
 21%|██        | 560/2705 [00:01<00:06, 3

######## Now run the MATLAB code in the "Configuration Model - 5 fold" folder to generate the summat10 / summat01 matrices. #########

### Get Unseen Nodes and Edges Test Performance

In [83]:
# Evaluate the configuration model on the unseen-nodes and the unseen-edges
# test sets of every fold, collecting one AUC / AUP value per fold.
auc_nodes = []
aup_nodes = []
auc_edges = []
aup_edges = []


for fold in tqdm(range(5)):

    train = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/train_' + str(fold) + '.csv')
    edges_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_edges_' + str(fold) + '.csv')
    nodes_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_nodes_' + str(fold) + '.csv')

    # Read from the same relative '../data' location that dump_degree_sequences
    # writes to, instead of an absolute '/data' path that only resolves when the
    # notebook happens to run one level below the filesystem root.
    ligand_file_path = '../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/ligands_' + str(fold) + '.txt'
    target_file_path = '../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/targets_' + str(fold) + '.txt'
    summat10_file_path = '../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/summat10_' + str(fold) + '.csv'
    summat01_file_path = '../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/summat01_' + str(fold) + '.csv'

    # Unseen-nodes test set
    auc, aup = get_configuration_model_performance(train,nodes_test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path)
    auc_nodes.append(auc)
    aup_nodes.append(aup)

    # Unseen-edges test set
    auc, aup = get_configuration_model_performance(train,edges_test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path)
    auc_edges.append(auc)
    aup_edges.append(aup)

  0%|          | 0/5 [00:00<?, ?it/s]
100%|██████████| 2705/2705 [00:00<00:00, 1235634.10it/s]

100%|██████████| 3049/3049 [00:00<00:00, 786738.41it/s]

  0%|          | 0/2705 [00:00<?, ?it/s][A
  1%|▏         | 40/2705 [00:00<00:06, 392.02it/s][A
  3%|▎         | 80/2705 [00:00<00:06, 394.03it/s][A
  4%|▍         | 120/2705 [00:00<00:06, 389.83it/s][A
  6%|▌         | 159/2705 [00:00<00:06, 388.09it/s][A
  7%|▋         | 198/2705 [00:00<00:06, 387.40it/s][A
  9%|▉         | 238/2705 [00:00<00:06, 388.74it/s][A
 10%|█         | 277/2705 [00:00<00:06, 378.40it/s][A
 12%|█▏        | 315/2705 [00:00<00:06, 375.69it/s][A
 13%|█▎        | 353/2705 [00:00<00:06, 372.64it/s][A
 14%|█▍        | 391/2705 [00:01<00:06, 373.27it/s][A
 16%|█▌        | 429/2705 [00:01<00:06, 374.83it/s][A
 17%|█▋        | 467/2705 [00:01<00:06, 372.32it/s][A
 19%|█▊        | 505/2705 [00:01<00:05, 370.00it/s][A
 20%|██        | 543/2705 [00:01<00:05, 371.01it/s][A
 21%|██▏       | 581/2705 [00:01<00

Ligands with positive degree 0:  446
Ligands with negative degree 0:  69
Targets with positive degree 0:  2069
Targets with negative degree 0:  334



0it [00:00, ?it/s][A
280it [00:00, 2796.12it/s][A
562it [00:00, 2809.98it/s][A
854it [00:00, 2856.76it/s][A
1146it [00:00, 2881.26it/s][A
1437it [00:00, 2890.89it/s][A
1727it [00:00, 2891.77it/s][A
2017it [00:00, 2880.09it/s][A
2306it [00:00, 2844.12it/s][A
2591it [00:00, 2736.70it/s][A
2878it [00:01, 2775.02it/s][A
3166it [00:01, 2805.07it/s][A
3456it [00:01, 2831.00it/s][A
3746it [00:01, 2851.22it/s][A
4040it [00:01, 2875.38it/s][A
4330it [00:01, 2879.90it/s][A
4619it [00:01, 2881.12it/s][A
4910it [00:01, 2888.36it/s][A
5200it [00:01, 2891.23it/s][A
5490it [00:01, 2893.14it/s][A
5780it [00:02, 2892.77it/s][A
6070it [00:02, 2850.30it/s][A
6491it [00:02, 2846.63it/s][A

100%|██████████| 2705/2705 [00:00<00:00, 801016.12it/s]

100%|██████████| 3049/3049 [00:00<00:00, 508445.96it/s]

  0%|          | 0/2705 [00:00<?, ?it/s][A
  1%|▏         | 39/2705 [00:00<00:06, 385.95it/s][A


AUC:  0.5
AUP:  0.482976428901556


  3%|▎         | 78/2705 [00:00<00:06, 381.78it/s][A
  4%|▍         | 117/2705 [00:00<00:06, 383.42it/s][A
  6%|▌         | 157/2705 [00:00<00:06, 386.49it/s][A
  7%|▋         | 196/2705 [00:00<00:06, 386.13it/s][A
  9%|▊         | 235/2705 [00:00<00:06, 387.42it/s][A
 10%|█         | 274/2705 [00:00<00:06, 386.42it/s][A
 12%|█▏        | 313/2705 [00:00<00:06, 386.92it/s][A
 13%|█▎        | 353/2705 [00:00<00:06, 388.23it/s][A
 14%|█▍        | 392/2705 [00:01<00:05, 388.42it/s][A
 16%|█▌        | 431/2705 [00:01<00:05, 387.51it/s][A
 17%|█▋        | 470/2705 [00:01<00:05, 387.42it/s][A
 19%|█▉        | 510/2705 [00:01<00:05, 388.73it/s][A
 20%|██        | 549/2705 [00:01<00:05, 388.98it/s][A
 22%|██▏       | 589/2705 [00:01<00:05, 390.10it/s][A
 23%|██▎       | 629/2705 [00:01<00:05, 388.68it/s][A
 25%|██▍       | 668/2705 [00:01<00:05, 388.06it/s][A
 26%|██▌       | 708/2705 [00:01<00:05, 390.67it/s][A
 28%|██▊       | 748/2705 [00:01<00:04, 393.33it/s][A
 29%|██▉   

Ligands with positive degree 0:  446
Ligands with negative degree 0:  69
Targets with positive degree 0:  2069
Targets with negative degree 0:  334



0it [00:00, ?it/s][A
288it [00:00, 2870.99it/s][A
576it [00:00, 2775.78it/s][A
865it [00:00, 2825.50it/s][A
1154it [00:00, 2849.84it/s][A
1445it [00:00, 2869.83it/s][A
1733it [00:00, 2873.07it/s][A
2021it [00:00, 2859.27it/s][A

2606it [00:00, 2892.20it/s][A
2899it [00:01, 2902.02it/s][A
3190it [00:01, 2883.80it/s][A
3693it [00:01, 2874.49it/s][A
 20%|██        | 1/5 [01:02<04:08, 62.06s/it]
100%|██████████| 2525/2525 [00:00<00:00, 1110011.28it/s]

100%|██████████| 2994/2994 [00:00<00:00, 830154.44it/s]

  0%|          | 0/2525 [00:00<?, ?it/s][A


AUC:  0.7041806477014296
AUP:  0.7215158796163658


  2%|▏         | 40/2525 [00:00<00:06, 399.97it/s][A
  3%|▎         | 80/2525 [00:00<00:06, 389.29it/s][A
  5%|▍         | 119/2525 [00:00<00:06, 386.25it/s][A
  6%|▋         | 159/2525 [00:00<00:06, 389.25it/s][A
  8%|▊         | 199/2525 [00:00<00:05, 391.77it/s][A
  9%|▉         | 239/2525 [00:00<00:05, 391.88it/s][A
 11%|█         | 279/2525 [00:00<00:05, 390.31it/s][A
 13%|█▎        | 320/2525 [00:00<00:05, 393.46it/s][A
 14%|█▍        | 360/2525 [00:00<00:05, 395.02it/s][A
 16%|█▌        | 400/2525 [00:01<00:05, 395.92it/s][A
 17%|█▋        | 440/2525 [00:01<00:05, 397.13it/s][A
 19%|█▉        | 480/2525 [00:01<00:05, 396.56it/s][A
 21%|██        | 520/2525 [00:01<00:05, 397.42it/s][A
 22%|██▏       | 560/2525 [00:01<00:04, 397.92it/s][A
 24%|██▍       | 600/2525 [00:01<00:04, 397.82it/s][A
 25%|██▌       | 640/2525 [00:01<00:04, 397.95it/s][A
 27%|██▋       | 680/2525 [00:01<00:04, 397.50it/s][A
 29%|██▊       | 720/2525 [00:01<00:04, 398.11it/s][A
 30%|███    

Ligands with positive degree 0:  492
Ligands with negative degree 0:  59
Targets with positive degree 0:  1988
Targets with negative degree 0:  326



0it [00:00, ?it/s][A
297it [00:00, 2966.02it/s][A
594it [00:00, 2946.67it/s][A
889it [00:00, 2901.14it/s][A
1184it [00:00, 2917.78it/s][A
1483it [00:00, 2942.14it/s][A
1783it [00:00, 2960.77it/s][A
2089it [00:00, 2991.53it/s][A
2391it [00:00, 2998.27it/s][A
2700it [00:00, 3025.56it/s][A
3009it [00:01, 3045.06it/s][A
3314it [00:01, 3035.92it/s][A
3627it [00:01, 3063.19it/s][A
3934it [00:01, 3050.44it/s][A
4242it [00:01, 3059.04it/s][A
4548it [00:01, 3053.72it/s][A
4855it [00:01, 3058.46it/s][A
5162it [00:01, 3061.48it/s][A
5469it [00:01, 3035.82it/s][A
5773it [00:01, 3021.00it/s][A
6076it [00:02, 3003.32it/s][A
6377it [00:02, 2949.53it/s][A
6935it [00:02, 2994.79it/s][A

100%|██████████| 2525/2525 [00:00<00:00, 1246394.92it/s]

100%|██████████| 2994/2994 [00:00<00:00, 822703.50it/s]

  0%|          | 0/2525 [00:00<?, ?it/s][A
  2%|▏         | 41/2525 [00:00<00:06, 405.27it/s][A


AUC:  0.5
AUP:  0.487238644556597


  3%|▎         | 82/2525 [00:00<00:06, 404.25it/s][A
  5%|▍         | 123/2525 [00:00<00:05, 404.22it/s][A
  6%|▋         | 164/2525 [00:00<00:05, 396.44it/s][A
  8%|▊         | 204/2525 [00:00<00:05, 395.84it/s][A
 10%|▉         | 244/2525 [00:00<00:05, 396.03it/s][A
 11%|█         | 284/2525 [00:00<00:05, 394.73it/s][A
 13%|█▎        | 324/2525 [00:00<00:05, 394.88it/s][A
 14%|█▍        | 364/2525 [00:00<00:05, 394.34it/s][A
 16%|█▌        | 404/2525 [00:01<00:05, 395.16it/s][A
 18%|█▊        | 444/2525 [00:01<00:05, 396.22it/s][A
 19%|█▉        | 484/2525 [00:01<00:05, 396.92it/s][A
 21%|██        | 525/2525 [00:01<00:05, 398.27it/s][A
 22%|██▏       | 565/2525 [00:01<00:04, 397.02it/s][A
 24%|██▍       | 605/2525 [00:01<00:04, 396.57it/s][A
 26%|██▌       | 646/2525 [00:01<00:04, 398.62it/s][A
 27%|██▋       | 686/2525 [00:01<00:04, 396.64it/s][A
 29%|██▉       | 726/2525 [00:01<00:04, 397.32it/s][A
 30%|███       | 766/2525 [00:01<00:04, 397.32it/s][A
 32%|███▏  

Ligands with positive degree 0:  492
Ligands with negative degree 0:  59
Targets with positive degree 0:  1988
Targets with negative degree 0:  326



0it [00:00, ?it/s][A
303it [00:00, 3023.95it/s][A
606it [00:00, 2983.05it/s][A
905it [00:00, 2964.61it/s][A
1202it [00:00, 2943.84it/s][A
1498it [00:00, 2948.43it/s][A
1793it [00:00, 2903.37it/s][A
2084it [00:00, 2879.13it/s][A
2372it [00:00, 2873.48it/s][A
2664it [00:00, 2887.24it/s][A
2962it [00:01, 2913.10it/s][A
3254it [00:01, 2903.38it/s][A
3603it [00:01, 2915.53it/s][A
 40%|████      | 2/5 [01:59<02:58, 59.41s/it]
100%|██████████| 3394/3394 [00:00<00:00, 1107559.93it/s]

100%|██████████| 3184/3184 [00:00<00:00, 821774.90it/s]

  0%|          | 0/3394 [00:00<?, ?it/s][A


AUC:  0.719679816844512
AUP:  0.7333036585625492


  1%|          | 35/3394 [00:00<00:09, 346.06it/s][A
  2%|▏         | 70/3394 [00:00<00:10, 331.09it/s][A
  3%|▎         | 104/3394 [00:00<00:10, 326.67it/s][A
  4%|▍         | 137/3394 [00:00<00:10, 324.17it/s][A
  5%|▌         | 170/3394 [00:00<00:09, 325.28it/s][A
  6%|▌         | 204/3394 [00:00<00:09, 329.52it/s][A
  7%|▋         | 238/3394 [00:00<00:09, 329.92it/s][A
  8%|▊         | 272/3394 [00:00<00:09, 324.74it/s][A
  9%|▉         | 305/3394 [00:00<00:09, 323.28it/s][A
 10%|▉         | 338/3394 [00:01<00:09, 324.14it/s][A
 11%|█         | 371/3394 [00:01<00:09, 325.43it/s][A
 12%|█▏        | 404/3394 [00:01<00:09, 320.09it/s][A
 13%|█▎        | 437/3394 [00:01<00:09, 321.92it/s][A
 14%|█▍        | 470/3394 [00:01<00:09, 323.29it/s][A
 15%|█▍        | 503/3394 [00:01<00:08, 323.42it/s][A
 16%|█▌        | 536/3394 [00:01<00:08, 323.04it/s][A
 17%|█▋        | 569/3394 [00:01<00:08, 324.27it/s][A
 18%|█▊        | 602/3394 [00:01<00:08, 324.83it/s][A
 19%|█▊     

Ligands with positive degree 0:  887
Ligands with negative degree 0:  80
Targets with positive degree 0:  2052
Targets with negative degree 0:  273



0it [00:00, ?it/s][A
268it [00:00, 2676.52it/s][A
536it [00:00, 2596.32it/s][A
796it [00:00, 2591.27it/s][A
1068it [00:00, 2639.19it/s][A
1333it [00:00, 2642.77it/s][A
1598it [00:00, 2613.45it/s][A
1862it [00:00, 2620.64it/s][A
2135it [00:00, 2654.05it/s][A
2406it [00:00, 2670.12it/s][A
2680it [00:01, 2689.35it/s][A
2949it [00:01, 2643.34it/s][A
3222it [00:01, 2666.97it/s][A
3495it [00:01, 2682.38it/s][A
3771it [00:01, 2705.06it/s][A
4045it [00:01, 2712.99it/s][A
4324it [00:01, 2734.67it/s][A
4603it [00:01, 2749.94it/s][A
4883it [00:01, 2763.35it/s][A
5161it [00:01, 2765.75it/s][A
5438it [00:02, 2713.49it/s][A
5714it [00:02, 2725.90it/s][A
5992it [00:02, 2740.13it/s][A
6329it [00:02, 2693.93it/s][A

100%|██████████| 3394/3394 [00:00<00:00, 1234967.27it/s]

100%|██████████| 3184/3184 [00:00<00:00, 828710.14it/s]

  0%|          | 0/3394 [00:00<?, ?it/s][A
  1%|          | 36/3394 [00:00<00:09, 357.41it/s][A


AUC:  0.5
AUP:  0.4482540685732343


  2%|▏         | 72/3394 [00:00<00:09, 352.86it/s][A
  3%|▎         | 108/3394 [00:00<00:09, 342.96it/s][A
  4%|▍         | 143/3394 [00:00<00:14, 230.77it/s][A
  5%|▌         | 178/3394 [00:00<00:12, 262.11it/s][A
  6%|▋         | 213/3394 [00:00<00:11, 284.50it/s][A
  7%|▋         | 248/3394 [00:00<00:10, 301.98it/s][A
  8%|▊         | 283/3394 [00:00<00:09, 314.22it/s][A
  9%|▉         | 316/3394 [00:01<00:09, 318.55it/s][A
 10%|█         | 349/3394 [00:01<00:09, 321.83it/s][A
 11%|█▏        | 384/3394 [00:01<00:09, 328.92it/s][A
 12%|█▏        | 419/3394 [00:01<00:08, 334.03it/s][A
 13%|█▎        | 454/3394 [00:01<00:08, 338.66it/s][A
 14%|█▍        | 490/3394 [00:01<00:08, 342.47it/s][A
 15%|█▌        | 525/3394 [00:01<00:08, 343.85it/s][A
 17%|█▋        | 561/3394 [00:01<00:08, 346.67it/s][A
 18%|█▊        | 597/3394 [00:01<00:08, 348.18it/s][A
 19%|█▊        | 632/3394 [00:01<00:07, 347.32it/s][A
 20%|█▉        | 667/3394 [00:02<00:08, 339.95it/s][A
 21%|██    

Ligands with positive degree 0:  887
Ligands with negative degree 0:  80
Targets with positive degree 0:  2052
Targets with negative degree 0:  273



0it [00:00, ?it/s][A
276it [00:00, 2756.35it/s][A
554it [00:00, 2764.78it/s][A
832it [00:00, 2768.98it/s][A
1109it [00:00, 2763.03it/s][A
1386it [00:00, 2759.80it/s][A
1662it [00:00, 2738.64it/s][A
1938it [00:00, 2743.56it/s][A
2220it [00:00, 2765.73it/s][A
2497it [00:00, 2738.47it/s][A
2771it [00:01, 2733.26it/s][A
3048it [00:01, 2743.02it/s][A
3324it [00:01, 2747.40it/s][A
3785it [00:01, 2749.30it/s][A
 60%|██████    | 3/5 [03:19<02:17, 68.70s/it]
100%|██████████| 2271/2271 [00:00<00:00, 1115893.20it/s]

100%|██████████| 2965/2965 [00:00<00:00, 754893.25it/s]

  0%|          | 0/2271 [00:00<?, ?it/s][A


AUC:  0.7349298280590594
AUP:  0.7558771158179316


  2%|▏         | 45/2271 [00:00<00:05, 440.70it/s][A
  4%|▍         | 90/2271 [00:00<00:05, 434.75it/s][A
  6%|▌         | 134/2271 [00:00<00:04, 432.36it/s][A
  8%|▊         | 178/2271 [00:00<00:04, 430.93it/s][A
 10%|▉         | 222/2271 [00:00<00:04, 427.54it/s][A
 12%|█▏        | 265/2271 [00:00<00:04, 428.25it/s][A
 14%|█▎        | 309/2271 [00:00<00:04, 430.21it/s][A
 16%|█▌        | 353/2271 [00:00<00:04, 428.12it/s][A
 17%|█▋        | 397/2271 [00:00<00:04, 429.16it/s][A
 19%|█▉        | 440/2271 [00:01<00:04, 427.98it/s][A
 21%|██▏       | 483/2271 [00:01<00:04, 426.22it/s][A
 23%|██▎       | 526/2271 [00:01<00:04, 426.82it/s][A
 25%|██▌       | 570/2271 [00:01<00:03, 428.59it/s][A
 27%|██▋       | 613/2271 [00:01<00:03, 426.27it/s][A
 29%|██▉       | 656/2271 [00:01<00:03, 425.89it/s][A
 31%|███       | 699/2271 [00:01<00:03, 423.29it/s][A
 33%|███▎      | 742/2271 [00:01<00:03, 423.46it/s][A
 35%|███▍      | 785/2271 [00:01<00:03, 424.35it/s][A
 36%|███▋   

Ligands with positive degree 0:  490
Ligands with negative degree 0:  70
Targets with positive degree 0:  2060
Targets with negative degree 0:  308



0it [00:00, ?it/s][A
294it [00:00, 2931.67it/s][A
605it [00:00, 3033.47it/s][A
919it [00:00, 3080.75it/s][A
1232it [00:00, 3096.88it/s][A
1558it [00:00, 3155.06it/s][A
1888it [00:00, 3201.77it/s][A
2212it [00:00, 3212.64it/s][A
2544it [00:00, 3244.12it/s][A
2869it [00:00, 3237.69it/s][A
3193it [00:01, 3214.73it/s][A
3527it [00:01, 3249.71it/s][A
3862it [00:01, 3278.37it/s][A
4197it [00:01, 3297.47it/s][A
4529it [00:01, 3302.12it/s][A
4860it [00:01, 3295.81it/s][A
5195it [00:01, 3309.55it/s][A
5526it [00:01, 3306.03it/s][A
5857it [00:01, 3291.69it/s][A
6191it [00:01, 3303.41it/s][A
6579it [00:02, 3242.31it/s][A

100%|██████████| 2271/2271 [00:00<00:00, 1161334.36it/s]

100%|██████████| 2965/2965 [00:00<00:00, 799338.69it/s]

  0%|          | 0/2271 [00:00<?, ?it/s][A
  2%|▏         | 44/2271 [00:00<00:05, 439.74it/s][A


AUC:  0.5
AUP:  0.46390028879768963


  4%|▍         | 88/2271 [00:00<00:04, 438.24it/s][A
  6%|▌         | 132/2271 [00:00<00:04, 433.80it/s][A
  8%|▊         | 176/2271 [00:00<00:04, 427.99it/s][A
 10%|▉         | 220/2271 [00:00<00:04, 431.31it/s][A
 12%|█▏        | 264/2271 [00:00<00:04, 431.86it/s][A
 14%|█▎        | 308/2271 [00:00<00:04, 431.58it/s][A
 15%|█▌        | 352/2271 [00:00<00:04, 430.24it/s][A
 17%|█▋        | 396/2271 [00:00<00:04, 431.21it/s][A
 19%|█▉        | 440/2271 [00:01<00:04, 431.25it/s][A
 21%|██▏       | 484/2271 [00:01<00:04, 432.69it/s][A
 23%|██▎       | 528/2271 [00:01<00:04, 432.98it/s][A
 25%|██▌       | 572/2271 [00:01<00:03, 433.68it/s][A
 27%|██▋       | 616/2271 [00:01<00:03, 433.94it/s][A
 29%|██▉       | 660/2271 [00:01<00:03, 435.36it/s][A
 31%|███       | 704/2271 [00:01<00:03, 434.72it/s][A
 33%|███▎      | 748/2271 [00:01<00:03, 430.82it/s][A
 35%|███▍      | 792/2271 [00:01<00:03, 424.50it/s][A
 37%|███▋      | 835/2271 [00:01<00:03, 425.37it/s][A
 39%|███▊  

Ligands with positive degree 0:  490
Ligands with negative degree 0:  70
Targets with positive degree 0:  2060
Targets with negative degree 0:  308



0it [00:00, ?it/s][A
331it [00:00, 3304.61it/s][A
662it [00:00, 3299.28it/s][A
992it [00:00, 3281.54it/s][A
1321it [00:00, 3257.61it/s][A
1647it [00:00, 3113.56it/s][A
1969it [00:00, 3146.83it/s][A
2301it [00:00, 3199.73it/s][A
2634it [00:00, 3239.19it/s][A
2961it [00:00, 3248.29it/s][A
3292it [00:01, 3266.49it/s][A
3710it [00:01, 3205.51it/s][A
 80%|████████  | 4/5 [04:10<01:01, 61.69s/it]
100%|██████████| 3441/3441 [00:00<00:00, 1203218.01it/s]

100%|██████████| 3236/3236 [00:00<00:00, 813471.25it/s]

  0%|          | 0/3441 [00:00<?, ?it/s][A


AUC:  0.7096105310026694
AUP:  0.7170082980222419


  1%|          | 33/3441 [00:00<00:10, 321.67it/s][A
  2%|▏         | 66/3441 [00:00<00:10, 324.47it/s][A
  3%|▎         | 99/3441 [00:00<00:10, 323.86it/s][A
  4%|▍         | 132/3441 [00:00<00:10, 322.53it/s][A
  5%|▍         | 165/3441 [00:00<00:10, 321.95it/s][A
  6%|▌         | 198/3441 [00:00<00:10, 322.62it/s][A
  7%|▋         | 231/3441 [00:00<00:09, 323.64it/s][A
  8%|▊         | 264/3441 [00:00<00:09, 323.25it/s][A
  9%|▊         | 297/3441 [00:00<00:09, 324.37it/s][A
 10%|▉         | 331/3441 [00:01<00:09, 326.22it/s][A
 11%|█         | 364/3441 [00:01<00:09, 326.94it/s][A
 12%|█▏        | 397/3441 [00:01<00:09, 327.66it/s][A
 13%|█▎        | 431/3441 [00:01<00:09, 329.62it/s][A
 14%|█▎        | 465/3441 [00:01<00:08, 331.14it/s][A
 15%|█▍        | 499/3441 [00:01<00:08, 330.48it/s][A
 15%|█▌        | 533/3441 [00:01<00:08, 329.15it/s][A
 16%|█▋        | 566/3441 [00:01<00:08, 327.24it/s][A
 17%|█▋        | 599/3441 [00:01<00:08, 326.49it/s][A
 18%|█▊      

Ligands with positive degree 0:  797
Ligands with negative degree 0:  83
Targets with positive degree 0:  2065
Targets with negative degree 0:  239



0it [00:00, ?it/s][A
252it [00:00, 2515.13it/s][A
519it [00:00, 2605.02it/s][A
783it [00:00, 2620.24it/s][A
1055it [00:00, 2657.60it/s][A
1321it [00:00, 2580.90it/s][A
1580it [00:00, 2501.90it/s][A
1831it [00:00, 2479.59it/s][A
2086it [00:00, 2499.95it/s][A
2347it [00:00, 2530.77it/s][A
2612it [00:01, 2566.11it/s][A
2875it [00:01, 2584.23it/s][A
3138it [00:01, 2595.26it/s][A
3405it [00:01, 2616.34it/s][A
3669it [00:01, 2621.79it/s][A
3933it [00:01, 2624.27it/s][A
4196it [00:01, 2622.64it/s][A
4461it [00:01, 2628.04it/s][A
4724it [00:01, 2610.57it/s][A
4986it [00:01, 2609.64it/s][A
5250it [00:02, 2616.29it/s][A
5512it [00:02, 2604.80it/s][A
5773it [00:02, 2561.16it/s][A
6030it [00:02, 2549.88it/s][A
6289it [00:02, 2560.27it/s][A
6626it [00:02, 2578.25it/s][A

100%|██████████| 3441/3441 [00:00<00:00, 1256647.81it/s]

100%|██████████| 3236/3236 [00:00<00:00, 847662.24it/s]

  0%|          | 0/3441 [00:00<?, ?it/s][A
  1%|          | 35/3441 [00:00<00:09, 344.91

AUC:  0.5
AUP:  0.4646845759130697


  2%|▏         | 70/3441 [00:00<00:09, 338.82it/s][A
  3%|▎         | 104/3441 [00:00<00:09, 337.60it/s][A
  4%|▍         | 138/3441 [00:00<00:09, 337.03it/s][A
  5%|▍         | 172/3441 [00:00<00:09, 334.33it/s][A
  6%|▌         | 206/3441 [00:00<00:09, 334.71it/s][A
  7%|▋         | 240/3441 [00:00<00:09, 334.55it/s][A
  8%|▊         | 275/3441 [00:00<00:09, 336.41it/s][A
  9%|▉         | 309/3441 [00:00<00:09, 333.03it/s][A
 10%|▉         | 343/3441 [00:01<00:09, 333.25it/s][A
 11%|█         | 377/3441 [00:01<00:09, 329.70it/s][A
 12%|█▏        | 410/3441 [00:01<00:09, 325.66it/s][A
 13%|█▎        | 443/3441 [00:01<00:09, 325.66it/s][A
 14%|█▍        | 476/3441 [00:01<00:09, 324.82it/s][A
 15%|█▍        | 510/3441 [00:01<00:08, 326.79it/s][A
 16%|█▌        | 545/3441 [00:01<00:08, 330.91it/s][A
 17%|█▋        | 579/3441 [00:01<00:08, 333.44it/s][A
 18%|█▊        | 613/3441 [00:01<00:08, 334.77it/s][A
 19%|█▉        | 647/3441 [00:01<00:08, 335.45it/s][A
 20%|█▉    

Ligands with positive degree 0:  797
Ligands with negative degree 0:  83
Targets with positive degree 0:  2065
Targets with negative degree 0:  239



0it [00:00, ?it/s][A
267it [00:00, 2660.18it/s][A
534it [00:00, 2640.26it/s][A
799it [00:00, 2615.47it/s][A
1061it [00:00, 2603.51it/s][A
1333it [00:00, 2641.99it/s][A
1602it [00:00, 2654.48it/s][A
1868it [00:00, 2654.75it/s][A
2136it [00:00, 2661.57it/s][A
2404it [00:00, 2666.03it/s][A
2671it [00:01, 2647.45it/s][A
2939it [00:01, 2655.47it/s][A
3209it [00:01, 2666.17it/s][A
3696it [00:01, 2655.78it/s][A
100%|██████████| 5/5 [05:32<00:00, 66.54s/it]

AUC:  0.7441010354978903
AUP:  0.7552506710842388





In [84]:
# Report mean and standard deviation of AUC / AUP across the folds for both
# test settings, separated by a blank source line in the original layout.
for setting_name, setting_aucs, setting_aups in (
    ('Unseen Nodes', auc_nodes, aup_nodes),
    ('Unseen Edges', auc_edges, aup_edges),
):
    print(setting_name)
    print('AUC: ', np.mean(setting_aucs), '+-', np.std(setting_aucs))
    print('AUP: ', np.mean(setting_aups), '+-', np.std(setting_aups))

Unseen Nodes
AUC:  0.5 +- 0.0
AUP:  0.4694108013484294 +- 0.014157461223848512
Unseen Edges
AUC:  0.7225003718211122 +- 0.015038589509461478
AUP:  0.7365911246206653 +- 0.016380999195983965


### Generate the degree files - Unseen Targets

In [87]:
# For each of the five folds, load the train / test CSV splits and dump the
# training degree sequences into the VecNet_Unseen_Targets folder.
for fold in tqdm(range(5)):
    train = pd.read_csv(f'../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/train_{fold}.csv')
    edges_test = pd.read_csv(f'../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/test_unseen_edges_{fold}.csv')
    nodes_test = pd.read_csv(f'../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/test_unseen_nodes_{fold}.csv')

    dump_degree_sequences(
        train,
        nodes_test,
        fold=fold,
        unseen_folder='VecNet_Unseen_Targets',
    )

  0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/6751 [00:00<?, ?it/s][A
  0%|          | 18/6751 [00:00<00:39, 171.13it/s][A
  1%|          | 36/6751 [00:00<00:39, 168.76it/s][A
  1%|          | 53/6751 [00:00<00:39, 167.97it/s][A
  1%|          | 70/6751 [00:00<00:39, 167.26it/s][A
  1%|▏         | 87/6751 [00:00<00:39, 167.20it/s][A
  2%|▏         | 104/6751 [00:00<00:39, 167.61it/s][A
  2%|▏         | 121/6751 [00:00<00:39, 167.17it/s][A
  2%|▏         | 138/6751 [00:00<00:40, 163.35it/s][A
  2%|▏         | 155/6751 [00:00<00:40, 164.36it/s][A
  3%|▎         | 172/6751 [00:01<00:40, 164.10it/s][A
  3%|▎         | 189/6751 [00:01<00:39, 164.43it/s][A
  3%|▎         | 206/6751 [00:01<00:39, 164.94it/s][A
  3%|▎         | 223/6751 [00:01<00:39, 165.55it/s][A
  4%|▎         | 240/6751 [00:01<00:39, 165.78it/s][A
  4%|▍         | 257/6751 [00:01<00:39, 166.28it/s][A
  4%|▍         | 274/6751 [00:01<00:39, 164.40it/s][A
  4%|▍         | 291/6751 [00:01<00:40, 159

### Get Unseen Targets Test Performance

In [88]:
# Evaluate the configuration model on the unseen-nodes test split of every
# Unseen Targets fold, collecting one AUC and one AUP value per fold.
# The lists are filled fold by fold, so if a fold raises they only contain
# the folds that completed before the error.
auc_targets = []
aup_targets = []

# Directory holding the train/test CSV splits.
split_dir = '../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/'
# NOTE(review): the degree-sequence paths start at '/data/...' while the CSVs
# above are read from '../data/...'. The strings are kept exactly as before;
# confirm this is the form get_configuration_model_performance expects.
degree_dir = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/Degree_Sequences/'

for fold in tqdm(range(5)):
    fold_id = str(fold)

    train = pd.read_csv(split_dir + 'train_' + fold_id + '.csv')
    # The unseen-edges split is not evaluated in this cell, so it is not loaded
    # (the previous version read it on every fold and never used it).
    nodes_test = pd.read_csv(split_dir + 'test_unseen_nodes_' + fold_id + '.csv')

    ligand_file_path = degree_dir + 'ligands_' + fold_id + '.txt'
    target_file_path = degree_dir + 'targets_' + fold_id + '.txt'
    summat10_file_path = degree_dir + 'summat10_' + fold_id + '.csv'
    summat01_file_path = degree_dir + 'summat01_' + fold_id + '.csv'

    auc, aup = get_configuration_model_performance(train, nodes_test, ligand_file_path, target_file_path, summat10_file_path, summat01_file_path)
    auc_targets.append(auc)
    aup_targets.append(aup)

  0%|          | 0/5 [00:00<?, ?it/s]
100%|██████████| 6751/6751 [00:00<00:00, 1213029.44it/s]

100%|██████████| 3594/3594 [00:00<00:00, 858789.30it/s]

  0%|          | 0/6751 [00:00<?, ?it/s][A
  0%|          | 19/6751 [00:00<00:36, 186.80it/s][A
  1%|          | 38/6751 [00:00<00:36, 185.76it/s][A
  1%|          | 57/6751 [00:00<00:35, 186.91it/s][A
  1%|          | 76/6751 [00:00<00:35, 187.47it/s][A
  1%|▏         | 95/6751 [00:00<00:35, 187.40it/s][A
  2%|▏         | 114/6751 [00:00<00:35, 187.57it/s][A
  2%|▏         | 133/6751 [00:00<00:35, 187.94it/s][A
  2%|▏         | 152/6751 [00:00<00:35, 188.05it/s][A
  3%|▎         | 171/6751 [00:00<00:34, 188.35it/s][A
  3%|▎         | 190/6751 [00:01<00:34, 187.94it/s][A
  3%|▎         | 209/6751 [00:01<00:35, 186.77it/s][A
  3%|▎         | 228/6751 [00:01<00:34, 186.67it/s][A
  4%|▎         | 247/6751 [00:01<00:34, 187.01it/s][A
  4%|▍         | 266/6751 [00:01<00:34, 187.41it/s][A
  4%|▍         | 285/6751 [00:01<00:34

Ligands with positive degree 0:  1487
Ligands with negative degree 0:  219
Targets with positive degree 0:  621
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
199it [00:00, 1988.80it/s][A
398it [00:00, 1961.65it/s][A
596it [00:00, 1967.92it/s][A
795it [00:00, 1973.25it/s][A
995it [00:00, 1979.14it/s][A
1195it [00:00, 1983.94it/s][A
1394it [00:00, 1973.85it/s][A
1592it [00:00, 1970.39it/s][A
1797it [00:00, 1991.74it/s][A
1998it [00:01, 1996.52it/s][A
2198it [00:01, 1978.12it/s][A
2402it [00:01, 1995.51it/s][A
2603it [00:01, 1998.72it/s][A
2803it [00:01, 1992.33it/s][A
3011it [00:01, 2016.03it/s][A
3213it [00:01, 2006.41it/s][A
3414it [00:01, 2005.61it/s][A
3617it [00:01, 2009.87it/s][A
3818it [00:01, 1985.95it/s][A
4024it [00:02, 2005.61it/s][A
4225it [00:02, 1998.69it/s][A
4432it [00:02, 2017.42it/s][A
4634it [00:02, 2009.19it/s][A
4838it [00:02, 2017.93it/s][A
5040it [00:02, 2004.51it/s][A
5241it [00:02, 1996.58it/s][A
5441it [00:02, 1991.05it/s][A
5643it [00:02, 1997.00it/s][A
5845it [00:02, 2002.02it/s][A
6046it [00:03, 1986.40it/s][A
6247it [00:03, 1992.84it/s][A
6491it [00:03, 1995.9

AUC:  0.7561647780736922
AUP:  0.731495556420381




  0%|          | 0/6765 [00:00<?, ?it/s][A
  0%|          | 20/6765 [00:00<00:34, 195.77it/s][A
  1%|          | 40/6765 [00:00<00:34, 192.33it/s][A
  1%|          | 60/6765 [00:00<00:35, 189.69it/s][A
  1%|          | 80/6765 [00:00<00:35, 190.53it/s][A
  1%|▏         | 100/6765 [00:00<00:34, 190.64it/s][A
  2%|▏         | 120/6765 [00:00<00:34, 190.22it/s][A
  2%|▏         | 140/6765 [00:00<00:35, 188.87it/s][A
  2%|▏         | 159/6765 [00:00<00:35, 188.50it/s][A
  3%|▎         | 179/6765 [00:00<00:34, 189.34it/s][A
  3%|▎         | 199/6765 [00:01<00:34, 189.78it/s][A
  3%|▎         | 219/6765 [00:01<00:34, 190.51it/s][A
  4%|▎         | 239/6765 [00:01<00:34, 190.35it/s][A
  4%|▍         | 259/6765 [00:01<00:34, 190.43it/s][A
  4%|▍         | 279/6765 [00:01<00:33, 191.00it/s][A
  4%|▍         | 299/6765 [00:01<00:33, 191.28it/s][A
  5%|▍         | 319/6765 [00:01<00:33, 191.60it/s][A
  5%|▌         | 339/6765 [00:01<00:33, 191.99it/s][A
  5%|▌         | 359/67

Ligands with positive degree 0:  1839
Ligands with negative degree 0:  231
Targets with positive degree 0:  636
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
209it [00:00, 2086.13it/s][A
420it [00:00, 2099.65it/s][A
631it [00:00, 2099.56it/s][A
841it [00:00, 2084.12it/s][A
1050it [00:00, 2076.48it/s][A
1258it [00:00, 2047.19it/s][A
1464it [00:00, 2051.06it/s][A
1676it [00:00, 2071.64it/s][A
1889it [00:00, 2087.69it/s][A
2104it [00:01, 2105.23it/s][A
2315it [00:01, 2099.98it/s][A
2529it [00:01, 2111.17it/s][A
2741it [00:01, 2091.17it/s][A
2953it [00:01, 2097.22it/s][A
3165it [00:01, 2103.36it/s][A
3377it [00:01, 2106.13it/s][A
3592it [00:01, 2116.15it/s][A
3804it [00:01, 2093.68it/s][A
4016it [00:01, 2098.24it/s][A
4226it [00:02, 2091.36it/s][A
4436it [00:02, 2069.49it/s][A
4644it [00:02, 2052.51it/s][A
4852it [00:02, 2059.43it/s][A
5068it [00:02, 2088.79it/s][A
5280it [00:02, 2095.94it/s][A
5490it [00:02, 2095.49it/s][A
5703it [00:02, 2102.62it/s][A
5914it [00:02, 2091.47it/s][A
6124it [00:02, 2078.42it/s][A
6335it [00:03, 2085.70it/s][A
6544it [00:03, 2067.31it/s][A
6935it [00:03, 2083.

AUC:  0.7216991252462189
AUP:  0.683563537226239




  0%|          | 0/6755 [00:00<?, ?it/s][A
  0%|          | 20/6755 [00:00<00:34, 195.71it/s][A
  1%|          | 40/6755 [00:00<00:34, 193.55it/s][A
  1%|          | 60/6755 [00:00<00:34, 192.58it/s][A
  1%|          | 80/6755 [00:00<00:34, 192.34it/s][A
  1%|▏         | 100/6755 [00:00<00:34, 191.58it/s][A
  2%|▏         | 120/6755 [00:00<00:34, 190.75it/s][A
  2%|▏         | 140/6755 [00:00<00:34, 189.45it/s][A
  2%|▏         | 160/6755 [00:00<00:34, 189.77it/s][A
  3%|▎         | 180/6755 [00:00<00:34, 190.38it/s][A
  3%|▎         | 200/6755 [00:01<00:34, 190.32it/s][A
  3%|▎         | 220/6755 [00:01<00:34, 190.88it/s][A
  4%|▎         | 240/6755 [00:01<00:34, 191.11it/s][A
  4%|▍         | 260/6755 [00:01<00:34, 191.01it/s][A
  4%|▍         | 280/6755 [00:01<00:33, 191.06it/s][A
  4%|▍         | 300/6755 [00:01<00:33, 189.96it/s][A
  5%|▍         | 320/6755 [00:01<00:33, 190.70it/s][A
  5%|▌         | 340/6755 [00:01<00:33, 191.29it/s][A
  5%|▌         | 360/67

Ligands with positive degree 0:  1861
Ligands with negative degree 0:  194
Targets with positive degree 0:  599
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
199it [00:00, 1985.22it/s][A
398it [00:00, 1968.74it/s][A
599it [00:00, 1985.55it/s][A
799it [00:00, 1990.87it/s][A
999it [00:00, 1966.04it/s][A
1196it [00:00, 1954.77it/s][A
1392it [00:00, 1942.76it/s][A
1602it [00:00, 1990.44it/s][A
1802it [00:00, 1990.58it/s][A
2002it [00:01, 1988.96it/s][A
2209it [00:01, 2012.79it/s][A
2411it [00:01, 2010.36it/s][A
2613it [00:01, 2001.87it/s][A
2814it [00:01, 1976.84it/s][A
3017it [00:01, 1990.48it/s][A
3219it [00:01, 1997.83it/s][A
3420it [00:01, 1999.35it/s][A
3621it [00:01, 2000.90it/s][A
3827it [00:01, 2016.94it/s][A
4029it [00:02, 2014.61it/s][A
4231it [00:02, 2012.38it/s][A
4433it [00:02, 2004.35it/s][A
4634it [00:02, 1998.86it/s][A
4840it [00:02, 2017.00it/s][A
5042it [00:02, 2015.11it/s][A
5246it [00:02, 2020.08it/s][A
5453it [00:02, 2034.52it/s][A
5657it [00:02, 2035.37it/s][A
5867it [00:02, 2053.57it/s][A
6076it [00:03, 2061.58it/s][A
6329it [00:03, 2005.26it/s][A
 60%|██████    | 3/5 

AUC:  0.7388026451315681
AUP:  0.7019190925176266




  0%|          | 0/6753 [00:00<?, ?it/s][A
  0%|          | 20/6753 [00:00<00:35, 191.69it/s][A
  1%|          | 40/6753 [00:00<00:35, 190.76it/s][A
  1%|          | 60/6753 [00:00<00:35, 190.60it/s][A
  1%|          | 80/6753 [00:00<00:34, 190.93it/s][A
  1%|▏         | 100/6753 [00:00<00:34, 190.40it/s][A
  2%|▏         | 120/6753 [00:00<00:34, 190.63it/s][A
  2%|▏         | 140/6753 [00:00<00:34, 190.67it/s][A
  2%|▏         | 160/6753 [00:00<00:34, 190.41it/s][A
  3%|▎         | 180/6753 [00:00<00:34, 189.94it/s][A
  3%|▎         | 199/6753 [00:01<00:34, 189.84it/s][A
  3%|▎         | 219/6753 [00:01<00:34, 190.48it/s][A
  4%|▎         | 239/6753 [00:01<00:34, 190.95it/s][A
  4%|▍         | 259/6753 [00:01<00:33, 191.16it/s][A
  4%|▍         | 279/6753 [00:01<00:33, 191.13it/s][A
  4%|▍         | 299/6753 [00:01<00:33, 191.02it/s][A
  5%|▍         | 319/6753 [00:01<00:33, 191.01it/s][A
  5%|▌         | 339/6753 [00:01<00:33, 191.04it/s][A
  5%|▌         | 359/67

Ligands with positive degree 0:  1796
Ligands with negative degree 0:  200
Targets with positive degree 0:  613
Targets with negative degree 0:  0


 60%|██████    | 3/5 [05:53<03:55, 117.93s/it]


ValueError: Wrong number of columns at line 357

In [89]:
# Summarise the Unseen Targets results as mean +- standard deviation over the
# per-fold AUC and AUP lists.
print('Unseen Targets')
# Report how many folds contributed. The lists are appended to one fold at a
# time, so an evaluation loop that stops early leaves fewer than 5 entries and
# the statistics below would otherwise read like a complete cross-validation.
print('Folds: ', len(auc_targets))
print('AUC: ', np.mean(auc_targets), '+-', np.std(auc_targets))
print('AUP: ', np.mean(aup_targets), '+-', np.std(aup_targets))

Unseen Targets
AUC:  0.7388888494838263 +- 0.014070675880443466
AUP:  0.7056593953880822 +- 0.019746088476437614
