In [1]:
import pandas as pd
from bicm import BipartiteGraph
import numpy as np
from tqdm import tqdm
import csv
import itertools 
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score, classification_report
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, average_precision_score
from sklearn.metrics import confusion_matrix, f1_score, classification_report
import math

### Functions

In [2]:
def dump_degree_sequences(train,test,fold=0,unseen_folder='VecNet_Unseen_Nodes'):
    
    ligands = list(set(train['InChiKey'].tolist()))
    targets = list(set(train['target_aa_code'].tolist()))
    
    ligands_degree_dict = dict()

    for inchikey_chem in tqdm(ligands):
        sum_df = train[train['InChiKey'] == inchikey_chem]
        ligands_degree_dict[inchikey_chem] = dict()
        ligands_degree_dict[inchikey_chem]['deg_0'] = len(sum_df[sum_df['Y'] == 0])
        ligands_degree_dict[inchikey_chem]['deg_1'] = len(sum_df[sum_df['Y'] == 1])
        
    targets_degree_dict = dict()

    for aa_target in tqdm(targets):
        sum_df = train[train['target_aa_code'] == aa_target]
        targets_degree_dict[aa_target] = dict()
        targets_degree_dict[aa_target]['deg_0'] = len(sum_df[sum_df['Y'] == 0])
        targets_degree_dict[aa_target]['deg_1'] = len(sum_df[sum_df['Y'] == 1])
        
    degree_train_1_0_ligands = [ligands_degree_dict[key_val]['deg_1'] for key_val in tqdm(ligands_degree_dict.keys())]
    degree_train_0_1_ligands = [ligands_degree_dict[key_val]['deg_0'] for key_val in tqdm(ligands_degree_dict.keys())]

    degree_train_1_0_targets = [targets_degree_dict[key_val]['deg_1'] for key_val in tqdm(targets_degree_dict.keys())]
    degree_train_0_1_targets = [targets_degree_dict[key_val]['deg_0'] for key_val in tqdm(targets_degree_dict.keys())]
    
    with open('../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/degreetrain10ligands_' + str(fold) + '.txt', 'w') as file:
        for degree in degree_train_1_0_ligands:
            file.write("%i\n" % degree)
        
    file.close()
    
    with open('../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/degreetrain01ligands_' + str(fold) + '.txt', 'w') as file:
        for degree in degree_train_0_1_ligands:
            file.write("%i\n" % degree)
        
    file.close()
    
    with open('../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/degreetrain10targets_' + str(fold) + '.txt', 'w') as file:
        for degree in degree_train_1_0_targets:
            file.write("%i\n" % degree)
        
    file.close()
    
    with open('../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/degreetrain01targets_' + str(fold) + '.txt', 'w') as file:
        for degree in degree_train_0_1_targets:
            file.write("%i\n" % degree)
        
    file.close()
    
    textfile = open('../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/ligands_' + str(fold) + '.txt', "w")
    for element in ligands:
        textfile.write(element + "\n")
    textfile.close()
    
    textfile = open('../data/sars-busters-consolidated/GitData/' + unseen_folder + '/Degree_Sequences/targets_' + str(fold) + '.txt', "w")
    for element in targets:
        textfile.write(element + "\n")
    textfile.close()
    
    return 

In [3]:
def get_configuration_model_performance(train,test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path):
    
    text_file = open(ligand_file_path, "r") # Rows of the adjacency matrix in order
    ligands = text_file.readlines()

    text_file = open(target_file_path, "r") # Columns of the adjacency matrix in order 
    targets = text_file.readlines()
    
    ligands = [j.replace('\n','') for j in tqdm(ligands)]
    targets = [j.replace('\n','') for j in tqdm(targets)]
    
    number_ligands = len(ligands)
    number_targets = len(targets)
    
    train_pos = train[train['Y'] == 1]
    train_neg = train[train['Y'] == 0]
    
    pos_deg_0_ligands = []
    pos_deg_0_targets = []
    neg_deg_0_ligands = []
    neg_deg_0_targets = []
    
    ligand_degree_ratio = dict()
    ligand_all_average = []

    for ligand in tqdm(ligands):
        pos_deg = len(train_pos[train_pos['InChiKey'] == ligand])
        neg_deg = len(train_neg[train_neg['InChiKey'] == ligand])
        ligand_degree_ratio[ligand] = dict()
        ligand_degree_ratio[ligand]['deg_ratio'] = pos_deg / (pos_deg + neg_deg)
        ligand_degree_ratio[ligand]['deg_avg'] = pos_deg / number_targets 
        ligand_all_average.append(pos_deg / number_targets)
        if pos_deg == 0:
            pos_deg_0_ligands.append(ligand)
        if neg_deg == 0:
            neg_deg_0_ligands.append(ligand)
    
    ligands_all_avg = sum(ligand_all_average) / number_ligands
    
    targets_degree_ratio = dict()
    target_all_average = []

    for target in tqdm(targets):
        pos_deg = len(train_pos[train_pos['target_aa_code'] == target])
        neg_deg = len(train_neg[train_neg['target_aa_code'] == target])
        targets_degree_ratio[target] = dict()
        targets_degree_ratio[target]['deg_ratio'] = pos_deg / (pos_deg + neg_deg)
        targets_degree_ratio[target]['deg_avg'] = pos_deg / number_ligands
        target_all_average.append(pos_deg / number_ligands)
        if pos_deg == 0:
            pos_deg_0_targets.append(target)
        if neg_deg == 0:
            neg_deg_0_targets.append(target)
    
    targets_all_avg = sum(target_all_average) / number_targets
    
    print('Ligands with positive degree 0: ',len(pos_deg_0_ligands))
    print('Ligands with negative degree 0: ',len(neg_deg_0_ligands))
    print('Targets with positive degree 0: ',len(pos_deg_0_targets))
    print('Targets with negative degree 0: ',len(neg_deg_0_targets))
    
    
    pos_annotated_ligands = list(set(ligands)-set(pos_deg_0_ligands))
    pos_annotated_targets = list(set(targets)-set(pos_deg_0_targets))
    
    neg_annotated_ligands = list(set(ligands)-set(neg_deg_0_ligands))
    neg_annotated_targets = list(set(targets)-set(neg_deg_0_targets))
    
    summat10 = np.loadtxt(open(summat10_file_path, "rb"), delimiter=",", skiprows=0) # Output of MATLAB run 
    summat01 = np.loadtxt(open(summat01_file_path, "rb"), delimiter=",", skiprows=0) # Output of MATLAB run 
    
    test_probabilty_predicted_conditioned = []

    ## Average conditional probability
    #conditoned_summat = np.divide(summat10,np.add(summat10,summat01)) # Elementwise pos_deg / (pos_deg + neg_deg)
    #conditoned_summat = conditoned_summat[~np.isnan(conditoned_summat)] 
    #average_conditional_probability = sum(conditoned_summat) / len(conditoned_summat) # Average over valid conditional probabilities
    p10_avg = np.mean(summat10)
    p01_avg = np.mean(summat01)
    average_conditional_probability = p10_avg / (p10_avg + p01_avg)
    
    
    drop_nan = []
    
    for index, row in tqdm(test.iterrows()):      
    
        if row['InChiKey'] in pos_annotated_ligands and row['target_aa_code'] in pos_annotated_targets:
            p10 = summat10[ligands.index(row['InChiKey']),targets.index(row['target_aa_code'])]
            p01 = summat01[ligands.index(row['InChiKey']),targets.index(row['target_aa_code'])]
            p10_conditioned = p10 / (p10 + p01)
        
        elif row['InChiKey'] in pos_annotated_ligands and row['target_aa_code'] not in pos_annotated_targets:
            p10_conditioned = ligand_degree_ratio[row['InChiKey']]['deg_ratio']  ## k_+ / (k_+ + k_-)
        
        elif row['InChiKey'] not in pos_annotated_ligands and row['target_aa_code'] in pos_annotated_targets:
            p10_conditioned = targets_degree_ratio[row['target_aa_code']]['deg_ratio'] ## k_+ / (k_+ + k_-)

        else:
            p10_conditioned = average_conditional_probability
            
        if math.isnan(p10_conditioned): 
            drop_nan.append(index)
        else:
            test_probabilty_predicted_conditioned.append(p10_conditioned)
            
        ## Performance on the test dataset

    print('AUC: ', roc_auc_score(test.drop(drop_nan)['Y'].tolist(), test_probabilty_predicted_conditioned))
    print('AUP: ', average_precision_score(test.drop(drop_nan)['Y'].tolist(), test_probabilty_predicted_conditioned))
        
    return roc_auc_score(test.drop(drop_nan)['Y'].tolist(), test_probabilty_predicted_conditioned), average_precision_score(test.drop(drop_nan)['Y'].tolist(), test_probabilty_predicted_conditioned)

### Generated the degree files - Unseen Nodes and Edges

In [19]:
for fold in tqdm(range(5)):

    train = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/train_' + str(fold) + '.csv')
    edges_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_edges_' + str(fold) + '.csv')
    nodes_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_nodes_' + str(fold) + '.csv')
    dump_degree_sequences(train,nodes_test,fold=fold,unseen_folder='VecNet_Unseen_Nodes')

  0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/2705 [00:00<?, ?it/s][A
  1%|          | 33/2705 [00:00<00:08, 328.57it/s][A
  2%|▏         | 66/2705 [00:00<00:08, 324.92it/s][A
  4%|▎         | 99/2705 [00:00<00:08, 319.51it/s][A
  5%|▍         | 131/2705 [00:00<00:08, 319.10it/s][A
  6%|▌         | 164/2705 [00:00<00:07, 320.32it/s][A
  7%|▋         | 197/2705 [00:00<00:07, 321.93it/s][A
  9%|▊         | 230/2705 [00:00<00:07, 320.68it/s][A
 10%|▉         | 263/2705 [00:00<00:07, 321.78it/s][A
 11%|█         | 296/2705 [00:00<00:07, 321.18it/s][A
 12%|█▏        | 329/2705 [00:01<00:07, 320.56it/s][A
 13%|█▎        | 362/2705 [00:01<00:07, 321.41it/s][A
 15%|█▍        | 395/2705 [00:01<00:07, 320.90it/s][A
 16%|█▌        | 428/2705 [00:01<00:07, 319.24it/s][A
 17%|█▋        | 462/2705 [00:01<00:06, 322.53it/s][A
 18%|█▊        | 495/2705 [00:01<00:06, 321.25it/s][A
 20%|█▉        | 528/2705 [00:01<00:06, 318.47it/s][A
 21%|██        | 560/2705 [00:01<00:06, 3

######## Now run the MATLAB code in Configuration Model - 5 fold folder to generate the matrices. #########

### Get Unseen Nodes and Edges Test Performance

In [4]:
auc_nodes = []
aup_nodes = []
auc_edges =[]
aup_edges = []


for fold in tqdm(range(5)):

    train = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/train_' + str(fold) + '.csv')
    edges_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_edges_' + str(fold) + '.csv')
    nodes_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/test_unseen_nodes_' + str(fold) + '.csv')

    ligand_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/ligands_' + str(fold) + '.txt'
    target_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/targets_' + str(fold) + '.txt'
    summat10_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/summat10_' + str(fold) + '.csv'
    summat01_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Nodes/Degree_Sequences/summat01_' + str(fold) + '.csv'

    auc, aup = get_configuration_model_performance(train,nodes_test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path)
    auc_nodes.append(auc)
    aup_nodes.append(aup)
    
    auc, aup = get_configuration_model_performance(train,edges_test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path)
    auc_edges.append(auc)
    aup_edges.append(aup)

  0%|          | 0/5 [00:00<?, ?it/s]
100%|██████████| 2705/2705 [00:00<00:00, 1015719.99it/s]

100%|██████████| 3049/3049 [00:00<00:00, 724516.06it/s]

  0%|          | 0/2705 [00:00<?, ?it/s][A
  1%|▏         | 36/2705 [00:00<00:07, 359.93it/s][A
  3%|▎         | 72/2705 [00:00<00:07, 353.73it/s][A
  4%|▍         | 108/2705 [00:00<00:07, 348.40it/s][A
  5%|▌         | 143/2705 [00:00<00:07, 348.88it/s][A
  7%|▋         | 179/2705 [00:00<00:07, 349.62it/s][A
  8%|▊         | 215/2705 [00:00<00:07, 350.16it/s][A
  9%|▉         | 251/2705 [00:00<00:06, 351.52it/s][A
 11%|█         | 287/2705 [00:00<00:06, 351.98it/s][A
 12%|█▏        | 323/2705 [00:00<00:06, 348.41it/s][A
 13%|█▎        | 358/2705 [00:01<00:06, 348.41it/s][A
 15%|█▍        | 393/2705 [00:01<00:06, 345.60it/s][A
 16%|█▌        | 428/2705 [00:01<00:06, 339.71it/s][A
 17%|█▋        | 462/2705 [00:01<00:06, 338.47it/s][A
 18%|█▊        | 497/2705 [00:01<00:06, 340.46it/s][A
 20%|█▉        | 534/2705 [00:01<00

Ligands with positive degree 0:  446
Ligands with negative degree 0:  69
Targets with positive degree 0:  2069
Targets with negative degree 0:  334



0it [00:00, ?it/s][A
286it [00:00, 2851.66it/s][A
574it [00:00, 2863.18it/s][A
862it [00:00, 2868.63it/s][A
1151it [00:00, 2874.87it/s][A
1439it [00:00, 2859.62it/s][A
1725it [00:00, 2850.90it/s][A
2012it [00:00, 2854.59it/s][A
2298it [00:00, 2852.78it/s][A
2585it [00:00, 2855.48it/s][A
2871it [00:01, 2854.16it/s][A
3157it [00:01, 2851.85it/s][A
3443it [00:01, 2853.74it/s][A
3729it [00:01, 2831.36it/s][A
4013it [00:01, 2827.45it/s][A
4300it [00:01, 2838.95it/s][A
4585it [00:01, 2841.07it/s][A
4875it [00:01, 2857.58it/s][A
5161it [00:01, 2850.12it/s][A
5447it [00:01, 2841.88it/s][A
5732it [00:02, 2818.98it/s][A
6017it [00:02, 2827.18it/s][A
6491it [00:02, 2841.40it/s][A

100%|██████████| 2705/2705 [00:00<00:00, 1207491.73it/s]

100%|██████████| 3049/3049 [00:00<00:00, 907560.35it/s]

  0%|          | 0/2705 [00:00<?, ?it/s][A
  1%|▏         | 39/2705 [00:00<00:06, 380.95it/s][A

AUC:  0.5
AUP:  0.482976428901556



  3%|▎         | 78/2705 [00:00<00:06, 376.22it/s][A
  4%|▍         | 116/2705 [00:00<00:06, 369.99it/s][A
  6%|▌         | 154/2705 [00:00<00:06, 370.05it/s][A
  7%|▋         | 192/2705 [00:00<00:06, 369.73it/s][A
  8%|▊         | 229/2705 [00:00<00:06, 364.38it/s][A
 10%|▉         | 266/2705 [00:00<00:06, 365.95it/s][A
 11%|█         | 304/2705 [00:00<00:06, 368.06it/s][A
 13%|█▎        | 341/2705 [00:00<00:06, 368.34it/s][A
 14%|█▍        | 378/2705 [00:01<00:06, 368.07it/s][A
 15%|█▌        | 416/2705 [00:01<00:06, 369.56it/s][A
 17%|█▋        | 454/2705 [00:01<00:06, 370.39it/s][A
 18%|█▊        | 492/2705 [00:01<00:05, 370.89it/s][A
 20%|█▉        | 530/2705 [00:01<00:05, 370.53it/s][A
 21%|██        | 568/2705 [00:01<00:05, 370.47it/s][A
 22%|██▏       | 606/2705 [00:01<00:05, 369.37it/s][A
 24%|██▍       | 643/2705 [00:01<00:05, 367.66it/s][A
 25%|██▌       | 680/2705 [00:01<00:05, 364.60it/s][A
 27%|██▋       | 717/2705 [00:01<00:05, 359.95it/s][A
 28%|██▊  

Ligands with positive degree 0:  446
Ligands with negative degree 0:  69
Targets with positive degree 0:  2069
Targets with negative degree 0:  334



0it [00:00, ?it/s][A
287it [00:00, 2861.26it/s][A
574it [00:00, 2852.18it/s][A
863it [00:00, 2865.89it/s][A
1151it [00:00, 2869.52it/s][A
1440it [00:00, 2874.37it/s][A
1728it [00:00, 2857.32it/s][A
2015it [00:00, 2859.77it/s][A

2594it [00:00, 2639.92it/s][A
2887it [00:01, 2723.33it/s][A
3177it [00:01, 2774.50it/s][A
3693it [00:01, 2812.13it/s][A
 20%|██        | 1/5 [00:59<03:56, 59.02s/it]
100%|██████████| 2525/2525 [00:00<00:00, 1205808.68it/s]

100%|██████████| 2994/2994 [00:00<00:00, 847351.29it/s]

  0%|          | 0/2525 [00:00<?, ?it/s][A


AUC:  0.7204551598098383
AUP:  0.7231112618821931


  2%|▏         | 39/2525 [00:00<00:06, 387.02it/s][A
  3%|▎         | 78/2525 [00:00<00:06, 383.22it/s][A
  5%|▍         | 117/2525 [00:00<00:06, 385.28it/s][A
  6%|▌         | 156/2525 [00:00<00:06, 384.66it/s][A
  8%|▊         | 195/2525 [00:00<00:06, 381.26it/s][A
  9%|▉         | 234/2525 [00:00<00:06, 379.79it/s][A
 11%|█         | 272/2525 [00:00<00:05, 379.21it/s][A
 12%|█▏        | 311/2525 [00:00<00:05, 380.56it/s][A
 14%|█▍        | 350/2525 [00:00<00:05, 381.26it/s][A
 15%|█▌        | 389/2525 [00:01<00:05, 382.44it/s][A
 17%|█▋        | 428/2525 [00:01<00:05, 382.28it/s][A
 18%|█▊        | 467/2525 [00:01<00:05, 382.39it/s][A
 20%|██        | 506/2525 [00:01<00:05, 382.04it/s][A
 22%|██▏       | 545/2525 [00:01<00:05, 381.97it/s][A
 23%|██▎       | 584/2525 [00:01<00:05, 381.90it/s][A
 25%|██▍       | 623/2525 [00:01<00:04, 382.09it/s][A
 26%|██▌       | 662/2525 [00:01<00:04, 379.34it/s][A
 28%|██▊       | 700/2525 [00:01<00:04, 379.28it/s][A
 29%|██▉    

Ligands with positive degree 0:  492
Ligands with negative degree 0:  59
Targets with positive degree 0:  1988
Targets with negative degree 0:  326



0it [00:00, ?it/s][A
293it [00:00, 2928.68it/s][A
586it [00:00, 2909.50it/s][A
884it [00:00, 2938.72it/s][A
1184it [00:00, 2959.97it/s][A
1481it [00:00, 2948.33it/s][A
1776it [00:00, 2878.24it/s][A
2071it [00:00, 2899.91it/s][A
2370it [00:00, 2926.70it/s][A
2664it [00:00, 2928.04it/s][A
2960it [00:01, 2935.28it/s][A
3257it [00:01, 2945.73it/s][A
3560it [00:01, 2970.30it/s][A
3862it [00:01, 2984.81it/s][A
4163it [00:01, 2991.75it/s][A
4464it [00:01, 2996.37it/s][A
4764it [00:01, 2993.31it/s][A
5064it [00:01, 2990.41it/s][A
5364it [00:01, 2953.90it/s][A
5660it [00:01, 2910.68it/s][A
5959it [00:02, 2931.19it/s][A
6258it [00:02, 2947.22it/s][A
6558it [00:02, 2961.04it/s][A
6935it [00:02, 2950.81it/s][A

100%|██████████| 2525/2525 [00:00<00:00, 1174126.12it/s]

100%|██████████| 2994/2994 [00:00<00:00, 879763.64it/s]

  0%|          | 0/2525 [00:00<?, ?it/s][A
  2%|▏         | 38/2525 [00:00<00:06, 375.17it/s][A


AUC:  0.5
AUP:  0.487238644556597


  3%|▎         | 76/2525 [00:00<00:06, 377.12it/s][A
  5%|▍         | 114/2525 [00:00<00:06, 374.34it/s][A
  6%|▌         | 152/2525 [00:00<00:06, 375.52it/s][A
  8%|▊         | 190/2525 [00:00<00:06, 373.01it/s][A
  9%|▉         | 228/2525 [00:00<00:06, 368.47it/s][A
 11%|█         | 266/2525 [00:00<00:06, 369.93it/s][A
 12%|█▏        | 304/2525 [00:00<00:06, 365.95it/s][A
 14%|█▎        | 342/2525 [00:00<00:05, 368.47it/s][A
 15%|█▌        | 380/2525 [00:01<00:05, 370.78it/s][A
 17%|█▋        | 418/2525 [00:01<00:05, 372.88it/s][A
 18%|█▊        | 456/2525 [00:01<00:05, 374.76it/s][A
 20%|█▉        | 494/2525 [00:01<00:05, 375.19it/s][A
 21%|██        | 532/2525 [00:01<00:05, 376.48it/s][A
 23%|██▎       | 570/2525 [00:01<00:05, 375.55it/s][A
 24%|██▍       | 608/2525 [00:01<00:05, 375.68it/s][A
 26%|██▌       | 646/2525 [00:01<00:04, 376.41it/s][A
 27%|██▋       | 684/2525 [00:01<00:04, 375.25it/s][A
 29%|██▊       | 722/2525 [00:01<00:04, 376.06it/s][A
 30%|███   

Ligands with positive degree 0:  492
Ligands with negative degree 0:  59
Targets with positive degree 0:  1988
Targets with negative degree 0:  326



0it [00:00, ?it/s][A
305it [00:00, 3041.14it/s][A
610it [00:00, 3005.59it/s][A
913it [00:00, 3013.75it/s][A
1218it [00:00, 3027.94it/s][A
1525it [00:00, 3040.71it/s][A
1830it [00:00, 3043.69it/s][A
2135it [00:00, 3036.94it/s][A
2441it [00:00, 3041.61it/s][A
2748it [00:00, 3047.98it/s][A
3053it [00:01, 3046.39it/s][A
3603it [00:01, 3044.32it/s][A
 40%|████      | 2/5 [01:53<02:49, 56.44s/it]
100%|██████████| 3394/3394 [00:00<00:00, 1191751.17it/s]

100%|██████████| 3184/3184 [00:00<00:00, 823497.81it/s]

  0%|          | 0/3394 [00:00<?, ?it/s][A


AUC:  0.74058061892011
AUP:  0.7377056734719398


  1%|          | 33/3394 [00:00<00:10, 321.07it/s][A
  2%|▏         | 66/3394 [00:00<00:10, 313.25it/s][A
  3%|▎         | 98/3394 [00:00<00:10, 311.31it/s][A
  4%|▍         | 131/3394 [00:00<00:10, 316.80it/s][A
  5%|▍         | 164/3394 [00:00<00:10, 320.98it/s][A
  6%|▌         | 197/3394 [00:00<00:09, 322.65it/s][A
  7%|▋         | 230/3394 [00:00<00:09, 323.73it/s][A
  8%|▊         | 263/3394 [00:00<00:09, 325.06it/s][A
  9%|▊         | 296/3394 [00:00<00:09, 325.06it/s][A
 10%|▉         | 329/3394 [00:01<00:09, 324.84it/s][A
 11%|█         | 362/3394 [00:01<00:09, 326.29it/s][A
 12%|█▏        | 395/3394 [00:01<00:09, 326.69it/s][A
 13%|█▎        | 428/3394 [00:01<00:09, 315.62it/s][A
 14%|█▎        | 460/3394 [00:01<00:09, 312.10it/s][A
 15%|█▍        | 493/3394 [00:01<00:09, 316.84it/s][A
 15%|█▌        | 525/3394 [00:01<00:09, 316.78it/s][A
 16%|█▋        | 558/3394 [00:01<00:08, 319.78it/s][A
 17%|█▋        | 591/3394 [00:01<00:08, 321.43it/s][A
 18%|█▊      

Ligands with positive degree 0:  887
Ligands with negative degree 0:  80
Targets with positive degree 0:  2052
Targets with negative degree 0:  273



0it [00:00, ?it/s][A
269it [00:00, 2686.43it/s][A
538it [00:00, 2680.78it/s][A
807it [00:00, 2673.68it/s][A
1075it [00:00, 2664.04it/s][A
1342it [00:00, 2665.19it/s][A
1612it [00:00, 2676.75it/s][A
1883it [00:00, 2684.57it/s][A
2152it [00:00, 2682.53it/s][A
2422it [00:00, 2685.52it/s][A
2692it [00:01, 2687.99it/s][A
2961it [00:01, 2683.66it/s][A
3230it [00:01, 2683.48it/s][A
3499it [00:01, 2679.47it/s][A
3770it [00:01, 2687.64it/s][A
4039it [00:01, 2669.95it/s][A
4308it [00:01, 2674.89it/s][A
4579it [00:01, 2684.59it/s][A
4848it [00:01, 2681.47it/s][A
5118it [00:01, 2685.71it/s][A
5387it [00:02, 2681.49it/s][A
5656it [00:02, 2645.27it/s][A
5921it [00:02, 2569.47it/s][A
6329it [00:02, 2655.43it/s][A

100%|██████████| 3394/3394 [00:00<00:00, 1236039.57it/s]

100%|██████████| 3184/3184 [00:00<00:00, 855793.91it/s]

  0%|          | 0/3394 [00:00<?, ?it/s][A
  1%|          | 34/3394 [00:00<00:10, 332.01it/s][A


AUC:  0.5
AUP:  0.4482540685732343


  2%|▏         | 68/3394 [00:00<00:10, 303.32it/s][A
  3%|▎         | 99/3394 [00:00<00:11, 298.32it/s][A
  4%|▍         | 131/3394 [00:00<00:10, 305.21it/s][A
  5%|▍         | 164/3394 [00:00<00:10, 312.27it/s][A
  6%|▌         | 197/3394 [00:00<00:10, 318.01it/s][A
  7%|▋         | 230/3394 [00:00<00:09, 320.53it/s][A
  8%|▊         | 263/3394 [00:00<00:09, 323.39it/s][A
  9%|▊         | 296/3394 [00:00<00:09, 322.62it/s][A
 10%|▉         | 329/3394 [00:01<00:09, 324.25it/s][A
 11%|█         | 362/3394 [00:01<00:09, 325.31it/s][A
 12%|█▏        | 395/3394 [00:01<00:09, 326.63it/s][A
 13%|█▎        | 428/3394 [00:01<00:09, 327.45it/s][A
 14%|█▎        | 461/3394 [00:01<00:08, 328.17it/s][A
 15%|█▍        | 494/3394 [00:01<00:08, 327.50it/s][A
 16%|█▌        | 527/3394 [00:01<00:08, 327.93it/s][A
 16%|█▋        | 560/3394 [00:01<00:08, 327.04it/s][A
 17%|█▋        | 593/3394 [00:01<00:08, 327.62it/s][A
 18%|█▊        | 626/3394 [00:01<00:08, 325.97it/s][A
 19%|█▉     

Ligands with positive degree 0:  887
Ligands with negative degree 0:  80
Targets with positive degree 0:  2052
Targets with negative degree 0:  273



0it [00:00, ?it/s][A
271it [00:00, 2702.59it/s][A
542it [00:00, 2688.44it/s][A
813it [00:00, 2696.11it/s][A
1093it [00:00, 2733.15it/s][A
1368it [00:00, 2739.18it/s][A
1642it [00:00, 2721.74it/s][A
1915it [00:00, 2714.92it/s][A
2189it [00:00, 2722.65it/s][A
2462it [00:00, 2723.69it/s][A
2738it [00:01, 2732.77it/s][A
3012it [00:01, 2720.08it/s][A
3285it [00:01, 2707.42it/s][A
3785it [00:01, 2704.31it/s][A
 60%|██████    | 3/5 [03:08<02:10, 65.01s/it]
100%|██████████| 2271/2271 [00:00<00:00, 1078738.89it/s]

100%|██████████| 2965/2965 [00:00<00:00, 847204.26it/s]

  0%|          | 0/2271 [00:00<?, ?it/s][A


AUC:  0.7474432797054515
AUP:  0.7571712363369368


  2%|▏         | 43/2271 [00:00<00:05, 420.06it/s][A
  4%|▍         | 86/2271 [00:00<00:05, 414.28it/s][A
  6%|▌         | 128/2271 [00:00<00:05, 410.05it/s][A
  7%|▋         | 170/2271 [00:00<00:05, 412.39it/s][A
  9%|▉         | 212/2271 [00:00<00:04, 412.64it/s][A
 11%|█         | 254/2271 [00:00<00:04, 413.04it/s][A
 13%|█▎        | 296/2271 [00:00<00:04, 413.00it/s][A
 15%|█▍        | 338/2271 [00:00<00:04, 412.65it/s][A
 17%|█▋        | 380/2271 [00:00<00:04, 413.62it/s][A
 19%|█▊        | 422/2271 [00:01<00:04, 415.49it/s][A
 20%|██        | 464/2271 [00:01<00:04, 416.57it/s][A
 22%|██▏       | 506/2271 [00:01<00:04, 417.00it/s][A
 24%|██▍       | 548/2271 [00:01<00:04, 417.25it/s][A
 26%|██▌       | 590/2271 [00:01<00:04, 416.06it/s][A
 28%|██▊       | 632/2271 [00:01<00:03, 412.46it/s][A
 30%|██▉       | 674/2271 [00:01<00:03, 414.03it/s][A
 32%|███▏      | 716/2271 [00:01<00:03, 410.42it/s][A
 33%|███▎      | 758/2271 [00:01<00:03, 410.65it/s][A
 35%|███▌   

Ligands with positive degree 0:  490
Ligands with negative degree 0:  70
Targets with positive degree 0:  2060
Targets with negative degree 0:  308



0it [00:00, ?it/s][A
327it [00:00, 3265.30it/s][A
654it [00:00, 3255.96it/s][A
980it [00:00, 3255.64it/s][A
1311it [00:00, 3274.57it/s][A
1640it [00:00, 3278.03it/s][A
1971it [00:00, 3285.75it/s][A
2302it [00:00, 3290.79it/s][A
2632it [00:00, 3285.86it/s][A
2962it [00:00, 3288.79it/s][A
3291it [00:01, 3287.51it/s][A
3621it [00:01, 3291.28it/s][A
3951it [00:01, 3289.24it/s][A
4280it [00:01, 3276.81it/s][A
4608it [00:01, 3231.32it/s][A
4932it [00:01, 3151.88it/s][A
5248it [00:01, 3128.66it/s][A
5573it [00:01, 3161.66it/s][A
5897it [00:01, 3183.29it/s][A
6224it [00:01, 3206.59it/s][A
6579it [00:02, 3237.92it/s][A

100%|██████████| 2271/2271 [00:00<00:00, 1089847.18it/s]

100%|██████████| 2965/2965 [00:00<00:00, 860868.85it/s]

  0%|          | 0/2271 [00:00<?, ?it/s][A
  2%|▏         | 43/2271 [00:00<00:05, 422.34it/s][A


AUC:  0.5
AUP:  0.46390028879768963


  4%|▍         | 86/2271 [00:00<00:05, 418.06it/s][A
  6%|▌         | 128/2271 [00:00<00:05, 416.18it/s][A
  7%|▋         | 170/2271 [00:00<00:05, 415.88it/s][A
  9%|▉         | 212/2271 [00:00<00:04, 416.27it/s][A
 11%|█         | 254/2271 [00:00<00:04, 414.95it/s][A
 13%|█▎        | 296/2271 [00:00<00:04, 413.58it/s][A
 15%|█▍        | 338/2271 [00:00<00:04, 414.77it/s][A
 17%|█▋        | 380/2271 [00:00<00:04, 415.27it/s][A
 19%|█▊        | 422/2271 [00:01<00:04, 414.96it/s][A
 20%|██        | 464/2271 [00:01<00:04, 414.95it/s][A
 22%|██▏       | 506/2271 [00:01<00:04, 409.59it/s][A
 24%|██▍       | 548/2271 [00:01<00:04, 411.65it/s][A
 26%|██▌       | 591/2271 [00:01<00:04, 414.43it/s][A
 28%|██▊       | 633/2271 [00:01<00:03, 413.76it/s][A
 30%|██▉       | 675/2271 [00:01<00:03, 414.36it/s][A
 32%|███▏      | 717/2271 [00:01<00:03, 411.89it/s][A
 33%|███▎      | 759/2271 [00:01<00:03, 413.63it/s][A
 35%|███▌      | 801/2271 [00:01<00:03, 413.00it/s][A
 37%|███▋  

Ligands with positive degree 0:  490
Ligands with negative degree 0:  70
Targets with positive degree 0:  2060
Targets with negative degree 0:  308



0it [00:00, ?it/s][A
315it [00:00, 3145.01it/s][A
630it [00:00, 3059.76it/s][A
952it [00:00, 3130.95it/s][A
1277it [00:00, 3176.08it/s][A
1602it [00:00, 3199.30it/s][A
1927it [00:00, 3213.26it/s][A
2256it [00:00, 3236.24it/s][A
2580it [00:00, 3209.53it/s][A
2902it [00:00, 3094.03it/s][A
3225it [00:01, 3133.74it/s][A
3710it [00:01, 3165.97it/s][A
 80%|████████  | 4/5 [03:56<00:58, 58.25s/it]
100%|██████████| 3441/3441 [00:00<00:00, 1228201.86it/s]

100%|██████████| 3236/3236 [00:00<00:00, 811380.19it/s]

  0%|          | 0/3441 [00:00<?, ?it/s][A


AUC:  0.7236075362945602
AUP:  0.7172598259267702


  1%|          | 33/3441 [00:00<00:10, 324.30it/s][A
  2%|▏         | 66/3441 [00:00<00:10, 317.95it/s][A
  3%|▎         | 98/3441 [00:00<00:10, 318.02it/s][A
  4%|▍         | 130/3441 [00:00<00:10, 316.75it/s][A
  5%|▍         | 162/3441 [00:00<00:10, 317.29it/s][A
  6%|▌         | 194/3441 [00:00<00:10, 315.65it/s][A
  7%|▋         | 226/3441 [00:00<00:10, 308.35it/s][A
  7%|▋         | 258/3441 [00:00<00:10, 310.16it/s][A
  8%|▊         | 291/3441 [00:00<00:10, 313.49it/s][A
  9%|▉         | 323/3441 [00:01<00:09, 315.16it/s][A
 10%|█         | 356/3441 [00:01<00:09, 317.33it/s][A
 11%|█▏        | 389/3441 [00:01<00:09, 318.31it/s][A
 12%|█▏        | 422/3441 [00:01<00:09, 319.14it/s][A
 13%|█▎        | 455/3441 [00:01<00:09, 319.53it/s][A
 14%|█▍        | 487/3441 [00:01<00:09, 319.11it/s][A
 15%|█▌        | 519/3441 [00:01<00:09, 318.09it/s][A
 16%|█▌        | 552/3441 [00:01<00:09, 319.26it/s][A
 17%|█▋        | 584/3441 [00:01<00:08, 319.01it/s][A
 18%|█▊      

Ligands with positive degree 0:  797
Ligands with negative degree 0:  83
Targets with positive degree 0:  2065
Targets with negative degree 0:  239



0it [00:00, ?it/s][A
264it [00:00, 2631.53it/s][A
528it [00:00, 2632.54it/s][A
792it [00:00, 2634.91it/s][A
1056it [00:00, 2635.18it/s][A
1320it [00:00, 2634.85it/s][A
1584it [00:00, 2635.40it/s][A
1848it [00:00, 2632.07it/s][A
2112it [00:00, 2632.72it/s][A
2376it [00:00, 2626.55it/s][A
2639it [00:01, 2614.04it/s][A
2901it [00:01, 2614.88it/s][A
3163it [00:01, 2612.21it/s][A
3425it [00:01, 2610.86it/s][A
3689it [00:01, 2617.11it/s][A
3951it [00:01, 2612.15it/s][A
4213it [00:01, 2609.99it/s][A
4475it [00:01, 2607.61it/s][A
4737it [00:01, 2609.01it/s][A
4998it [00:01, 2591.61it/s][A
5263it [00:02, 2606.50it/s][A
5525it [00:02, 2609.32it/s][A
5787it [00:02, 2609.59it/s][A
6049it [00:02, 2610.14it/s][A
6312it [00:02, 2615.27it/s][A
6626it [00:02, 2615.38it/s][A

100%|██████████| 3441/3441 [00:00<00:00, 1258949.76it/s]

100%|██████████| 3236/3236 [00:00<00:00, 863517.48it/s]

  0%|          | 0/3441 [00:00<?, ?it/s][A
  1%|          | 33/3441 [00:00<00:10, 322.44

AUC:  0.5
AUP:  0.4646845759130697


  2%|▏         | 66/3441 [00:00<00:10, 318.25it/s][A
  3%|▎         | 98/3441 [00:00<00:10, 316.22it/s][A
  4%|▍         | 131/3441 [00:00<00:10, 317.86it/s][A
  5%|▍         | 163/3441 [00:00<00:10, 318.25it/s][A
  6%|▌         | 195/3441 [00:00<00:10, 318.30it/s][A
  7%|▋         | 228/3441 [00:00<00:10, 319.48it/s][A
  8%|▊         | 261/3441 [00:00<00:09, 320.13it/s][A
  9%|▊         | 294/3441 [00:00<00:09, 318.58it/s][A
  9%|▉         | 326/3441 [00:01<00:09, 317.49it/s][A
 10%|█         | 359/3441 [00:01<00:09, 318.72it/s][A
 11%|█▏        | 391/3441 [00:01<00:09, 319.06it/s][A
 12%|█▏        | 423/3441 [00:01<00:09, 319.30it/s][A
 13%|█▎        | 456/3441 [00:01<00:09, 320.21it/s][A
 14%|█▍        | 489/3441 [00:01<00:09, 321.21it/s][A
 15%|█▌        | 522/3441 [00:01<00:09, 320.62it/s][A
 16%|█▌        | 555/3441 [00:01<00:09, 320.31it/s][A
 17%|█▋        | 588/3441 [00:01<00:08, 319.75it/s][A
 18%|█▊        | 620/3441 [00:01<00:08, 319.42it/s][A
 19%|█▉     

Ligands with positive degree 0:  797
Ligands with negative degree 0:  83
Targets with positive degree 0:  2065
Targets with negative degree 0:  239



0it [00:00, ?it/s][A
242it [00:00, 2413.55it/s][A
505it [00:00, 2540.16it/s][A
768it [00:00, 2579.77it/s][A
1028it [00:00, 2585.22it/s][A
1290it [00:00, 2595.14it/s][A
1550it [00:00, 2590.80it/s][A
1810it [00:00, 2535.61it/s][A
2077it [00:00, 2576.21it/s][A
2343it [00:00, 2601.28it/s][A
2608it [00:01, 2615.86it/s][A
2875it [00:01, 2630.40it/s][A
3139it [00:01, 2569.18it/s][A
3404it [00:01, 2592.03it/s][A
3696it [00:01, 2585.17it/s][A
100%|██████████| 5/5 [05:14<00:00, 62.91s/it]

AUC:  0.7602582697659274
AUP:  0.7580169573705





In [5]:
print('Unseen Nodes')
print('AUC: ', np.mean(auc_nodes), '+-', np.std(auc_nodes))
print('AUP: ', np.mean(aup_nodes), '+-', np.std(aup_nodes))

print('Unseen Edges')
print('AUC: ', np.mean(auc_edges), '+-', np.std(auc_edges))
print('AUP: ', np.mean(aup_edges), '+-', np.std(aup_edges))

Unseen Nodes
AUC:  0.5 +- 0.0
AUP:  0.4694108013484294 +- 0.014157461223848512
Unseen Edges
AUC:  0.7384689728991776 +- 0.014866946219643889
AUP:  0.738652990997668 +- 0.016840420839620104


### Generated the degree files - Unseen Targets

In [87]:
for fold in tqdm(range(5)):

    train = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/train_' + str(fold) + '.csv')
    edges_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/test_unseen_edges_' + str(fold) + '.csv')
    nodes_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/test_unseen_nodes_' + str(fold) + '.csv')
    dump_degree_sequences(train,nodes_test,fold=fold,unseen_folder='VecNet_Unseen_Targets')

  0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/6751 [00:00<?, ?it/s][A
  0%|          | 18/6751 [00:00<00:39, 171.13it/s][A
  1%|          | 36/6751 [00:00<00:39, 168.76it/s][A
  1%|          | 53/6751 [00:00<00:39, 167.97it/s][A
  1%|          | 70/6751 [00:00<00:39, 167.26it/s][A
  1%|▏         | 87/6751 [00:00<00:39, 167.20it/s][A
  2%|▏         | 104/6751 [00:00<00:39, 167.61it/s][A
  2%|▏         | 121/6751 [00:00<00:39, 167.17it/s][A
  2%|▏         | 138/6751 [00:00<00:40, 163.35it/s][A
  2%|▏         | 155/6751 [00:00<00:40, 164.36it/s][A
  3%|▎         | 172/6751 [00:01<00:40, 164.10it/s][A
  3%|▎         | 189/6751 [00:01<00:39, 164.43it/s][A
  3%|▎         | 206/6751 [00:01<00:39, 164.94it/s][A
  3%|▎         | 223/6751 [00:01<00:39, 165.55it/s][A
  4%|▎         | 240/6751 [00:01<00:39, 165.78it/s][A
  4%|▍         | 257/6751 [00:01<00:39, 166.28it/s][A
  4%|▍         | 274/6751 [00:01<00:39, 164.40it/s][A
  4%|▍         | 291/6751 [00:01<00:40, 159

### Get Unseen Targets Test Performance

In [12]:
auc_targets = []
aup_targets = []

for fold in tqdm(range(5)):

    train = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/train_' + str(fold) + '.csv')
    edges_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/test_unseen_edges_' + str(fold) + '.csv')
    nodes_test = pd.read_csv('../data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/test_unseen_nodes_' + str(fold) + '.csv')

    ligand_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/Degree_Sequences/ligands_' + str(fold) + '.txt'
    target_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/Degree_Sequences/targets_' + str(fold) + '.txt'
    summat10_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/Degree_Sequences/summat10_' + str(fold) + '.csv'
    summat01_file_path = '/data/sars-busters-consolidated/GitData/VecNet_Unseen_Targets/Degree_Sequences/summat01_' + str(fold) + '.csv'

    
    try:
        auc, aup = get_configuration_model_performance(train,nodes_test,ligand_file_path,target_file_path,summat10_file_path,summat01_file_path)
        auc_targets.append(auc)
        aup_targets.append(aup)
    except:
        continue

  0%|          | 0/5 [00:00<?, ?it/s]
100%|██████████| 6751/6751 [00:00<00:00, 982060.36it/s]

100%|██████████| 3594/3594 [00:00<00:00, 515097.51it/s]

  0%|          | 0/6751 [00:00<?, ?it/s][A
  0%|          | 18/6751 [00:00<00:39, 170.41it/s][A
  1%|          | 36/6751 [00:00<00:39, 168.73it/s][A
  1%|          | 53/6751 [00:00<00:40, 166.30it/s][A
  1%|          | 70/6751 [00:00<00:39, 167.06it/s][A
  1%|▏         | 87/6751 [00:00<00:39, 166.77it/s][A
  2%|▏         | 104/6751 [00:00<00:40, 165.39it/s][A
  2%|▏         | 121/6751 [00:00<00:39, 166.05it/s][A
  2%|▏         | 138/6751 [00:00<00:39, 166.01it/s][A
  2%|▏         | 155/6751 [00:00<00:39, 166.16it/s][A
  3%|▎         | 172/6751 [00:01<00:39, 166.08it/s][A
  3%|▎         | 189/6751 [00:01<00:39, 165.93it/s][A
  3%|▎         | 206/6751 [00:01<00:39, 166.69it/s][A
  3%|▎         | 223/6751 [00:01<00:39, 167.02it/s][A
  4%|▎         | 240/6751 [00:01<00:38, 167.87it/s][A
  4%|▍         | 257/6751 [00:01<00:38,

Ligands with positive degree 0:  1487
Ligands with negative degree 0:  219
Targets with positive degree 0:  621
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
188it [00:00, 1870.42it/s][A
376it [00:00, 1853.86it/s][A
568it [00:00, 1880.90it/s][A
757it [00:00, 1876.89it/s][A
945it [00:00, 1861.92it/s][A
1132it [00:00, 1853.83it/s][A
1318it [00:00, 1840.52it/s][A
1503it [00:00, 1823.74it/s][A
1692it [00:00, 1843.21it/s][A
1880it [00:01, 1852.13it/s][A
2072it [00:01, 1869.34it/s][A
2259it [00:01, 1869.45it/s][A
2452it [00:01, 1885.62it/s][A
2641it [00:01, 1846.44it/s][A
2826it [00:01, 1844.32it/s][A
3021it [00:01, 1874.14it/s][A
3209it [00:01, 1871.18it/s][A
3406it [00:01, 1898.91it/s][A
3598it [00:01, 1902.47it/s][A
3789it [00:02, 1863.78it/s][A
3984it [00:02, 1886.77it/s][A
4173it [00:02, 1883.32it/s][A
4366it [00:02, 1894.77it/s][A
4556it [00:02, 1884.08it/s][A
4747it [00:02, 1889.73it/s][A
4939it [00:02, 1897.57it/s][A
5129it [00:02, 1892.81it/s][A
5319it [00:02, 1886.41it/s][A
5508it [00:02, 1879.71it/s][A
5696it [00:03, 1875.69it/s][A
5887it [00:03, 1883.35it/s][A
6078it [00:03, 1890.0

AUC:  0.7866021104337396
AUP:  0.739409327085565


100%|██████████| 6765/6765 [00:00<00:00, 1066108.08it/s]

100%|██████████| 3533/3533 [00:00<00:00, 867439.91it/s]

  0%|          | 0/6765 [00:00<?, ?it/s][A
  0%|          | 18/6765 [00:00<00:38, 176.01it/s][A
  1%|          | 36/6765 [00:00<00:38, 174.58it/s][A
  1%|          | 54/6765 [00:00<00:38, 174.14it/s][A
  1%|          | 72/6765 [00:00<00:38, 173.86it/s][A
  1%|▏         | 90/6765 [00:00<00:38, 173.81it/s][A
  2%|▏         | 108/6765 [00:00<00:38, 173.60it/s][A
  2%|▏         | 126/6765 [00:00<00:38, 173.59it/s][A
  2%|▏         | 144/6765 [00:00<00:38, 174.01it/s][A
  2%|▏         | 162/6765 [00:00<00:37, 173.81it/s][A
  3%|▎         | 180/6765 [00:01<00:37, 173.34it/s][A
  3%|▎         | 198/6765 [00:01<00:38, 172.68it/s][A
  3%|▎         | 216/6765 [00:01<00:37, 172.60it/s][A
  3%|▎         | 234/6765 [00:01<00:37, 173.30it/s][A
  4%|▎         | 252/6765 [00:01<00:37, 173.98it/s][A
  4%|▍         | 270/6765 [00:01<00:37, 173.83it/s][A
  4%|▍         | 288/

Ligands with positive degree 0:  1839
Ligands with negative degree 0:  231
Targets with positive degree 0:  636
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
199it [00:00, 1986.59it/s][A
398it [00:00, 1976.83it/s][A
596it [00:00, 1976.30it/s][A
795it [00:00, 1980.71it/s][A
994it [00:00, 1978.92it/s][A
1192it [00:00, 1953.82it/s][A
1388it [00:00, 1952.18it/s][A
1586it [00:00, 1959.19it/s][A
1786it [00:00, 1971.30it/s][A
1984it [00:01, 1963.10it/s][A
2181it [00:01, 1958.62it/s][A
2382it [00:01, 1972.35it/s][A
2582it [00:01, 1978.87it/s][A
2784it [00:01, 1988.41it/s][A
2984it [00:01, 1990.40it/s][A
3184it [00:01, 1988.45it/s][A
3383it [00:01, 1986.41it/s][A
3583it [00:01, 1987.91it/s][A
3782it [00:01, 1981.94it/s][A
3981it [00:02, 1978.86it/s][A
4180it [00:02, 1979.03it/s][A
4380it [00:02, 1983.20it/s][A
4579it [00:02, 1958.30it/s][A
4775it [00:02, 1942.42it/s][A
4970it [00:02, 1939.65it/s][A
5165it [00:02, 1918.69it/s][A
5358it [00:02, 1920.70it/s][A
5553it [00:02, 1928.60it/s][A
5748it [00:02, 1933.34it/s][A
5942it [00:03, 1921.93it/s][A
6135it [00:03, 1918.77it/s][A
6332it [00:03, 1932.0

AUC:  0.7504912313232229
AUP:  0.69072055939785




100%|██████████| 3547/3547 [00:00<00:00, 858960.52it/s]

  0%|          | 0/6755 [00:00<?, ?it/s][A
  0%|          | 18/6755 [00:00<00:37, 179.02it/s][A
  1%|          | 36/6755 [00:00<00:38, 175.59it/s][A
  1%|          | 54/6755 [00:00<00:38, 175.29it/s][A
  1%|          | 72/6755 [00:00<00:38, 175.07it/s][A
  1%|▏         | 90/6755 [00:00<00:38, 174.64it/s][A
  2%|▏         | 108/6755 [00:00<00:38, 174.07it/s][A
  2%|▏         | 126/6755 [00:00<00:38, 174.17it/s][A
  2%|▏         | 144/6755 [00:00<00:37, 174.31it/s][A
  2%|▏         | 162/6755 [00:00<00:37, 174.60it/s][A
  3%|▎         | 180/6755 [00:01<00:37, 173.20it/s][A
  3%|▎         | 198/6755 [00:01<00:37, 173.96it/s][A
  3%|▎         | 216/6755 [00:01<00:37, 174.46it/s][A
  3%|▎         | 234/6755 [00:01<00:37, 174.19it/s][A
  4%|▎         | 252/6755 [00:01<00:37, 174.01it/s][A
  4%|▍         | 270/6755 [00:01<00:37, 174.41it/s][A
  4%|▍         | 288/6755 [00:01<00:36, 174.99it/s][A
  5%|▍         | 306/6

Ligands with positive degree 0:  1861
Ligands with negative degree 0:  194
Targets with positive degree 0:  599
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
199it [00:00, 1977.89it/s][A
397it [00:00, 1950.53it/s][A
594it [00:00, 1957.72it/s][A
790it [00:00, 1947.20it/s][A
985it [00:00, 1931.01it/s][A
1179it [00:00, 1932.92it/s][A
1373it [00:00, 1925.45it/s][A
1570it [00:00, 1936.09it/s][A
1764it [00:00, 1924.89it/s][A
1957it [00:01, 1892.13it/s][A
2149it [00:01, 1899.04it/s][A
2344it [00:01, 1912.02it/s][A
2536it [00:01, 1914.03it/s][A
2730it [00:01, 1918.43it/s][A
2931it [00:01, 1943.40it/s][A
3128it [00:01, 1948.79it/s][A
3328it [00:01, 1962.04it/s][A
3525it [00:01, 1945.32it/s][A
3720it [00:01, 1945.11it/s][A
3918it [00:02, 1955.43it/s][A
4114it [00:02, 1951.25it/s][A
4310it [00:02, 1948.87it/s][A
4505it [00:02, 1934.22it/s][A
4701it [00:02, 1939.73it/s][A
4895it [00:02, 1934.33it/s][A
5089it [00:02, 1896.61it/s][A
5279it [00:02, 1883.99it/s][A
5481it [00:02, 1922.37it/s][A
5680it [00:02, 1942.26it/s][A
5878it [00:03, 1951.03it/s][A
6084it [00:03, 1982.53it/s][A
6329it [00:03, 1937.6

AUC:  0.7572409830657798
AUP:  0.7037736188614591


100%|██████████| 3527/3527 [00:00<00:00, 845187.12it/s]

  0%|          | 0/6753 [00:00<?, ?it/s][A
  0%|          | 18/6753 [00:00<00:37, 179.82it/s][A
  1%|          | 36/6753 [00:00<00:37, 177.81it/s][A
  1%|          | 54/6753 [00:00<00:38, 174.58it/s][A
  1%|          | 72/6753 [00:00<00:38, 174.35it/s][A
  1%|▏         | 90/6753 [00:00<00:38, 174.88it/s][A
  2%|▏         | 108/6753 [00:00<00:38, 173.85it/s][A
  2%|▏         | 126/6753 [00:00<00:37, 174.69it/s][A
  2%|▏         | 144/6753 [00:00<00:37, 174.95it/s][A
  2%|▏         | 162/6753 [00:00<00:37, 175.02it/s][A
  3%|▎         | 180/6753 [00:01<00:37, 175.16it/s][A
  3%|▎         | 198/6753 [00:01<00:37, 174.72it/s][A
  3%|▎         | 216/6753 [00:01<00:37, 174.55it/s][A
  3%|▎         | 234/6753 [00:01<00:37, 174.44it/s][A
  4%|▎         | 252/6753 [00:01<00:37, 174.14it/s][A
  4%|▍         | 270/6753 [00:01<00:37, 173.47it/s][A
  4%|▍         | 288/6753 [00:01<00:37, 174.01it/s][A
  5%|▍         | 306/675

Ligands with positive degree 0:  1796
Ligands with negative degree 0:  200
Targets with positive degree 0:  613
Targets with negative degree 0:  0


 80%|████████  | 4/5 [05:54<01:22, 82.70s/it]
100%|██████████| 6744/6744 [00:00<00:00, 1236779.60it/s]

100%|██████████| 3518/3518 [00:00<00:00, 863858.17it/s]

  0%|          | 0/6744 [00:00<?, ?it/s][A
  0%|          | 19/6744 [00:00<00:37, 180.07it/s][A
  1%|          | 38/6744 [00:00<00:37, 177.75it/s][A
  1%|          | 56/6744 [00:00<00:37, 176.13it/s][A
  1%|          | 74/6744 [00:00<00:38, 171.07it/s][A
  1%|▏         | 92/6744 [00:00<00:39, 169.53it/s][A
  2%|▏         | 110/6744 [00:00<00:38, 170.44it/s][A
  2%|▏         | 128/6744 [00:00<00:39, 169.09it/s][A
  2%|▏         | 145/6744 [00:00<00:38, 169.36it/s][A
  2%|▏         | 163/6744 [00:00<00:38, 170.83it/s][A
  3%|▎         | 181/6744 [00:01<00:38, 172.62it/s][A
  3%|▎         | 199/6744 [00:01<00:37, 173.35it/s][A
  3%|▎         | 217/6744 [00:01<00:37, 174.59it/s][A
  3%|▎         | 235/6744 [00:01<00:37, 175.04it/s][A
  4%|▍         | 253/6744 [00:01<00:37, 175.21it/s][A
  4%|▍         | 271/6744 [00:

Ligands with positive degree 0:  1815
Ligands with negative degree 0:  210
Targets with positive degree 0:  590
Targets with negative degree 0:  0



0it [00:00, ?it/s][A
195it [00:00, 1945.41it/s][A
390it [00:00, 1915.60it/s][A
583it [00:00, 1917.46it/s][A
777it [00:00, 1925.55it/s][A
982it [00:00, 1968.27it/s][A
1179it [00:00, 1967.18it/s][A
1376it [00:00, 1961.03it/s][A
1579it [00:00, 1981.36it/s][A
1778it [00:00, 1979.51it/s][A
1976it [00:01, 1973.20it/s][A
2174it [00:01, 1966.98it/s][A
2374it [00:01, 1974.83it/s][A
2574it [00:01, 1979.33it/s][A
2772it [00:01, 1960.16it/s][A
2969it [00:01, 1936.65it/s][A
3163it [00:01, 1931.55it/s][A
3358it [00:01, 1936.94it/s][A
3556it [00:01, 1948.59it/s][A
3751it [00:01, 1945.27it/s][A
3949it [00:02, 1953.59it/s][A
4148it [00:02, 1961.73it/s][A
4345it [00:02, 1951.70it/s][A
4541it [00:02, 1939.27it/s][A
4735it [00:02, 1938.52it/s][A
4929it [00:02, 1928.56it/s][A
5128it [00:02, 1945.08it/s][A
5323it [00:02, 1906.72it/s][A
5520it [00:02, 1922.60it/s][A
5721it [00:02, 1945.11it/s][A
5916it [00:03, 1920.67it/s][A
6111it [00:03, 1929.02it/s][A
6308it [00:03, 1940.7

AUC:  0.7251730645670953
AUP:  0.6325534590013489





In [13]:
print('Unseen Targets')
print('AUC: ', np.mean(auc_targets), '+-', np.std(auc_targets))
print('AUP: ', np.mean(aup_targets), '+-', np.std(aup_targets))

Unseen Targets
AUC:  0.7548768473474594 +- 0.02187252726003232
AUP:  0.6916142410865558 +- 0.038474651918511626
