In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:


# std
import numpy as np
import pandas as pd
import random as rn
import json
import time
from collections import defaultdict
from itertools import combinations
from sklearn.model_selection import KFold
import sys, os


# my lib


sys.path.append('/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/')
import PPILinkPred as pred
sys.path.append('/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook')

import genData_helper as helper
import traversalHelper as tr
import helper as hr

# Generate Random PPI Samples from Datasets

In [None]:
# yeast dataset
# DataFrame standard: {nodeA, nodeB, type, score}
# randomly 50% of the dataset 10 times, save into json

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]

names = ['biosnap', 'ddinter_A', 'ddinter_R']

for n in range(len(names)):
    df = pd.read_csv(import_funcs[n])
    ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]
    sampledPPIs = [rn.sample(ppi, int(len(ppi) * 0.5)) for _ in range(10)]
    with open(f"/content/drive/MyDrive/DDI_Dataset/Process_data/{names[n]}_sampledPPIs.json", "w") as f:
        json.dump(sampledPPIs, f)

In [None]:
# yeast dataset
# DataFrame standard: {nodeA, nodeB, type, score}
# do 5-fold CV, rmb when do PR, do until 20% of top PPIs (because 5-fold is about 80% train and 20% test)

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]

names = ['biosnap', 'ddinter_A', 'ddinter_R']

for n in range(len(names)):
    df = pd.read_csv(import_funcs[n])
    ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]
    rn.shuffle(ppi)
    kf = KFold(n_splits=5)
    sampledPPIs = []
    for train_index, test_index in kf.split(ppi):
        sampledPPIs.append(np.asarray(ppi)[train_index].tolist())
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_5FoldCV_sampledPPIs.json".format(names[n]), "w") as f:
        f.write(json.dumps(sampledPPIs))

Sampled Positive PPIs Calculation:
First Code Block: Uniquely, it reduces the dataset by 50% minus the salt percentage of the original size before adding negative PPIs. For example, if saltSz is 5%, it first samples 45% of positive PPIs.
Second Code Block: Directly samples 50% of positive PPIs without adjusting for the salt size to be added later.

In [None]:
# yeast dataset
# DataFrame standard: {nodeA, nodeB, type, score}
# randomly 50% of the dataset, then add 5%,10%,20% of neg PPIs, 10 replicates, save into json

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]

names = ['biosnap', 'ddinter_A', 'ddinter_R']

for n in range(len(names)):
    df = pd.read_csv(import_funcs[n])
    ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]
    sampledPPIs = [rn.sample(ppi, int(len(ppi)*0.5)) for i in range(10)]
    realPPIsz = len(ppi)
    dual_ppiStr = set(tr.Helper.list_to_pathStrs(tr.Helper.to_dual_binary_relation(ppi)))
    sampleNodes = tr.Helper.binary_relation_to_node(ppi)
    nodePairs = tr.Helper.list_to_pathStrs(list(combinations(sampleNodes, 2)))
    nonPPIs = [ppi for ppi in nodePairs if ppi not in dual_ppiStr]

    for saltSz in [5, 10, 20]:
        sampled_nonPPIs = [tr.Helper.pathStrs_to_list(
            rn.sample(nonPPIs, int(realPPIsz*(saltSz*0.01)))) for i in range(10)]
        sampledPPIs = [sampledPPIs[i]+sampled_nonPPIs[i] for i in range(10)]
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_salted{}Percent_sampledPPIs.json".format(
            names[n], saltSz), "w") as f:
            f.write(json.dumps(sampledPPIs))

In [9]:
# yeast dataset
# DataFrame standard: {nodeA, nodeB, type, score}
# randomly 50% of the dataset, then add 5%,10%,20% of neg PPIs, 10 replicates, save into json

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]

names = ['biosnap', 'ddinter_A', 'ddinter_R']

for n in range(len(names)):
    for saltSz in [5,10,15,20,25]:
        df = pd.read_csv(import_funcs[n])
        ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]
        sampledPPIs = [rn.sample(ppi, int(len(ppi)*(0.5-(saltSz*0.01)))) for i in range(10)]
        realPPIsz = len(ppi)
        dual_ppiStr = set(tr.Helper.list_to_pathStrs(tr.Helper.to_dual_binary_relation(ppi)))
        sampleNodes = tr.Helper.binary_relation_to_node(ppi)
        nodePairs = tr.Helper.list_to_pathStrs(list(combinations(sampleNodes, 2)))
        nonPPIs = [ppi for ppi in nodePairs if ppi not in dual_ppiStr]
        sampled_nonPPIs = [tr.Helper.pathStrs_to_list(
            rn.sample(nonPPIs, int(realPPIsz*(saltSz*0.01)))) for i in range(10)]
        sampledPPIs = [sampledPPIs[i]+sampled_nonPPIs[i] for i in range(10)]
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_contam{}Percent_sampledPPIs.json".format(
            names[n], saltSz), "w") as f:
            f.write(json.dumps(sampledPPIs))

In [None]:
# yeast dataset
# DataFrame standard: {nodeA, nodeB, type, score}
# randomly 50% of the dataset 10 times, save into json

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]

names = ['biosnap', 'ddinter_A', 'ddinter_R']

for randSz in range(95, 54, -5):
    for n in range(len(names)):
        df = pd.read_csv(import_funcs[n])
        ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]
        sampledPPIs = [rn.sample(ppi, int(len(ppi)*(randSz*0.01))) for i in range(10)]
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs_{}Percent.json".format(names[n], randSz), "w") as f:
            f.write(json.dumps(sampledPPIs))

In [None]:
# yeast dataset
# sample non-PPIs of real-PPIs size

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]

names = ['biosnap', 'ddinter_A', 'ddinter_R']
for n in range(len(names)):
    df = pd.read_csv(import_funcs[n])
    ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]
    realPPIsz = len(ppi)
    dual_ppiStr = set(tr.Helper.list_to_pathStrs(tr.Helper.to_dual_binary_relation(ppi)))

    sampleNodes = tr.Helper.binary_relation_to_node(ppi)
    nodePairs = tr.Helper.list_to_pathStrs(list(combinations(sampleNodes, 2)))

    nonPPIs = [ppi for ppi in nodePairs if ppi not in dual_ppiStr]
    sampled_nonPPIs = [tr.Helper.pathStrs_to_list(rn.sample(nonPPIs, realPPIsz)) for i in range(10)]

    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampled_nonPPIs.json".format(names[n]), "w") as f:
        f.write(json.dumps(sampled_nonPPIs))

# Run Link Prediction

In [None]:
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'L3E1_f1', 'L3E1_f2', 'L3E_f1Alt', 'L3E_f2Alt', 'random']
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']


for ds_name in ds_names:
    # read dataset
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())

    # do link prediction & save results
    for method in methods:
        for i in range(len(samplePPIs)):
            saveFilename = "{}_{}_sample_{}".format(method, ds_name, i)
            print(saveFilename)
            startTime = time.time()

            # jupyter notebook cannot display multi core logging, do it only in terminal
            predPPI, predScore = pred.multiCore_PPILinkPred(samplePPIs[i], method, coreNo=14, logging=False)
            helper.write_runTime(saveFilename, time.time()-startTime)
            helper.write_resultData(predPPI, predScore, saveFilename)

commonNeighbor_biosnap_sample_0


commonNeighbor_biosnap_sample_1


commonNeighbor_biosnap_sample_2


commonNeighbor_biosnap_sample_3


commonNeighbor_biosnap_sample_4


commonNeighbor_biosnap_sample_5


commonNeighbor_biosnap_sample_6


commonNeighbor_biosnap_sample_7


commonNeighbor_biosnap_sample_8


commonNeighbor_biosnap_sample_9


L3Normalizing_biosnap_sample_0


L3Normalizing_biosnap_sample_1


L3Normalizing_biosnap_sample_2


L3Normalizing_biosnap_sample_3


L3Normalizing_biosnap_sample_4


L3Normalizing_biosnap_sample_5


L3Normalizing_biosnap_sample_6


L3Normalizing_biosnap_sample_7


L3Normalizing_biosnap_sample_8


L3Normalizing_biosnap_sample_9


CRA_biosnap_sample_0


CRA_biosnap_sample_1


CRA_biosnap_sample_2


CRA_biosnap_sample_3


CRA_biosnap_sample_4


CRA_biosnap_sample_5


CRA_biosnap_sample_6


CRA_biosnap_sample_7


CRA_biosnap_sample_8


CRA_biosnap_sample_9


CH2_L3_biosnap_sample_0


CH2_L3_biosnap_sample_1


CH2_L3_biosnap_sample_2


CH2_L3_biosnap_sample_

In [None]:
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'L3E1_f1', 'L3E1_f2', 'random']
ds_names = ['bioGRID_5FoldCV', 'STRING_5FoldCV', 'MINT_5FoldCV']

for ds_name in ds_names:
    # read dataset
    samplePPIs = []
    with open("./sampled_datasets/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())

    # do link prediction & save results
    for method in methods:
        for i in range(len(samplePPIs)):
            saveFilename = "{}_{}_sample_{}".format(method, ds_name, i)
            print(saveFilename)
            if os.path.exists("./linkPred_out/"+saveFilename+"_PPI.json"): continue
            startTime = time.time()

            # jupyter notebook cannot display multi core logging, do it only in terminal
            predPPI, predScore = pred.multiCore_PPILinkPred(samplePPIs[i]
                                                            , method, coreNo=14, logging=False)
            helper.write_runTime(saveFilename, time.time()-startTime)
            helper.write_resultData(predPPI, predScore, saveFilename)

commonNeighbor_bioGRID_5FoldCV_sample_0
commonNeighbor_bioGRID_5FoldCV_sample_1
commonNeighbor_bioGRID_5FoldCV_sample_2
commonNeighbor_bioGRID_5FoldCV_sample_3
commonNeighbor_bioGRID_5FoldCV_sample_4
L3Normalizing_bioGRID_5FoldCV_sample_0
L3Normalizing_bioGRID_5FoldCV_sample_1
L3Normalizing_bioGRID_5FoldCV_sample_2
L3Normalizing_bioGRID_5FoldCV_sample_3
L3Normalizing_bioGRID_5FoldCV_sample_4
CRA_bioGRID_5FoldCV_sample_0
CRA_bioGRID_5FoldCV_sample_1
CRA_bioGRID_5FoldCV_sample_2
CRA_bioGRID_5FoldCV_sample_3
CRA_bioGRID_5FoldCV_sample_4
CH2_L3_bioGRID_5FoldCV_sample_0
CH2_L3_bioGRID_5FoldCV_sample_1
CH2_L3_bioGRID_5FoldCV_sample_2
CH2_L3_bioGRID_5FoldCV_sample_3
CH2_L3_bioGRID_5FoldCV_sample_4


Sim_bioGRID_5FoldCV_sample_0


Sim_bioGRID_5FoldCV_sample_1


Sim_bioGRID_5FoldCV_sample_2


Sim_bioGRID_5FoldCV_sample_3


Sim_bioGRID_5FoldCV_sample_4


L3E1_f1_bioGRID_5FoldCV_sample_0


L3E1_f1_bioGRID_5FoldCV_sample_1


L3E1_f1_bioGRID_5FoldCV_sample_2


L3E1_f1_bioGRID_5FoldCV_sample_3


L3E

In [None]:
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'L3E1_f1', 'L3E1_f2', 'random']
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

for ds_name in ds_names:
    for saltSz in [15, 25]:
        # read dataset
        samplePPIs = []
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_contam{}Percent_sampledPPIs.json".format(
            ds_name, saltSz), "r") as f:
            samplePPIs = json.loads(f.read())

        # do link prediction & save results
        for method in methods:
            for i in range(len(samplePPIs)):
                saveFilename = "{}_{}_contam{}Percent_sample_{}".format(method, ds_name, saltSz, i)
                print(saveFilename)
                if os.path.exists("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out/"+saveFilename+"_PPI.json"): continue
                startTime = time.time()

                # jupyter notebook cannot display multi core logging, do it only in terminal
                predPPI, predScore = pred.multiCore_PPILinkPred(samplePPIs[i]
                                                                , method, coreNo=14, logging=False)
                helper.write_runTime(saveFilename, time.time()-startTime)
                helper.write_resultData(predPPI, predScore, saveFilename)

commonNeighbor_biosnap_contam15Percent_sample_0


commonNeighbor_biosnap_contam15Percent_sample_1


commonNeighbor_biosnap_contam15Percent_sample_2


commonNeighbor_biosnap_contam15Percent_sample_3


commonNeighbor_biosnap_contam15Percent_sample_4


commonNeighbor_biosnap_contam15Percent_sample_5


commonNeighbor_biosnap_contam15Percent_sample_6


commonNeighbor_biosnap_contam15Percent_sample_7


commonNeighbor_biosnap_contam15Percent_sample_8


commonNeighbor_biosnap_contam15Percent_sample_9


L3Normalizing_biosnap_contam15Percent_sample_0


L3Normalizing_biosnap_contam15Percent_sample_1


L3Normalizing_biosnap_contam15Percent_sample_2


L3Normalizing_biosnap_contam15Percent_sample_3


L3Normalizing_biosnap_contam15Percent_sample_4


L3Normalizing_biosnap_contam15Percent_sample_5


L3Normalizing_biosnap_contam15Percent_sample_6


L3Normalizing_biosnap_contam15Percent_sample_7


L3Normalizing_biosnap_contam15Percent_sample_8


L3Normalizing_biosnap_contam15Percent_sample_9


CRA_biosna

In [None]:
methods = ["commonNeighbor", "L3Normalizing", "CRA", "Sim", "L3E1_f1", 'L3E1_f2', 'random']
# skip CH2 and L3E1_f2 first because waste time, may use HPC
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

for randSz in range(60, 100, 10):
    for ds_name in ds_names:
        # read dataset
        samplePPIs = []
        with open("./sampled_datasets/{}_sampledPPIs_{}Percent.json".format(ds_name, randSz), "r") as f:
            samplePPIs = json.loads(f.read())

        # do link prediction & save results
        for method in methods:
            print(randSz, ds_name, method)
            for i in range(len(samplePPIs)):
                saveFilename = "{}_{}_sample_{}_randSz{}Percent".format(method, ds_name, i, randSz)
                startTime = time.time()

                # jupyter notebook cannot display multi core logging, do it only in terminal
                predPPI, predScore = pred.multiCore_PPILinkPred(samplePPIs[i]
                                                                , method, coreNo=14, logging=False)
                helper.write_runTime(saveFilename, time.time()-startTime)
                helper.write_resultData(predPPI, predScore, saveFilename)

# Data Cleaning for Analysis & Processing

In [None]:
# extract only top n edges, n = size of sampled dataset
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    sampleSize = len(samplePPIs[0])

    # loop each method, each trial, extract the number into one json
    for method in methods:
        fullPPI, fullScore = [], []
        for trial in range(10):
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out/{}_{}_sample_{}_PPI.json".format(method, ds_name, trial), "r") as f:
                fullPPI.append(json.loads(f.read())[0:sampleSize])
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out/{}_{}_sample_{}_score.json".format(method, ds_name, trial), "r") as f:
                fullScore.append(json.loads(f.read())[0:sampleSize])
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullPPI))
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topScore.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullScore))

In [None]:
# extract only top n edges, n = size of sampled dataset
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']
#ds_names = [ds+"_contam{}Percent".format(salt) for ds in ds_names for salt in [15,25]]
#ds_names = ['synthetic']
#ds_names = ['biosnap_5FoldCV', 'ddinter_A_5FoldCV', 'ddinter_R_5FoldCV']

for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    sampleSize = int((len(samplePPIs[0])/0.8)*0.2)

    # loop each method, each trial, extract the number into one json
    for method in methods:
        fullPPI, fullScore = [], []
        saveFilename = "{}_{}".format(method, ds_name)
        print(saveFilename)
        if os.path.exists("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/"+saveFilename+"_topPPI.json"): continue

        for trial in range(5):
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_PPI.json".format(method, ds_name, trial), "r") as f:
                fullPPI.append(json.loads(f.read())[0:sampleSize])
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_score.json".format(method, ds_name, trial), "r") as f:
                fullScore.append(json.loads(f.read())[0:sampleSize])
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullPPI))
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topScore.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullScore))

commonNeighbor_biosnap
L3Normalizing_biosnap
CRA_biosnap
CH2_L3_biosnap
Sim_biosnap
random_biosnap
L3E1_f1_biosnap
L3E1_f2_biosnap
commonNeighbor_ddinter_A
L3Normalizing_ddinter_A
CRA_ddinter_A
CH2_L3_ddinter_A
Sim_ddinter_A
random_ddinter_A
L3E1_f1_ddinter_A
L3E1_f2_ddinter_A
commonNeighbor_ddinter_R
L3Normalizing_ddinter_R
CRA_ddinter_R
CH2_L3_ddinter_R
Sim_ddinter_R
random_ddinter_R
L3E1_f1_ddinter_R
L3E1_f2_ddinter_R


In [6]:
# extract only top n edges, n = size of sampled dataset
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']
ds_names = [ds+"_contam{}Percent".format(salt) for ds in ds_names for salt in [15,25]]
#ds_names = ['synthetic']
#ds_names = ['biosnap_5FoldCV', 'ddinter_A_5FoldCV', 'ddinter_R_5FoldCV']

for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    sampleSize = int((len(samplePPIs[0])/0.8)*0.2)

    # loop each method, each trial, extract the number into one json
    for method in methods:
        fullPPI, fullScore = [], []
        saveFilename = "{}_{}".format(method, ds_name)
        print(saveFilename)
        if os.path.exists("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/"+saveFilename+"_topPPI.json"): continue

        for trial in range(5):
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_PPI.json".format(method, ds_name, trial), "r") as f:
                fullPPI.append(json.loads(f.read())[0:sampleSize])
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_score.json".format(method, ds_name, trial), "r") as f:
                fullScore.append(json.loads(f.read())[0:sampleSize])
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullPPI))
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topScore.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullScore))

commonNeighbor_biosnap_contam15Percent
L3Normalizing_biosnap_contam15Percent
CRA_biosnap_contam15Percent
CH2_L3_biosnap_contam15Percent
Sim_biosnap_contam15Percent
random_biosnap_contam15Percent
L3E1_f1_biosnap_contam15Percent
L3E1_f2_biosnap_contam15Percent
commonNeighbor_biosnap_contam25Percent
L3Normalizing_biosnap_contam25Percent
CRA_biosnap_contam25Percent
CH2_L3_biosnap_contam25Percent
Sim_biosnap_contam25Percent
random_biosnap_contam25Percent
L3E1_f1_biosnap_contam25Percent
L3E1_f2_biosnap_contam25Percent
commonNeighbor_ddinter_A_contam15Percent
L3Normalizing_ddinter_A_contam15Percent
CRA_ddinter_A_contam15Percent
CH2_L3_ddinter_A_contam15Percent
Sim_ddinter_A_contam15Percent
random_ddinter_A_contam15Percent
L3E1_f1_ddinter_A_contam15Percent
L3E1_f2_ddinter_A_contam15Percent
commonNeighbor_ddinter_A_contam25Percent
L3Normalizing_ddinter_A_contam25Percent
CRA_ddinter_A_contam25Percent
CH2_L3_ddinter_A_contam25Percent
Sim_ddinter_A_contam25Percent
random_ddinter_A_contam25Percent


In [10]:
# extract only top n edges, n = size of sampled dataset
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']
ds_names = [ds+"_contam{}Percent".format(contam) for contam in [5,10,15,20,25] for ds in ds_names]
#ds_names = ['synthetic']
#ds_names = ['biosnap_5FoldCV', 'ddinter_A_5FoldCV', 'ddinter_R_5FoldCV']

for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    sampleSize = int((len(samplePPIs[0])/0.8)*0.2)

    # loop each method, each trial, extract the number into one json
    for method in methods:
        fullPPI, fullScore = [], []
        saveFilename = "{}_{}".format(method, ds_name)
        print(saveFilename)
        if os.path.exists("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/"+saveFilename+"_topPPI.json"): continue

        for trial in range(5):
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out/{}_{}_sample_{}_PPI.json".format(method, ds_name, trial), "r") as f:
                fullPPI.append(json.loads(f.read())[0:sampleSize])
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_score.json".format(method, ds_name, trial), "r") as f:
                fullScore.append(json.loads(f.read())[0:sampleSize])
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullPPI))
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topScore.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullScore))

commonNeighbor_biosnap_contam5Percent


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out/commonNeighbor_biosnap_contam5Percent_sample_0_PPI.json'

In [None]:
# extract only top n edges, n = size of sampled dataset
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
#ds_names = ['bioGRID', 'STRING', 'MINT']
#ds_names = [ds+"_contam{}Percent".format(salt) for ds in ds_names for salt in [15,25]]
#ds_names = ['synthetic']
ds_names = ['biosnap_5FoldCV', 'ddinter_A_5FoldCV', 'ddinter_R_5FoldCV']

for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    sampleSize = int((len(samplePPIs[0])/0.8)*0.2)

    # loop each method, each trial, extract the number into one json
    for method in methods:
        fullPPI, fullScore = [], []
        saveFilename = "{}_{}".format(method, ds_name)
        print(saveFilename)
        if os.path.exists("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/"+saveFilename+"_topPPI.json"): continue

        for trial in range(10):
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_PPI.json".format(method, ds_name, trial), "r") as f:
                fullPPI.append(json.loads(f.read())[0:sampleSize])
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_score.json".format(method, ds_name, trial), "r") as f:
                fullScore.append(json.loads(f.read())[0:sampleSize])
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullPPI))
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topScore.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullScore))

commonNeighbor_biosnap_5FoldCV
L3Normalizing_biosnap_5FoldCV
CRA_biosnap_5FoldCV
CH2_L3_biosnap_5FoldCV
Sim_biosnap_5FoldCV
random_biosnap_5FoldCV
L3E1_f1_biosnap_5FoldCV
L3E1_f2_biosnap_5FoldCV
commonNeighbor_ddinter_A_5FoldCV
L3Normalizing_ddinter_A_5FoldCV
CRA_ddinter_A_5FoldCV
CH2_L3_ddinter_A_5FoldCV
Sim_ddinter_A_5FoldCV
random_ddinter_A_5FoldCV
L3E1_f1_ddinter_A_5FoldCV
L3E1_f2_ddinter_A_5FoldCV
commonNeighbor_ddinter_R_5FoldCV
L3Normalizing_ddinter_R_5FoldCV
CRA_ddinter_R_5FoldCV
CH2_L3_ddinter_R_5FoldCV
Sim_ddinter_R_5FoldCV
random_ddinter_R_5FoldCV
L3E1_f1_ddinter_R_5FoldCV
L3E1_f2_ddinter_R_5FoldCV


In [None]:
# extract only top n edges, n = size of sampled dataset
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
#ds_names = ['bioGRID', 'STRING', 'MINT']
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']
ds_names = [ds+"_contam{}Percent".format(salt) for ds in ds_names for salt in [15,25]]
#ds_names = ['synthetic']


for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    sampleSize = int((len(samplePPIs[0])/0.8)*0.2)

    # loop each method, each trial, extract the number into one json
    for method in methods:
        fullPPI, fullScore = [], []
        saveFilename = "{}_{}".format(method, ds_name)
        print(saveFilename)
        if os.path.exists("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/"+saveFilename+"_topPPI.json"): continue

        for trial in range(5):
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_PPI.json".format(method, ds_name, trial), "r") as f:
                fullPPI.append(json.loads(f.read())[0:sampleSize])
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out//{}_{}_sample_{}_score.json".format(method, ds_name, trial), "r") as f:
                fullScore.append(json.loads(f.read())[0:sampleSize])
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullPPI))
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topScore.json".format(method, ds_name), "w") as f:
            f.write(json.dumps(fullScore))

commonNeighbor_biosnap_contam15Percent
L3Normalizing_biosnap_contam15Percent
CRA_biosnap_contam15Percent
CH2_L3_biosnap_contam15Percent
Sim_biosnap_contam15Percent
random_biosnap_contam15Percent
L3E1_f1_biosnap_contam15Percent
L3E1_f2_biosnap_contam15Percent
commonNeighbor_biosnap_contam25Percent
L3Normalizing_biosnap_contam25Percent
CRA_biosnap_contam25Percent
CH2_L3_biosnap_contam25Percent
Sim_biosnap_contam25Percent
random_biosnap_contam25Percent
L3E1_f1_biosnap_contam25Percent
L3E1_f2_biosnap_contam25Percent
commonNeighbor_ddinter_A_contam15Percent
L3Normalizing_ddinter_A_contam15Percent
CRA_ddinter_A_contam15Percent
CH2_L3_ddinter_A_contam15Percent
Sim_ddinter_A_contam15Percent
random_ddinter_A_contam15Percent
L3E1_f1_ddinter_A_contam15Percent
L3E1_f2_ddinter_A_contam15Percent
commonNeighbor_ddinter_A_contam25Percent
L3Normalizing_ddinter_A_contam25Percent
CRA_ddinter_A_contam25Percent
CH2_L3_ddinter_A_contam25Percent
Sim_ddinter_A_contam25Percent
random_ddinter_A_contam25Percent


In [3]:
allPaths = ["/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook"
]
coreNo, trialNum = 24, 10
def verify(method, ds, randSz):
    # check HPC or not
    isHPC = None
    for path in allPaths:
        # check if file exists in linkPred_out_reduced
        if os.path.exists("./linkPred_out_reduced/{}_{}_randSz{}_topPPI.json".format(
            method, ds, randSz)): return 0, None, None
        if os.path.exists("{}/linkPred_out/{}_{}_sample_9_randSz{}Percent_c0_PPI.json".format(
            path, method, ds, randSz)):
            isHPC = True
            break
        elif os.path.exists("{}/linkPred_out/{}_{}_sample_9_randSz{}Percent_PPI.json".format(
            path, method, ds, randSz)):
            isHPC = False
            break
    if isHPC is None: return 2, None, None
    # iterate the abs path to all related files
    filenames = []
    if isHPC:
        for trial in range(trialNum):
            for core in range(coreNo):
                for path in allPaths:
                    filename = "{}/linkPred_out/{}_{}_sample_{}_randSz{}Percent_c{}_PPI.json".format(
                        path, method, ds, trial, randSz, core)
                    if os.path.exists(filename): filenames.append(filename.split("_PPI.json")[0])
    else:
        for trial in range(trialNum):
            for path in allPaths:
                filename = "{}/linkPred_out/{}_{}_sample_{}_randSz{}Percent_PPI.json".format(
                    path, method, ds, trial, randSz)
                if os.path.exists(filename): filenames.append(filename.split("_PPI.json")[0])
    # return available, list of files, also isHPC
    return 1, filenames, isHPC

In [7]:
# trim data that isn't trimmed yet
#methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
methods = ["CH2_L3"]

dss = ['biosnap', 'ddinter_A', 'ddinter_R']
coreNo, trialNum = 24, 10

for randSz in range(60, 100, 10):
    for ds in dss:
        samplePPIs = []
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs_{}Percent.json".format(ds, randSz), "r") as f:
            samplePPIs = json.loads(f.read())
        sampleSize = len(samplePPIs[0])

        for method in methods:
            available, filenames, isHPC = verify(method, ds, randSz)
            print(randSz, ds, method, isHPC, available)
            if available != 1: continue

            if isHPC:

                for trial in range(trialNum):
                    topPPIs, topScores = [], []
                    for core in range(coreNo):
                        with open(filenames[trial*coreNo+core]+"_PPI.json", "r") as f: topPPIs += json.loads(f.read())
                        with open(filenames[trial*coreNo+core]+"_score.json", "r") as f: topScores += json.loads(f.read())
                        topPPIs, topScores = hr.sort_key_val(topPPIs, topScores)
                        topPPIs, topScores = topPPIs[:sampleSize], topScores[:sampleSize]
                    with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_combined/{}_{}_sample_{}_randSz{}_topPPI.json".format(method, ds, trial, randSz), "w") as f:
                        f.write(json.dumps(topPPIs))
                    with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_combined/{}_{}_sample_{}_randSz{}_topScore.json".format(method, ds, trial, randSz), "w") as f:
                        f.write(json.dumps(topScores))

                fullPPIs, fullScores = [], []
                for trial in range(10):
                    with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_combined/{}_{}_sample_{}_randSz{}_topPPI.json".format(method, ds, trial, randSz), "r") as f:
                        fullPPIs.append(json.loads(f.read()))
                    with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_combined/{}_{}_sample_{}_randSz{}_topScore.json".format(method, ds, trial, randSz), "r") as f:
                        fullScores.append(json.loads(f.read()))
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topPPI.json".format(method, ds, randSz), "w") as f:
                    f.write(json.dumps(fullPPIs))
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topScore.json".format(method, ds, randSz), "w") as f:
                    f.write(json.dumps(fullScores))

            else:

                fullPPIs, fullScores = [], []
                for filename in filenames:
                    with open(filename+"_PPI.json", "r") as f: fullPPIs.append(json.loads(f.read())[0:sampleSize])
                    with open(filename+"_score.json", "r") as f: fullScores.append(json.loads(f.read())[0:sampleSize])
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topPPI.json".format(method, ds, randSz), "w") as f:
                    f.write(json.dumps(fullPPIs))
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topScore.json".format(method, ds, randSz), "w") as f:
                    f.write(json.dumps(fullScores))

60 biosnap CH2_L3 False 1
60 ddinter_A CH2_L3 False 1
60 ddinter_R CH2_L3 False 1
70 biosnap CH2_L3 False 1
70 ddinter_A CH2_L3 False 1
70 ddinter_R CH2_L3 False 1
80 biosnap CH2_L3 False 1
80 ddinter_A CH2_L3 False 1
80 ddinter_R CH2_L3 False 1
90 biosnap CH2_L3 False 1
90 ddinter_A CH2_L3 False 1
90 ddinter_R CH2_L3 False 1


In [8]:
# above randSz reduced wrong, reduce even more
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'random', "L3E1_f1", "L3E1_f2"]
dss = ['biosnap', 'ddinter_A', 'ddinter_R']

for randSz in range(60, 100, 10):
    for ds in dss:
        samplePPIs = []
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs_{}Percent.json".format(ds, randSz), "r") as f:
            samplePPIs = json.loads(f.read())
        sampleSize = len(samplePPIs[0])

        for method in methods:
            reducedPPIs, reducedScores = [], []
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topPPI.json".format(method, ds, randSz), "r") as f:
                reducedPPIs = json.loads(f.read())
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topScore.json".format(method, ds, randSz), "r") as f:
                reducedScores = json.loads(f.read())

            for i in range(len(reducedPPIs)):
                reducedPPIs[i] = reducedPPIs[i][:int(sampleSize/(randSz/100)*np.around(1-randSz/100, 2))]
                reducedScores[i] = reducedScores[i][:int(sampleSize/(randSz/100)*np.around(1-randSz/100, 2))]

            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topPPI.json".format(method, ds, randSz), "w") as f:
                f.write(json.dumps(reducedPPIs))
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topScore.json".format(method, ds, randSz), "w") as f:
                f.write(json.dumps(reducedScores))

# Generate GOSemSim

run **GOSemSim_compute.R** of the same directory, it scans ./linkPred_out and output GOSemSim in the same format of **xxx_topScore.json**

# Generate precision recall

In [4]:
# yeast dataset
# sample non-PPIs of real-PPIs size

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]
bio_snap = pd.read_csv(import_funcs[0])
ddinter_A = pd.read_csv(import_funcs[1])

ddinter_R = pd.read_csv(import_funcs[2])
names = ['biosnap', 'ddinter_A', 'ddinter_R']
for n in range(len(names)):
    df = pd.read_csv(import_funcs[n])
    ppi = [list(arr) for arr in np.asarray(df[['nodeA', 'nodeB']])]

In [None]:
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", 'L3E1_f1', 'L3E1_f2', 'random']


In [None]:
# for each dataset & predictor, get precision recall and save in one file for each trial
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", "L3E1_f1", "L3E1_f2", "random"]#, "L3E_f1Alt", "L3E_f2Alt",这个没有做
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

bio_snap = pd.read_csv(import_funcs[0])
ddinter_A = pd.read_csv(import_funcs[1])

ddinter_R = pd.read_csv(import_funcs[2])

completePPIs_map = [
    [list(ppi) for ppi in np.asarray(bio_snap[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_A[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_R[['nodeA', 'nodeB']])]
]
completePPIs = dict(zip(ds_names, completePPIs_map))


for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())
    i = 0
    for method in methods:
        fullPPIs = []
        i+=1
        print(i)
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(method, ds_name), "r") as f:
            fullPPIs = json.loads(f.read())

        # len(fullPPIs) = len(samplePPIs) = 10, because 10 trials
        precRecMap = pred.precRecMap_multiCore(
            ["{}_{}_topPPI_{}".format(method, ds_name, i) for i in range(len(fullPPIs))]
          , fullPPIs, samplePPIs, [completePPIs[ds_name] for i in range(len(fullPPIs))]
          , coreNo=10)

        for key in precRecMap:
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/precision_recall_out/{}.json".format(key), 'w') as f:
                f.write(json.dumps(precRecMap[key]))

In [None]:
# for each dataset & predictor, get precision recall and save in one file for each trial
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", "L3E1_f1", "L3E1_f2", "random"]
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

completePPIs_map = [
    [list(ppi) for ppi in np.asarray(bio_snap[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_A[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_R[['nodeA', 'nodeB']])]
]
completePPIs = dict(zip(ds_names, completePPIs_map))


for ds_name in ds_names:
    for salt in [15, 25]:
        samplePPIs = []
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_contam{}Percent_sampledPPIs.json".format(ds_name, salt), "r") as f:
            samplePPIs = json.loads(f.read())

        for method in methods:
            fullPPIs = []
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_contam{}Percent_topPPI.json".format(
                method, ds_name, salt), "r") as f:
                fullPPIs = json.loads(f.read())

            # len(fullPPIs) = len(samplePPIs) = 10, because 10 trials
            precRecMap = pred.precRecMap_multiCore(
                ["{}_{}_contam{}Percent_topPPI_{}".format(method, ds_name, salt, i) for i in range(len(fullPPIs))]
              , fullPPIs, samplePPIs, [completePPIs[ds_name] for i in range(len(fullPPIs))]
              , coreNo=10)
            i = 0
            for key in precRecMap:
                i+=1
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/precision_recall_out/{}.json".format(key), 'w') as f:
                    f.write(json.dumps(precRecMap[key]))

In [5]:
# for each dataset & predictor, get precision recall and save in one file for each trial
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", "L3E1_f1", "L3E1_f2", "random"]
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

completePPIs_map = [
    [list(ppi) for ppi in np.asarray(bio_snap[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_A[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_R[['nodeA', 'nodeB']])]
]
completePPIs = dict(zip(ds_names, completePPIs_map))


for ds_name in ds_names:
    for salt in [5,10,15,20,25]:
        samplePPIs = []
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_contam{}Percent_sampledPPIs.json".format(ds_name, salt), "r") as f:
            samplePPIs = json.loads(f.read())

        for method in methods:
            fullPPIs = []
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_contam{}Percent_topPPI.json".format(
                method, ds_name, salt), "r") as f:
                fullPPIs = json.loads(f.read())

            # len(fullPPIs) = len(samplePPIs) = 10, because 10 trials
            precRecMap = pred.precRecMap_multiCore(
                ["{}_{}_contam{}Percent_topPPI_{}".format(method, ds_name, salt, i) for i in range(len(fullPPIs))]
              , fullPPIs, samplePPIs, [completePPIs[ds_name] for i in range(len(fullPPIs))]
              , coreNo=10)
            i = 0
            for key in precRecMap:
                i+=1
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/precision_recall_out/{}.json".format(key), 'w') as f:
                    f.write(json.dumps(precRecMap[key]))

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/commonNeighbor_biosnap_contam5Percent_topPPI.json'

In [None]:
# for each dataset & predictor, get precision recall and save in one file for each trial
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", "L3E1_f1", "L3E1_f2", "random"]
ds_names = ['biosnap_5FoldCV', 'ddinter_A_5FoldCV', 'ddinter_R_5FoldCV']


completePPIs_map = [
    [list(ppi) for ppi in np.asarray(bio_snap[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_A[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_R[['nodeA', 'nodeB']])]
]
completePPIs = dict(zip(ds_names, completePPIs_map))


for ds_name in ds_names:
    samplePPIs = []
    with open("/content/drive/MyDrive/DDI_Dataset/Process_data/{}_sampledPPIs.json".format(ds_name), "r") as f:
        samplePPIs = json.loads(f.read())

    for method in methods:
        fullPPIs = []
        with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_topPPI.json".format(
            method, ds_name), "r") as f:
            fullPPIs = json.loads(f.read())

        # len(fullPPIs) = len(samplePPIs) = 10, because 10 trials
        print(len(fullPPIs))
        precRecMap = pred.precRecMap_multiCore(
            ["{}_{}_topPPI_{}".format(method, ds_name, i) for i in range(len(fullPPIs))]
          , fullPPIs, samplePPIs, [completePPIs[ds_name] for i in range(len(fullPPIs))]
          , coreNo=10)
        i = 0
        for key in precRecMap:
            i+=1
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/precision_recall_out/{}.json".format(key), 'w') as f:
                f.write(json.dumps(precRecMap[key]))

5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5


In [12]:
# for each dataset & predictor, get precision recall and save in one file for each trial
#methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", "random", "L3E1_f1", "L3E1_f2"]

import_funcs = [
    '/content/drive/MyDrive/DDI_Dataset/BioSNAP/processed_drug_pairs_biosnap.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter_A.csv',
    '/content/drive/MyDrive/DDI_Dataset/DDInter/processed_drug_pairs_DDInter.csv'
]
# for each dataset & predictor, get precision recall and save in one file for each trial
methods = ["commonNeighbor", "L3Normalizing", "CRA", "CH2_L3", "Sim", "L3E1_f1", "L3E1_f2", "random"]#, "L3E_f1Alt", "L3E_f2Alt",这个没有做
ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

bio_snap = pd.read_csv(import_funcs[0])
ddinter_A = pd.read_csv(import_funcs[1])

ddinter_R = pd.read_csv(import_funcs[2])

ds_names = ['biosnap', 'ddinter_A', 'ddinter_R']

completePPIs_map = [
    [list(ppi) for ppi in np.asarray(bio_snap[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_A[['nodeA', 'nodeB']])]
    , [list(ppi) for ppi in np.asarray(ddinter_R[['nodeA', 'nodeB']])]
]
completePPIs = dict(zip(ds_names, completePPIs_map))

for randSz in range(60, 100, 10):
    for ds_name in ds_names:
        samplePPIs = []
        with open("/content/drive/MyDrive/DDI_Dataset/Process_data//{}_sampledPPIs_{}Percent.json".format(ds_name, randSz), "r") as f:
            samplePPIs = json.loads(f.read())

        for method in methods:
            fullPPIs = []
            with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/linkPred_out_reduced/{}_{}_randSz{}_topPPI.json".format(method, ds_name, randSz), "r") as f:
                fullPPIs = json.loads(f.read())

            # len(fullPPIs) = len(samplePPIs) = 10, because 10 trials
            precRecMap = pred.precRecMap_multiCore(
                ["{}_{}_randSz{}_topPPI_{}".format(method, ds_name, randSz, i) for i in range(len(fullPPIs))]
              , fullPPIs, samplePPIs, [completePPIs[ds_name] for i in range(len(fullPPIs))]
              , coreNo=10)

            for key in precRecMap:
                with open("/content/drive/MyDrive/DDI_Network/PPI_L3N_Network-main/src/notebook/precision_recall_out/{}.json".format(key), 'w') as f:
                    f.write(json.dumps(precRecMap[key]))