In [1]:
# 05_calculate_overlap
#
# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on March 5, 2023
# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on March 11, 2023
#
# 该脚本计算了四个预训练模型 (transE_l1, transE_l2, ComplEx, rotatE) 药物重定位 (Alzheimer's disease) 结果重叠情况.
#
# 需要的包:
#          csv
#
# 需要的文件:
#          ./results/01_transE_l1_top50.csv
#          ./results/02_transE_l2_top50.csv
#          ./results/03_ComplEx_top100.csv
#          ./results/04_rotatE_top50.csv

## TransE_l1 top50 结果

In [2]:
import csv

# Collect the 'drug' column of the TransE_l1 result file.
# Field names are passed explicitly, so the first CSV row is read as data
# and the columns are mapped positionally to rank / drug / score.
transE_l1_top50 = []
with open("./results/01_transE_l1_top50.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=['rank', 'drug', 'score'], delimiter=',')
    transE_l1_top50.extend(row_val['drug'] for row_val in reader)

In [3]:
# Display the drug identifiers collected for TransE_l1.
transE_l1_top50

['Compound::DB04216',
 'Compound::DB06774',
 'Compound::DB00783',
 'Compound::DB00295',
 'Compound::DB09341',
 'Compound::DB00143',
 'Compound::DB00363',
 'Compound::DB04540',
 'Compound::DB00907',
 'Compound::DB00917',
 'Compound::DB00661',
 'Compound::DB01026',
 'Compound::DB01183',
 'Compound::DB01320',
 'Compound::DB00252',
 'Compound::DB00624',
 'Compound::DB01223',
 'Compound::DB00477',
 'Compound::DB00563']

In [4]:
# Number of drug identifiers collected for TransE_l1.
len(transE_l1_top50)

19

## TransE_l2 top50 结果

In [5]:
import csv

# Collect the 'drug' column of the TransE_l2 result file.
# Field names are passed explicitly, so the first CSV row is read as data
# and the columns are mapped positionally to rank / drug / score.
transE_l2_top50 = []
with open("./results/02_transE_l2_top50.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=['rank', 'drug', 'score'], delimiter=',')
    transE_l2_top50.extend(row_val['drug'] for row_val in reader)

In [6]:
# Display the drug identifiers collected for TransE_l2.
transE_l2_top50

['Compound::DB04540',
 'Compound::DB09341',
 'Compound::DB00143',
 'Compound::DB00515',
 'Compound::DB00997',
 'Compound::DB00171',
 'Compound::DB01229',
 'Compound::DB00477',
 'Compound::DB00755',
 'Compound::DB00502',
 'Compound::DB00783',
 'Compound::DB00295',
 'Compound::DB00661',
 'Compound::DB00675',
 'Compound::DB00624',
 'Compound::DB00363',
 'Compound::DB12153',
 'Compound::DB01708',
 'Compound::DB00541',
 'Compound::DB00959',
 'Compound::DB00396',
 'Compound::DB00907',
 'Compound::DB04216',
 'Compound::DB00531']

In [7]:
# Number of drug identifiers collected for TransE_l2.
len(transE_l2_top50)

24

## ComplEx top100 结果

In [8]:
import csv

# Collect the 'drug' column of the ComplEx result file.
# Field names are passed explicitly, so the first CSV row is read as data
# and the columns are mapped positionally to rank / drug / score.
ComplEx_top100 = []
with open("./results/03_ComplEx_top100.csv", newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=['rank', 'drug', 'score'], delimiter=',')
    ComplEx_top100.extend(row_val['drug'] for row_val in reader)

In [9]:
# Display the drug identifiers collected for ComplEx.
ComplEx_top100

['Compound::DB00715',
 'Compound::DB00661',
 'Compound::DB08860',
 'Compound::DB00959',
 'Compound::DB01118',
 'Compound::DB08868',
 'Compound::DB06287',
 'Compound::DB00175',
 'Compound::DB00563',
 'Compound::DB00759',
 'Compound::DB00381',
 'Compound::DB00860',
 'Compound::DB00678',
 'Compound::DB00829',
 'Compound::DB01016',
 'Compound::DB00457',
 'Compound::DB00602',
 'Compound::DB00876',
 'Compound::DB02315',
 'Compound::DB01149',
 'Compound::DB01307',
 'Compound::DB01054',
 'Compound::DB00950',
 'Compound::DB01115',
 'Compound::DB00363',
 'Compound::DB04630',
 'Compound::DB00530',
 'Compound::DB01224',
 'Compound::DB01076',
 'Compound::DB01004',
 'Compound::DB00983',
 'Compound::DB01407',
 'Compound::DB00343',
 'Compound::DB01023']

In [10]:
# Number of drug identifiers collected for ComplEx.
len(ComplEx_top100)

34

## RotatE top50 结果

In [11]:
# Collect the 'drug' column of the RotatE result file.
# This cell uses the `csv` module without importing it itself, so an earlier
# cell must already have run `import csv`.
rotatE_top50 = []
# NOTE(review): rotatE_top50_ranks is created here but never filled in this
# cell — confirm whether it is still needed.
rotatE_top50_ranks = []
with open("./results/04_rotatE_top50.csv", newline='', encoding='utf-8') as csvfile:
    # Field names are passed explicitly, so the first CSV row is read as data.
    reader = csv.DictReader(csvfile, fieldnames=['rank', 'drug', 'score'], delimiter=',')
    rotatE_top50.extend(row_val['drug'] for row_val in reader)

In [12]:
# Display the drug identifiers collected for RotatE.
rotatE_top50

['Compound::DB00143',
 'Compound::DB00502',
 'Compound::DB06774',
 'Compound::DB04216',
 'Compound::DB00783',
 'Compound::DB09341',
 'Compound::DB00822',
 'Compound::DB00640',
 'Compound::DB01105',
 'Compound::DB00715',
 'Compound::DB00907',
 'Compound::DB01229',
 'Compound::DB04540',
 'Compound::DB01016',
 'Compound::DB02010',
 'Compound::DB14681',
 'Compound::DB00321']

In [13]:
# Number of drug identifiers collected for RotatE.
len(rotatE_top50)

17

## 4 种模型药物重定位结果重叠情况

In [14]:
# Drugs that appear in both the TransE_l1 and the TransE_l2 result lists.
overlap_1_2 = set(transE_l1_top50) & set(transE_l2_top50)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_1_2:
    print(drug)
print("*" * 42, len(overlap_1_2), sep="\n")

Compound::DB00783
Compound::DB00624
Compound::DB09341
Compound::DB00477
Compound::DB04216
Compound::DB04540
Compound::DB00661
Compound::DB00143
Compound::DB00295
Compound::DB00907
Compound::DB00363
******************************************
11


In [15]:
# Drugs that appear in both the TransE_l1 and the ComplEx result lists.
overlap_1_3 = set(transE_l1_top50) & set(ComplEx_top100)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_1_3:
    print(drug)
print("*" * 42, len(overlap_1_3), sep="\n")

Compound::DB00363
Compound::DB00661
Compound::DB00563
******************************************
3


In [16]:
# Drugs that appear in both the TransE_l1 and the RotatE result lists.
overlap_1_4 = set(transE_l1_top50) & set(rotatE_top50)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_1_4:
    print(drug)
print("*" * 42, len(overlap_1_4), sep="\n")

Compound::DB00783
Compound::DB09341
Compound::DB04216
Compound::DB04540
Compound::DB00143
Compound::DB06774
Compound::DB00907
******************************************
7


In [17]:
# Drugs that appear in both the TransE_l2 and the ComplEx result lists.
overlap_2_3 = set(transE_l2_top50) & set(ComplEx_top100)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_2_3:
    print(drug)
print("*" * 42, len(overlap_2_3), sep="\n")

Compound::DB00959
Compound::DB00363
Compound::DB00661
******************************************
3


In [18]:
# Drugs that appear in both the TransE_l2 and the RotatE result lists.
overlap_2_4 = set(transE_l2_top50) & set(rotatE_top50)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_2_4:
    print(drug)
print("*" * 42, len(overlap_2_4), sep="\n")

Compound::DB00783
Compound::DB09341
Compound::DB04216
Compound::DB04540
Compound::DB00143
Compound::DB00907
Compound::DB01229
Compound::DB00502
******************************************
8


In [19]:
# Drugs that appear in both the ComplEx and the RotatE result lists.
overlap_3_4 = set(ComplEx_top100) & set(rotatE_top50)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_3_4:
    print(drug)
print("*" * 42, len(overlap_3_4), sep="\n")

Compound::DB00715
Compound::DB01016
******************************************
2


In [20]:
# Drugs that appear in all three of the TransE_l1, TransE_l2 and ComplEx result lists.
overlap_1_2_3 = set(transE_l1_top50) & set(transE_l2_top50) & set(ComplEx_top100)

# One shared drug per line, then a separator row and the number of shared drugs.
for drug in overlap_1_2_3:
    print(drug)
print("*" * 42, len(overlap_1_2_3), sep="\n")

Compound::DB00363
Compound::DB00661
******************************************
2


In [21]:
# Drugs that appear in all three of the TransE_l1, TransE_l2 and RotatE result lists.
overlap_1_2_4 = set(transE_l1_top50).intersection(set(transE_l2_top50)).intersection(set(rotatE_top50))

# Print one shared drug per line, followed by a separator row and the
# number of shared drugs.
for drug in overlap_1_2_4:
    print(drug)
print("*" * 42)
print(len(overlap_1_2_4))

Compound::DB00783
Compound::DB09341
Compound::DB04216
Compound::DB04540
Compound::DB00143
Compound::DB00907
******************************************
6


In [22]:
# Drugs that appear in all three of the TransE_l1, ComplEx and RotatE result lists.
overlap_1_3_4 = set(transE_l1_top50) & set(ComplEx_top100) & set(rotatE_top50)
# Last expression of the cell, so the resulting set is shown as the cell output.
overlap_1_3_4

set()

In [23]:
# Drugs that appear in all three of the TransE_l2, ComplEx and RotatE result lists.
overlap_2_3_4 = set(transE_l2_top50) & set(ComplEx_top100) & set(rotatE_top50)
# Last expression of the cell, so the resulting set is shown as the cell output.
overlap_2_3_4

set()

In [24]:
# Drugs that appear in all four result lists (TransE_l1, TransE_l2, ComplEx, RotatE).
overlap = (
    set(transE_l1_top50)
    & set(transE_l2_top50)
    & set(ComplEx_top100)
    & set(rotatE_top50)
)
# Last expression of the cell, so the resulting set is shown as the cell output.
overlap

set()