### Sort AMIE rules

In [1]:
def sort_and_store_amie_rules(in_path, out_path):
    """Sort AMIE rules by PCA confidence (descending) and write them to a file.

    Lines of ``in_path`` starting with ``"?"`` are treated as rules. Every
    line before the first rule is copied unchanged to the top of ``out_path``
    as the heading; non-rule lines after the first rule are dropped.

    Args:
        in_path: Path of the AMIE output file to read.
        out_path: Path of the file to write (overwritten if it exists).

    Raises:
        ValueError: If the fourth tab-separated field of a rule line is not
            a number.
        IndexError: If a rule line has fewer than four tab-separated fields.
    """
    with open(in_path) as file:
        lines = file.readlines()

    # Copy AMIE details (heading lines): everything before the first rule.
    # A plain for-loop also copes with a file that contains no rule at all.
    heading = []
    for line in lines:
        if line.startswith("?"):
            break
        heading.append(line)

    # Keep only the rule lines. A final line without a trailing newline gets
    # one, so it cannot be glued to the next rule once the order changes.
    amie_rules = [
        line if line.endswith("\n") else line + "\n"
        for line in lines
        if line.startswith("?")
    ]

    # Sort rules w.r.t. Partial Completeness Assumption (PCA) confidence,
    # the fourth tab-separated field. The key must be numeric: comparing the
    # raw strings orders them lexicographically ("9.0E-4" > "1" > "0.5").
    amie_rules.sort(key=lambda rule: float(rule.split("\t")[3]), reverse=True)
    print("\nFound {} rules".format(len(amie_rules)))

    # Store sorted rules
    with open(out_path, "w") as file:
        file.writelines(heading + amie_rules)

In [10]:
# Sort the AMIE rules stored in out_amie_v3_sameAs_v1.txt and save them as the v3 sorted file.
sort_and_store_amie_rules("./amie/out_amie_v3_sameAs_v1.txt", "./amie/amie_sorted_rules_v3.txt")


Found 4329 rules


In [3]:
# Sort the AMIE rules stored in out_amie_v2.txt and save them as the v2 sorted file.
sort_and_store_amie_rules("./amie/out_amie_v2.txt", "./amie/amie_sorted_rules_v2.txt")


Found 4361 rules


### Sort AnyBURL rules

In [65]:
def sort_and_store_anyBURL_rules(in_path, out_path):
    """Sort AnyBURL rules by confidence (descending) and write them to a file.

    Each non-blank line of ``in_path`` is treated as one tab-separated rule
    whose third field is the confidence. Blank lines are skipped.

    Args:
        in_path: Path of the AnyBURL rule file to read.
        out_path: Path of the file to write (overwritten if it exists).

    Raises:
        ValueError: If the third tab-separated field of a rule line is not
            a number.
        IndexError: If a rule line has fewer than three tab-separated fields.
    """
    with open(in_path) as file:
        lines = file.readlines()

    # Drop blank lines (they have no confidence field) and make sure every
    # rule ends with a newline so rules cannot be merged once reordered.
    anyBURL_rules = [
        line if line.endswith("\n") else line + "\n"
        for line in lines
        if line.strip()
    ]

    # Sort rules w.r.t. confidence. The key must be numeric: comparing the
    # raw strings orders them lexicographically ("9.5E-4" > "1.0" > "0.5").
    anyBURL_rules.sort(key=lambda rule: float(rule.split("\t")[2]), reverse=True)
    print("\nFound {} rules".format(len(anyBURL_rules)))

    # Store sorted rules
    with open(out_path, "w") as file:
        file.writelines(anyBURL_rules)

In [None]:
# Sort the AnyBURL rules stored in alpha-100 and save them as sorted_rules_v1.
sort_and_store_anyBURL_rules("./anyBURL/rules/alpha-100", "./anyBURL/rules/sorted_rules_v1")

### SAFRAN

Steps: 1) run `./SAFRAN calcjacc config-file`, 2) run `./SAFRAN learnnrnoisy config-file`, and 3) run `./SAFRAN applynrnoisy config-file`.

### Link prediction results before KG enrichment using SAFRAN

WN18RR
`MRR: 0.478
Hits@1: 0.442
Hits@3: 0.494
Hits@10: 0.560`

DRKG
`MRR: 0.429
Hits@1: 0.385
Hits@3: 0.453
Hits@10: 0.533`

MUTAGENESIS
`MRR: 0.513
Hits@1: 0.474
Hits@3: 0.530
Hits@10: 0.612`

CARCINOGENESIS
`MRR: 0.503
Hits@1: 0.467
Hits@3: 0.516
Hits@10: 0.598`

OPENBIOLINK
`MRR: 0.246
Hits@1: 0.170
Hits@3: 0.283
Hits@10: 0.440`

FB15k
`MRR: 0.309
Hits@1: 0.236
Hits@3: 0.348
Hits@10: 0.486`

YAGO3
`MRR: 0.549
Hits@1: 0.487
Hits@3: 0.594
Hits@10: 0.668`

### After KG enrichment

### Compare mined rules (AMIE+)

In [2]:
### import numpy as np
# Load both AMIE outputs, keeping only the rule lines (those starting with "?").
with open("./amie/out_amie_v1.txt") as file:
    rules_v1 = [line for line in file if line.startswith("?")]
with open("./amie/out_amie_v3_sameAs_v1.txt") as file:
    rules_v2 = [line for line in file if line.startswith("?")]


In [3]:
# Show the first rule line of v1 to inspect its tab-separated layout.
rules_v1[0]

'?b  INTACT::DEPHOSPHORYLATION REACTION::Gene:Gene  ?a   => ?a  INTACT::DEPHOSPHORYLATION REACTION::Gene:Gene  ?b\t0.227722772\t0.227722772\t0.250909091\t69\t303\t275\t?a\n'

In [4]:
import numpy as np

# Jaccard index over the rule expressions (first tab-separated field of each line).
rule_expression1 = {rule.split("\t")[0] for rule in rules_v1}
rule_expression2 = {rule.split("\t")[0] for rule in rules_v2}
index = len(rule_expression1 & rule_expression2) / len(rule_expression1 | rule_expression2)
print("Jaccard index of mined rules v1 and v2: ", index)

# Mean of the fourth tab-separated field (confidence) for each rule set.
avg_conf1 = np.mean(list(map(lambda rule: float(rule.split("\t")[3]), rules_v1)))
avg_conf2 = np.mean(list(map(lambda rule: float(rule.split("\t")[3]), rules_v2)))

print("\nAvg confidence v1: {}, v2: {}".format(avg_conf1, avg_conf2))

Jaccard index of mined rules v1 and v2:  0.9876004592422503

Avg confidence v1: 0.3118219945172175, v2: 0.3118627327964888


### Compare mined rules (AnyBURL)

In [70]:
def return_confidence(rule):
    """Return the confidence of a tab-separated rule line as a float.

    The confidence is read from the third tab-separated field of ``rule``.
    """
    fields = rule.split("\t")
    return float(fields[2])

In [81]:
# Keep only the v1 rules whose confidence reaches the threshold.
# NOTE: anyBURL_rules_v1 is assigned in the cell that reads sorted_rules_v1,
# so that cell has to be executed before this one.
threshold = 1.0
anyBURL_rules_filt = [
    rule for rule in anyBURL_rules_v1 if return_confidence(rule) >= threshold
]

anyBURL_rules_filt[-1]

'5\t5\t1.0\tSTRING::OTHER::Gene:Gene(Gene::83998,Y) <= STRING::OTHER::Gene:Gene(Y,Gene::83998)\n'

In [87]:
# Requires numpy to be available as `np`.
with open("./anyBURL/rules/sorted_rules_v1") as file:
    anyBURL_rules_v1 = list(file)
with open("./anyBURL/rules/sorted_rules_v2") as file:
    anyBURL_rules_v2 = list(file)

# Jaccard index over the rule expressions (fourth tab-separated field).
# Both lists are compared unfiltered; a confidence-threshold filter via
# return_confidence() was tried here and is currently disabled.
rule_expression1 = {rule.split("\t")[3] for rule in anyBURL_rules_v1}
rule_expression2 = {rule.split("\t")[3] for rule in anyBURL_rules_v2}
index = len(rule_expression1 & rule_expression2) / len(rule_expression1 | rule_expression2)
print("Jaccard index of mined rules v1 and v2: ", index)

# Mean of the third tab-separated field (confidence) for each rule set.
avg_conf1 = np.mean(list(map(lambda rule: float(rule.split("\t")[2]), anyBURL_rules_v1)))
avg_conf2 = np.mean(list(map(lambda rule: float(rule.split("\t")[2]), anyBURL_rules_v2)))

print("\nAvg confidence v1: {}, v2: {}".format(avg_conf1, avg_conf2))

Jaccard index of mined rules v1 and v2:  0.2712462672137839

Avg confidence v1: 0.5285819316297514, v2: 0.534223820369677


In [83]:
# Show only the first 20 rules instead of dumping the whole list into the output.
anyBURL_rules_v1[:20]

['50\t50\t1.0\tSTRING::BINDING::Gene:Gene(Gene::51077,Y) <= STRING::BINDING::Gene:Gene(Y,Gene::51077)\n',
 '211\t211\t1.0\tSTRING::CATALYSIS::Gene:Gene(X,Gene::1233) <= STRING::REACTION::Gene:Gene(X,Gene::1233)\n',
 '211\t211\t1.0\tSTRING::CATALYSIS::Gene:Gene(X,Gene::1233) <= STRING::REACTION::Gene:Gene(Gene::1233,X)\n',
 '13\t13\t1.0\tSTRING::OTHER::Gene:Gene(X,Gene::5140) <= STRING::OTHER::Gene:Gene(Gene::5140,X)\n',
 '105\t105\t1.0\tSTRING::BINDING::Gene:Gene(Gene::23063,Y) <= STRING::BINDING::Gene:Gene(Y,Gene::23063)\n',
 '6\t6\t1.0\tGNBR::E+::Gene:Gene(Gene::6287,Y) <= GNBR::E+::Gene:Gene(Y,Gene::6287)\n',
 '17\t17\t1.0\tSTRING::OTHER::Gene:Gene(Gene::788,Y) <= STRING::OTHER::Gene:Gene(Y,Gene::788)\n',
 '3\t3\t1.0\tGNBR::E::Compound:Gene(Compound::DB11257,Y) <= GNBR::E+::Compound:Gene(Compound::DB11257,Y)\n',
 '112\t112\t1.0\tSTRING::BINDING::Gene:Gene(X,Gene::2072) <= STRING::BINDING::Gene:Gene(Gene::2072,X)\n',
 '26\t26\t1.0\tSTRING::REACTION::Gene:Gene(X,Gene::9984) <= STRING:

In [84]:
# Show the first 20 rules of v2.
anyBURL_rules_v2[:20]

['50\t50\t1.0\tSTRING::BINDING::Gene:Gene(Gene::51077,Y) <= STRING::BINDING::Gene:Gene(Y,Gene::51077)\n',
 '3\t3\t1.0\tGNBR::Rg::Gene:Gene(Gene::23191,Y) <= GNBR::Rg::Gene:Gene(Y,Gene::23191)\n',
 '2\t2\t1.0\tbioarx::HumGenHumGen:Gene:Gene(Gene::27284,Y) <= INTACT::PHYSICAL ASSOCIATION::Gene:Gene(Gene::27284,Y)\n',
 '211\t211\t1.0\tSTRING::CATALYSIS::Gene:Gene(X,Gene::1233) <= STRING::REACTION::Gene:Gene(X,Gene::1233)\n',
 '211\t211\t1.0\tSTRING::CATALYSIS::Gene:Gene(X,Gene::1233) <= STRING::REACTION::Gene:Gene(Gene::1233,X)\n',
 '3\t3\t1.0\tSTRING::CATALYSIS::Gene:Gene(Gene::23774,Y) <= STRING::INHIBITION::Gene:Gene(Y,Gene::23774)\n',
 '13\t13\t1.0\tSTRING::OTHER::Gene:Gene(X,Gene::5140) <= STRING::OTHER::Gene:Gene(Gene::5140,X)\n',
 '105\t105\t1.0\tSTRING::BINDING::Gene:Gene(Gene::23063,Y) <= STRING::BINDING::Gene:Gene(Y,Gene::23063)\n',
 '112\t112\t1.0\tSTRING::BINDING::Gene:Gene(X,Gene::2072) <= STRING::BINDING::Gene:Gene(Gene::2072,X)\n',
 '26\t26\t1.0\tSTRING::REACTION::Gene:Gene

In [86]:
# Number of complete rule lines that appear in both v1 and v2.
len(set(anyBURL_rules_v1) & set(anyBURL_rules_v2))

57124

In [88]:
# Number of rule lines in v1.
len(anyBURL_rules_v1)

290590

In [89]:
# Number of rule lines in v2.
len(anyBURL_rules_v2)

301137

In [2]:
import pandas as pd