In [None]:
import csv
import json

In [None]:
# Input/output locations. All paths are relative, so they resolve against the
# notebook's working directory.
edgesPath = "input/merged-kg_edges.tsv"  # edges TSV; not read by any cell visible here
nodesPath = "input/merged-kg_nodes.tsv"  # nodes TSV consumed by the node-conversion cell
outputPath = "output/kg-microbe.ttl"  # Turtle file that every triple is written to
# NOTE(review): chunkSize is not referenced in the visible cells — presumably a
# batch size for edge processing; confirm it is still needed.
chunkSize = 100000

In [None]:
# Namespace prefix -> namespace IRI. Every entry is emitted as an `@prefix`
# declaration at the top of the Turtle output; the "obo" entry is also used to
# shorten node IRIs into `obo:` prefixed names.
# Insertion order determines the order of the `@prefix` lines in the output.
# NOTE(review): "dc" is not used by any triple written in the visible cells.
prefixes = {
    "biolink": "https://w3id.org/biolink/vocab/",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "owl": "http://www.w3.org/2002/07/owl#",
    "dc": "http://purl.org/dc/terms/",
    "obo": "http://purl.obolibrary.org/obo/"
}

In [None]:
# Open the Turtle output. Turtle documents are UTF-8 by specification, so the
# encoding is pinned here instead of inheriting the platform default (which is
# not UTF-8 on every system).
# The handle is intentionally left open: add_triple() appends to it from later
# cells, and the last cell of the notebook closes it.
outputStream = open(outputPath, "w", encoding="utf-8")

# Header: one `@prefix` declaration per configured namespace.
for p, ns in prefixes.items():
    outputStream.write(f"@prefix {p}: <{ns}> .\n")

# Blank line separating the prefix header from the triples that follow.
outputStream.write("\n")

In [None]:
def add_triple(s: str, p: str, o: str):
    """Append one Turtle statement ``s p o .`` to the open output stream.

    The three terms are written verbatim, separated by single spaces and
    terminated with `` .`` and a newline. No escaping or validation is done
    here; callers must pass terms that are already valid Turtle.

    Args:
        s: Subject term.
        p: Predicate term.
        o: Object term (IRI, prefixed name, or an already-quoted literal).
    """
    statement = "{} {} {} .\n".format(s, p, o)
    outputStream.write(statement)

def add_label(s: str, label: str):
    """Write an ``rdfs:label`` triple for ``s`` with ``label`` as a string literal.

    Args:
        s: Subject term, already in Turtle form (e.g. ``obo:CHEBI_1``).
        label: Raw label text; it is quoted and escaped here.
    """
    # json.dumps supplies the surrounding double quotes and escapes `"`, `\`
    # and control characters in a form Turtle also accepts. With its default
    # ensure_ascii=True, however, characters above U+FFFF come out as UTF-16
    # surrogate pairs (\ud83e\udda0), which are not valid code-point escapes in
    # Turtle. So keep non-ASCII characters raw at this step...
    quoted = json.dumps(label, ensure_ascii=False)
    # ...and escape them per code point: \uXXXX inside the BMP (identical to
    # what json.dumps produced before), \UXXXXXXXX above it. The result is
    # pure ASCII, so it does not depend on the output file's encoding.
    literal = "".join(
        ch if ord(ch) < 0x80
        else "\\u{:04x}".format(ord(ch)) if ord(ch) <= 0xFFFF
        else "\\U{:08x}".format(ord(ch))
        for ch in quoted
    )
    add_triple(s, "rdfs:label", literal)


def add_type(s: str, t: str):
    """Write an ``rdf:type`` triple stating that ``s`` is an instance of ``t``.

    Args:
        s: Subject term, already in Turtle form.
        t: Class term, already in Turtle form; it is written unchanged.
    """
    predicate = "rdf:type"
    add_triple(s, predicate, t)


def add_synonym(s: str, syn: str):
    """Write a ``biolink:synonym`` triple for ``s`` with ``syn`` as a string literal.

    Args:
        s: Subject term, already in Turtle form (e.g. ``obo:CHEBI_1``).
        syn: Raw synonym text; it is quoted and escaped here.
    """
    # json.dumps supplies the surrounding double quotes and escapes `"`, `\`
    # and control characters in a form Turtle also accepts. With its default
    # ensure_ascii=True, however, characters above U+FFFF come out as UTF-16
    # surrogate pairs, which are not valid code-point escapes in Turtle. So
    # keep non-ASCII characters raw at this step...
    quoted = json.dumps(syn, ensure_ascii=False)
    # ...and escape them per code point: \uXXXX inside the BMP (identical to
    # what json.dumps produced before), \UXXXXXXXX above it. The result is
    # pure ASCII, so it does not depend on the output file's encoding.
    literal = "".join(
        ch if ord(ch) < 0x80
        else "\\u{:04x}".format(ord(ch)) if ord(ch) <= 0xFFFF
        else "\\U{:08x}".format(ord(ch))
        for ch in quoted
    )
    add_triple(s, "biolink:synonym", literal)


def add_same_as(s: str, s2: str):
    """Write an ``owl:sameAs`` triple linking ``s`` to ``s2``.

    Args:
        s: Subject term, already in Turtle form.
        s2: Term declared identical to ``s``; it is written unchanged.
    """
    predicate = "owl:sameAs"
    add_triple(s, predicate, s2)

In [None]:
# Give the synonym predicate itself a human-readable label. add_label quotes
# the text, so this emits: biolink:synonym rdfs:label "Synonym" .
add_label("biolink:synonym", "Synonym")

In [None]:
def _cell(row: list, column: int) -> str:
    """Return ``row[column]``, or ``""`` when the row is too short (e.g. a blank line)."""
    return row[column] if column < len(row) else ""


def _node_terms(iri_field: str) -> list:
    """Convert a ``|``-separated ``iri`` cell into distinct Turtle terms, in input order.

    * IRIs inside the ``obo`` namespace become ``obo:<local id>``.
    * Other absolute IRIs (anything containing ``://``) are written in full as
      ``<iri>``, since they cannot be expressed with the ``obo`` prefix.
    * Bare values without a scheme are treated as OBO local ids.

    Empty entries (including the bare namespace with no local id) are dropped.
    """
    obo_ns = prefixes["obo"]
    terms = {}  # dict used as an ordered set: de-duplicates, keeps first-seen order
    for iri in iri_field.split("|"):
        iri = iri.strip()
        if iri.startswith(obo_ns):
            # Remove the namespace as an exact prefix. str.lstrip(obo_ns) would
            # instead strip any leading run of the namespace's *characters*,
            # eating into local ids that start with one of those characters.
            local = iri[len(obo_ns):]
            term = f"obo:{local}"
        elif "://" in iri:
            local = iri
            term = f"<{iri}>"
        else:
            local = iri
            term = f"obo:{iri}"
        if local:
            terms[term] = None
    return list(terms)


# Stream the nodes TSV and emit, per node: owl:sameAs links between its IRIs,
# an rdfs:label, biolink:synonym literals and rdf:type statements.
i = 0  # number of data rows read; stays 0 when the file has no data rows
with open(nodesPath, newline="") as f_in:  # closed even if a row raises
    reader = csv.reader(f_in, delimiter="\t")
    # Column name -> column index, taken from the header row (empty file -> {}).
    header = {name: idx for idx, name in enumerate(next(reader, []))}
    for i, row in enumerate(reader, start=1):
        if i % 50000 == 0:
            print(f"processed lines: {i}")

        terms = _node_terms(_cell(row, header["iri"]))
        if not terms:
            continue  # a node without any IRI cannot be a subject

        # The first listed IRI is the subject, so the choice is reproducible
        # between runs (popping from a set picks an arbitrary element); every
        # remaining IRI is linked to it.
        s, *aliases = terms
        for alias in aliases:
            add_same_as(s, alias)

        n = _cell(row, header["name"]).strip()
        if n:
            add_label(s, n)

        for syn in _cell(row, header["synonym"]).split("|"):
            syn = syn.strip()
            if syn:
                add_synonym(s, syn)

        # Category values are written unchanged as the rdf:type object.
        for t in _cell(row, header["category"]).split("|"):
            t = t.strip()
            if t:
                add_type(s, t)

print(f"(Done) processed lines: {i}")

In [None]:
# Close the Turtle output file opened earlier in the notebook. Run this only
# after every triple-writing cell has finished: add_triple() writes to this
# same handle and will fail once it is closed.
outputStream.close()