In [198]:
%%capture
!pip install wikidataintegrator

In [199]:
from rdflib import Graph, URIRef
from wikidataintegrator import wdi_core, wdi_login
from datetime import datetime
import copy
import getpass

In [200]:
# functions
def createDOReference(doid):
    """Build the reference claims used for statements sourced from the Disease Ontology.

    Args:
        doid: Disease Ontology identifier string (e.g. "DOID:0080972") that is
            recorded in the reference as a P699 external id.

    Returns:
        A three-element list of reference claims, in this order:
        "stated in" (P248 -> Q5282129), "retrieved" (P813, today's date at
        day precision) and the P699 external id.
    """
    # Only the date part is kept; the time component is fixed to midnight.
    today_stamp = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")
    return [
        wdi_core.WDItemID("Q5282129", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(today_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDExternalID(doid, prop_nr="P699", is_reference=True),
    ]

def createIORef():
    """Build the reference claims used for the identifiers.org URI statement.

    Returns:
        A two-element list of reference claims: "stated in"
        (P248 -> Q16335166) followed by a reference URL (P854) pointing at
        the identifiers.org registry entry for DOID.
    """
    registry_url = "https://registry.identifiers.org/registry/doid"
    return [
        wdi_core.WDItemID("Q16335166", prop_nr="P248", is_reference=True),
        wdi_core.WDUrl(registry_url, prop_nr="P854", is_reference=True),
    ]

In [201]:
# Interactive login: collect credentials at run time so nothing is hardcoded
# in the notebook. Note that the typed username is echoed in the cell output.
print("username:")
username = input()
print("password:")
# getpass keeps the password itself out of the rendered output.
password = getpass.getpass()
# Session object that is later passed to item.write() to authenticate edits.
login = wdi_login.WDLogin(user=username, pwd=password)

username:
andrawaag
password:
········


In [203]:
# Synchronise a single Disease Ontology term with its Wikidata item:
#   1. fetch the term's RDF from Ontobee,
#   2. look up an existing item by its DOID (P699) and, if one exists, check
#      it against entity schema E323,
#   3. build the identifier / exact-match statements with references,
#   4. write the item (filling in label and description when the English
#      label is still empty).
doid = "DOID:0080972"
douri = "http://purl.obolibrary.org/obo/" + doid.replace(":", "_")
print(douri)

# RDF/XML description of the term as served by Ontobee.
dog = Graph()
dog.parse("http://www.ontobee.org/ontology/rdf/DOID?iri=" + douri, format="xml")

# Find the item (if any) that already carries this DOID.
query = f"""
SELECT * WHERE {{?item wdt:P699 '{doid}'}}
"""
matches = wdi_core.WDFunctionsEngine.execute_sparql_query(query, as_dataframe=True)
if len(matches) > 0:
    qid = matches.iloc[0]["item"].replace("http://www.wikidata.org/entity/", "")
else:
    qid = None

if qid:
    item = wdi_core.WDItemEngine(wd_item_id=qid)
    precheck = item.check_entity_schema(eid="E323", output="result")
    if not precheck["result"]:
        # Raise instead of calling quit(): in a notebook quit() shuts the
        # kernel down, while an exception just stops this cell.
        raise RuntimeError(qid + " needs fixing to conform to E323")

do_reference = createDOReference(doid)
identorg_reference = createIORef()

statements = [
    # Disease Ontology ID (P699); external-id datatype, matching the one
    # used for P699 inside createDOReference.
    wdi_core.WDExternalID(value=doid, prop_nr="P699", references=[copy.deepcopy(do_reference)]),
    # exact match (P2888): OBO PURL of the term
    wdi_core.WDUrl(value=douri, prop_nr="P2888", references=[copy.deepcopy(do_reference)]),
    # exact match (P2888): identifiers.org URI
    wdi_core.WDUrl("http://identifiers.org/doid/" + doid, prop_nr="P2888", references=[copy.deepcopy(identorg_reference)]),
]

# External identifiers derived from skos:exactMatch values of the term.
# Each value looks like "<PREFIX>:<id>"; the prefix selects the property and
# is stripped so that only the bare identifier is stored.
# (A MESH: -> P486 mapping was left disabled by the original author.)
xref_properties = {
    "NCI:": "P1748",
    "ICD10CM:": "P4229",
    "UMLS_CUI:": "P2892",
}
term = URIRef(douri)
exact_match = URIRef("http://www.w3.org/2004/02/skos/core#exactMatch")
# Restrict to the term itself so that exactMatch values of any other
# resources present in the downloaded graph are not picked up.
for extID in dog.objects(subject=term, predicate=exact_match):
    xref = str(extID)
    for prefix, prop_nr in xref_properties.items():
        if xref.startswith(prefix):
            statements.append(
                wdi_core.WDExternalID(xref[len(prefix):], prop_nr=prop_nr, references=[copy.deepcopy(do_reference)])
            )
            break

# Pass the QID found above (empty string = let the engine resolve/create the
# item itself) so the statements land on the item that was schema-checked.
item = wdi_core.WDItemEngine(wd_item_id=qid or "", data=statements, keep_good_ref_statements=True)

if item.get_label(lang="en") == "":
    label = dog.value(term, URIRef("http://www.w3.org/2000/01/rdf-schema#label"))
    # Skip the label when the graph has none, rather than writing "None".
    if label is not None:
        item.set_label(str(label), lang="en")
    item.set_description("human disease", lang="en")

item.write(login)

http://purl.obolibrary.org/obo/DOID_0080972
2022-02-22 19:13:58.425011: maxlag. sleeping for 107.41666666666667 seconds


KeyboardInterrupt: 