In [1]:
%%capture
!pip install wikidataintegrator

In [31]:
from rdflib import Graph, URIRef
from wikidataintegrator import wdi_core, wdi_login
from datetime import datetime
import copy
import pandas as pd
import getpass

In [32]:
print("username:")
username = input()
print("password:")
password = getpass.getpass()
login = wdi_login.WDLogin(user=username, pwd=password)

username:
andrawaag
password:
········


In [33]:
# functions
def createOBOReference(doid):
    statedin = wdi_core.WDItemID(obowditem, prop_nr="P248", is_reference=True)
    retrieved = datetime.now()
    timeStringNow = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    refRetrieved = wdi_core.WDTime(timeStringNow, prop_nr="P813", is_reference=True)
    id = wdi_core.WDExternalID(oboid, prop_nr=oboidwdprop, is_reference=True)
    return [statedin, refRetrieved, id]

In [34]:
query = """

SELECT * WHERE {
  ?ontology rdfs:label ?ontologyLabel ;
            wdt:P361 wd:Q4117183 ;
            wdt:P1687 ?wdprop .
  OPTIONAL {?ontology            wdt:P1813 ?shortname .}
  
  OPTIONAL {?wdprop wdt:P1630 ?formatterURL .}
  FILTER (lang(?ontologyLabel) = "en")
}
"""
wdmappings = wdi_core.WDFunctionsEngine.execute_sparql_query(query, as_dataframe=True)

In [35]:
oboid = "SO:0000110"
obouri = "http://purl.obolibrary.org/obo/"+oboid.replace(":", "_")
oboontology = oboid.split(":")[0]

## Fetch the OBO ontology
obog = Graph()
obog.parse(f"http://www.ontobee.org/ontology/rdf/{oboontology}?iri="+obouri, format="xml")

<Graph identifier=N653af537c3734d64a6c65f7c06a2f027 (<class 'rdflib.graph.Graph'>)>

In [36]:
oboqid = wdmappings[wdmappings["shortname"]==oboid.split(":")[0]]["ontology"].iloc[0].replace("http://www.wikidata.org/entity/", "")
wdmappings

Unnamed: 0,ontology,wdprop,ontologyLabel,shortname,formatterURL
0,http://www.wikidata.org/entity/Q55118646,http://www.wikidata.org/entity/P1928,Vaccine Ontology,VO,http://purl.obolibrary.org/obo/$1
1,http://www.wikidata.org/entity/Q42404539,http://www.wikidata.org/entity/P4537,Spider Ontology,SPD,http://purl.obolibrary.org/obo/SPD_$1
2,http://www.wikidata.org/entity/Q81661549,http://www.wikidata.org/entity/P5501,BRENDA tissue / enzyme source,BTO,https://www.brenda-enzymes.org/ontology.php?on...
3,http://www.wikidata.org/entity/Q55118395,http://www.wikidata.org/entity/P6767,Food Ontology,FOODON,http://purl.obolibrary.org/obo/FOODON_$1
4,http://www.wikidata.org/entity/Q81661634,http://www.wikidata.org/entity/P6778,Gazetteer,GAZ,http://purl.obolibrary.org/obo/GAZ_$1
5,http://www.wikidata.org/entity/Q55118285,http://www.wikidata.org/entity/P7963,Cell Ontology,CL,http://purl.obolibrary.org/obo/$1
6,http://www.wikidata.org/entity/Q81661810,http://www.wikidata.org/entity/P8656,Symptom Ontology,SYMP,https://www.ebi.ac.uk/ols/ontologies/symp/term...
7,http://www.wikidata.org/entity/Q104030182,http://www.wikidata.org/entity/P9334,Cephalopod Ontology,CEPH,http://purl.obolibrary.org/obo/CEPH_$1
8,http://www.wikidata.org/entity/Q81661648,http://www.wikidata.org/entity/P9356,Hymenoptera Anatomy Ontology,HAO,http://purl.obolibrary.org/obo/HAO_$1
9,http://www.wikidata.org/entity/Q97063846,http://www.wikidata.org/entity/P9827,"Gender, Sex, and Sexual Orientation Ontology",GSSO,http://purl.obolibrary.org/obo/GSSO_$1


In [26]:
# wikidata
obowditem = wdmappings[wdmappings["shortname"]==oboid.split(":")[0]]["ontology"].iloc[0].replace("http://www.wikidata.org/entity/", "")
oboidwdprop =wdmappings[wdmappings["shortname"]==oboid.split(":")[0]]["wdprop"].iloc[0].replace("http://www.wikidata.org/entity/", "") #gene ontology id


## Fetch Wikidata part of the OBO ontology
query = f"""
SELECT * WHERE {{?item wdt:{oboidwdprop} '{oboid}'}}
"""
qid = wdi_core.WDFunctionsEngine.execute_sparql_query(query, as_dataframe=True)
if len(qid) >0:
    qid = qid.iloc[0]["item"].replace("http://www.wikidata.org/entity/", "")
else:
    qid = None

# Bot
## ShEx precheck

if qid:
    item = wdi_core.WDItemEngine(wd_item_id=qid) 
    # precheck = item.check_entity_schema(eid="E323", output="result")
    #if not precheck["result"]:
    #    print(qid + " needs fixing to conform to E323")
    #    quit()
print("continue")

obo_reference = createOBOReference(oboid)

# Statements build up
## OBO ontology generic
statements = []
# OBO ID 
statements.append(wdi_core.WDString(value=oboid, prop_nr=oboidwdprop, references=[copy.deepcopy(obo_reference)]))
# exact match (P2888)
statements.append(wdi_core.WDUrl(value=obouri, prop_nr="P2888", references=[copy.deepcopy(obo_reference)]))

## OBO resource specific 
### Gene Ontology
gotypes =  {"biological_process": "Q2996394", 
 "molecular_function": "Q14860489", 
 "cellular_component": "Q5058355",
}

for gotype in obog.objects(predicate=URIRef("http://www.geneontology.org/formats/oboInOwl#hasOBONamespace")):
    statements.append(wdi_core.WDItemID(gotypes[str(gotype)], prop_nr="P31", references=[copy.deepcopy(obo_reference)]))

#external identifiers based on skos:exactMatch
for extID in obog.objects(predicate=URIRef("http://www.w3.org/2004/02/skos/core#exactMatch")):
    # if "MESH:" in extID:
    #    statements.append(wdi_core.WDExternalID(row["exactMatch"].replace("MESH:", ""), prop_nr="P486", references=[copy.deepcopy(do_reference)]))
    if "NCI:" in extID:
        statements.append(wdi_core.WDExternalID(row["exactMatch"], prop_nr="P1748", references=[copy.deepcopy(do_reference)]))
    if "ICD10CM:" in extID:
        statements.append(wdi_core.WDExternalID(row["exactMatch"], prop_nr="P4229", references=[copy.deepcopy(do_reference)]))
    if "UMLS_CUI:" in extID:
        statements.append(wdi_core.WDExternalID(row["exactMatch"], prop_nr="P2892", references=[copy.deepcopy(do_reference)]))
item = wdi_core.WDItemEngine(data=statements, keep_good_ref_statements=True)
print(item.write(login))


continue


KeyError: 'sequence'

In [27]:
bloeb = Graph()
uri = bloeb.parse("http://www.ontobee.org/ontology/rdf/SO?iri=http://purl.obolibrary.org/obo/SO_0001565", format="xml")

In [29]:
print(bloeb.serialize(format="turtle"))

@prefix ns3: <http://purl.obolibrary.org/obo/> .
@prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ns3:IAO_0000115 a owl:AnnotationProperty .

ns3:SO_0001565 a owl:Class ;
    rdfs:label "gene_fusion"^^xsd:string ;
    ns3:IAO_0000115 "A sequence variant whereby a two genes have become joined."^^xsd:string ;
    oboInOwl:created_by "kareneilbeck"^^xsd:string ;
    oboInOwl:creation_date "2010-03-22T02:28:28Z"^^xsd:string ;
    oboInOwl:hasExactSynonym "gene fusion"^^xsd:string ;
    oboInOwl:hasOBONamespace "sequence"^^xsd:string ;
    oboInOwl:id "SO:0001565"^^xsd:string ;
    rdfs:subClassOf ns3:SO_0001564,
        ns3:SO_0001882 .

oboInOwl:created_by a owl:AnnotationProperty .

oboInOwl:creation_date a owl:AnnotationProperty .

oboInOwl:hasExactSynonym a owl:AnnotationProperty .

oboInOwl:hasOBONamespace a owl: