# Loading the Symptom ontology
Source: https://raw.githubusercontent.com/DiseaseOntology/SymptomOntology/main/symp.owl. 

In [1]:
from rdflib import Graph, URIRef
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interactive
from IPython.display import IFrame, clear_output
#import tasks

df = pd.DataFrame(columns=["so_uri", "soid", "label", "subclassof", "aliases"])
# Download
url = "https://raw.githubusercontent.com/DiseaseOntology/SymptomOntology/main/symp.owl"
# Parse owl file into a graph object
symptomGraph = Graph()
symptomGraph.parse(url, format="xml")

qres = symptomGraph.query(
"""
   PREFIX obo: <http://www.geneontology.org/formats/oboInOwl#>
   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
   PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

   SELECT DISTINCT ?so_uri ?soid ?label (GROUP_CONCAT(?subClassOf;separator="|") as ?subclasses)
                                        (GROUP_CONCAT(?exactsynonym;separator="|") as ?exact_synonyms)
   WHERE {
    ?so_uri obo:id ?soid ;
              rdfs:label ?label .
    FILTER NOT EXISTS {?so_uri owl:deprecated true}
     OPTIONAL {?so_uri rdfs:subClassOf ?subClassOf ;}
     OPTIONAL {?so_uri oboInOwl:hasExactSynonym ?exactsynonym}
   }
   GROUP BY ?so_uri """)

for row in qres:
    df = df.append({
     "so_uri": str(row[0]),
     "soid": str(row[1]),
     "label":  str(row[2]),
     "subclassof": str(row[3]),
     "aliases": str(row[4])
      }, ignore_index=True)
pd.set_option('display.max_rows', 40)
df

Unnamed: 0,so_uri,soid,label,subclassof,aliases
0,http://purl.obolibrary.org/obo/SYMP_0000008,SYMP:0000008,blindness,http://purl.obolibrary.org/obo/SYMP_0000320,
1,http://purl.obolibrary.org/obo/SYMP_0000316,SYMP:0000316,leg cramp,http://purl.obolibrary.org/obo/SYMP_0000093,
2,http://purl.obolibrary.org/obo/SYMP_0000260,SYMP:0000260,dry mouth,http://purl.obolibrary.org/obo/SYMP_0000386,
3,http://purl.obolibrary.org/obo/SYMP_0000739,SYMP:0000739,bronchopulmonary bleeding,http://purl.obolibrary.org/obo/SYMP_0000514,
4,http://purl.obolibrary.org/obo/SYMP_0000742,SYMP:0000742,gum bleeding,http://purl.obolibrary.org/obo/SYMP_0000655,gingival hemorrhage
...,...,...,...,...,...
837,http://purl.obolibrary.org/obo/SYMP_0000107,SYMP:0000107,pleural effusion,http://purl.obolibrary.org/obo/SYMP_0000488,
838,http://purl.obolibrary.org/obo/SYMP_0000385,SYMP:0000385,throat symptom,http://purl.obolibrary.org/obo/SYMP_0000597,
839,http://purl.obolibrary.org/obo/SYMP_0000647,SYMP:0000647,tache noire,http://purl.obolibrary.org/obo/SYMP_0000136,
840,http://purl.obolibrary.org/obo/SYMP_0000535,SYMP:0000535,change in skin texture,http://purl.obolibrary.org/obo/SYMP_0000488,


In [2]:
resolveWikiButton = widgets.Button(description="Search concept")
wikis = widgets.Output()
display(resolveWikiButton, wikis)
@resolveWikiButton.on_click
def resolveWiki(b):
    if "df" in dir(soDownloadButton):  
        with wikis:
            display(tasks.task2(soDownloadButton.df))

Button(description='Search concept', style=ButtonStyle())

Output()

# Which symptom ontology IDs are already in Wikidata

In [3]:
from wikidataintegrator import wdi_core, wdi_login
query = """
SELECT ?symptom ?symptomLabel ?soid WHERE {
   ?symptom wdt:P8656 ?soid .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
soQids = {}
inwikidata=wdi_core.WDFunctionsEngine.execute_sparql_query(query, as_dataframe=True)
for index, row in inwikidata.iterrows():
    soQids["SOID:"+row["soid"]] = row["symptom"]
len(soQids.keys())

844

In [4]:
newItems = set(df["soid"].tolist())-set(inwikidata["soid"].tolist())
len(newItems)

841

# Which symptom labels are already in Wikidata

In [5]:
chunks = df["label"].to_list()
for index, row in df.iterrows():
    if row["aliases"] != "":
        for alias in row["aliases"].split("|"):
            chunks.append(alias.replace("'s", "\'s"))
pages = 100
loops = [chunks[i:i+pages] for i in range(0, len(chunks), pages)]
loops
t =1 lo

frames = []
for run in loops:
    values = "VALUES ?label {\"" + "\"@en \"".join(run)+"\"} VALUES ?altLabel {\"" + "\"@en \"".join(run)+"\"}"
    sparql = "SELECT ?label ?symptom ?symptomLabel ?symptomAltLabel (GROUP_CONCAT(?typeLabel; separator=\", \") as ?types) ?soid WHERE {"
    sparql += values 
    sparql += """  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  {
    ?symptom wdt:P31 ?type ;
            rdfs:label ?label .
    ?type rdfs:label ?typeLabel .
    FILTER (lang(?typeLabel)="en")          
  } UNION {
    ?symptom wdt:P31 ?type ;
             skos:altLabel ?label .
    ?type rdfs:label ?typeLabel .
    FILTER (lang(?typeLabel)="en")
   }
   FILTER NOT EXISTS {?symptom wdt:P8656 ?soid}
}
GROUP BY ?label ?symptom ?symptomLabel ?symptomAltLabel ?soid"""

    frames.append(wdi_core.WDFunctionsEngine.execute_sparql_query(sparql, as_dataframe=True))
    print(str(t)+"/"+str(len(loops)))
    t+=1
    
results = pd.concat(frames)
results = pd.merge(left=results, right=df, how='left', left_on='label', right_on='label')
results

1/11
2/11
3/11
4/11
5/11
6/11
7/11
8/11
9/11
10/11
11/11


Unnamed: 0,label,symptom,symptomLabel,symptomAltLabel,types,so_uri,soid,subclassof,aliases
0,hair loss,http://www.wikidata.org/entity/Q181391,baldness,"alopecia, hair loss, calvity, loss of hair","absence, absence, absence, absence, absence, a...",http://purl.obolibrary.org/obo/SYMP_0000440,SYMP:0000440,http://purl.obolibrary.org/obo/SYMP_0000488,
1,tracheobronchitis,http://www.wikidata.org/entity/Q244834,kennel cough,"tracheobronchitis, canine infectious tracheobr...","dog disease, dog disease, dog disease, dog dis...",http://purl.obolibrary.org/obo/SYMP_0000160,SYMP:0000160,http://purl.obolibrary.org/obo/SYMP_0000385,
2,bleeding,http://www.wikidata.org/entity/Q356405,bloodletting,"bleeding, blood letting","treatment, treatment, treatment, treatment, tr...",http://purl.obolibrary.org/obo/SYMP_0000007,SYMP:0000007,http://purl.obolibrary.org/obo/SYMP_0000567,
3,wasting,http://www.wikidata.org/entity/Q7972881,wasting,"Wasting disease, Wasting disease (context-depe...","syndrome, syndrome, syndrome, syndrome, syndro...",http://purl.obolibrary.org/obo/SYMP_0000174,SYMP:0000174,http://purl.obolibrary.org/obo/SYMP_0000473,
4,hair loss,http://www.wikidata.org/entity/Q18251216,hl,hair loss,"gene, gene, gene, gene, gene, gene, gene, gene...",http://purl.obolibrary.org/obo/SYMP_0000440,SYMP:0000440,http://purl.obolibrary.org/obo/SYMP_0000488,
...,...,...,...,...,...,...,...,...,...
98,AF,http://www.wikidata.org/entity/Q27144123,Ala-Phe,"AF, alanylphenylalanine, L-Ala-L-Phe","chemical compound, chemical compound, chemical...",,,,
99,pyrexia,http://www.wikidata.org/entity/Q29714892,pyx,"pyrexia, Pyx, CG17142, CT33412, Dmel_CG17142, ...","gene, gene, gene, gene, gene, gene, gene, gene...",,,,
100,pyrexia,http://www.wikidata.org/entity/Q29811957,Pyrexia Dmel_CG17142,"pyrexia, CG17142 gene product from transcript ...","protein, protein, protein, protein, protein, p...",,,,
101,AF,http://www.wikidata.org/entity/Q30266834,Arannayk Foundation,"AF, Bangladesh Tropical Forest Conservation Fo...","nonprofit organization, nonprofit organization...",,,,


In [6]:
pd.set_option('display.max_rows', 10)
results.to_csv("labelmatchsymptoms.csv")


In [7]:
from datetime import datetime
import copy

def create_reference(soid, retrieved):
    refStatedIn = wdi_core.WDItemID(value="Q81661810", prop_nr="P248", is_reference=True)
    timeStringNow = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    refRetrieved = wdi_core.WDTime(timeStringNow, prop_nr="P813", is_reference=True)
    refSympotomOntologyID = wdi_core.WDString(value=soid, prop_nr="P8656", is_reference=True)
    reference = [refStatedIn, refRetrieved, refSympotomOntologyID]
    return reference

def create_reference_wosoid(soid, retrieved):
    refStatedIn = wdi_core.WDItemID(value="Q81661810", prop_nr="P248", is_reference=True)
    timeStringNow = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    refRetrieved = wdi_core.WDTime(timeStringNow, prop_nr="P813", is_reference=True)
    refSympotomOntologyID = wdi_core.WDString(value=soid, prop_nr="P8656", is_reference=True)

    reference = [refStatedIn, refRetrieved]
    return reference
 
WDUSER = "Andrawaag"
WDPASS = <not disclosed>

login = wdi_login.WDLogin(WDUSER, WDPASS)


SyntaxError: invalid syntax (<ipython-input-7-2ae1ee2158ea>, line 22)

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual, GridspecLayout, Layout
import ipywidgets as widgets

grid = GridspecLayout(len(results)+1, 9)

grid[0,0]= widgets.Label("wd_symptom")
grid[0,1]= widgets.Label("symptom")
grid[0,2]= widgets.Label("symptomLabels")
grid[0,3]= widgets.Label("symptomAliases")
grid[0,4]= widgets.Label("types")
grid[0,5]= widgets.Label("wd_soid")
grid[0,6]= widgets.Label("soid")
grid[0,7]= widgets.Label("update")
grid[0,8]= widgets.Label("create")

display(grid)
def create_button(description, button_style, tupel):
    button = widgets.Button(
        description=description, 
        button_style=button_style, 
        layout=Layout(height='auto', 
                      width='auto'), 
        )
    button.row=tupel
    return button

def prepare_statements(row):
    reference = create_reference(row["soid"].replace("SYMP:", "") , datetime.now())
    reference_wosoid =  create_reference_wosoid(row["soid"].replace("SYMP:", "") , datetime.now())
    statements = []

    # instance of symptom (P31)
    statements.append(wdi_core.WDItemID(value="Q169872", prop_nr="P31",  references=[copy.deepcopy(reference_wosoid)]))

    #Symptom ontology ID (P8656)
    statements.append(wdi_core.WDExternalID(row["soid"].replace("SYMP:", ""), prop_nr="P8656", references=[copy.deepcopy(reference)]))
    #exact match (P2888)
    statements.append(wdi_core.WDUrl(value=row["so_uri"], prop_nr="P2888", references=[copy.deepcopy(reference)]))
    return statements


def show(b):
    print(b.row["symptom"])
    
def update(b):
    statements = prepare_statements(b.row)
    print(b.row["symptom"])
    item = wdi_core.WDItemEngine(wd_item_id=b.row["symptom"].replace("http://www.wikidata.org/entity/", ""), data=statements,append_value=["P31", "P2888"])
    # If there is no label and the label is not in the list of aliases
    if item.get_label(lang="en") != "" and b.row["label"] not in item.get_aliases(lang="en"):
      item.set_label(b.row["label"], lang="en")
    # If there is a label, which isn't the provided label and the label is also not in the aliases
    elif item.get_label(lang="en") != b.row["label"] and b.row["label"] not in item.get_aliases():
      aliases = item.get_aliases(lang="en")
      aliases.append(b.row["label"])
      item.set_aliases(aliases=aliases,lang="en")
    print(item.write(login))
    
def create(b):
    statements = prepare_statements(b.row)
    item = wdi_core.WDItemEngine(new_item=True, data=statements)
    # If there is no label and the label is not in the list of aliases
    if item.get_label(lang="en") != "" and b.row["label"] not in item.get_aliases(lang="en"):
      item.set_label(b.row["label"], lang="en")
    # If there is a label, which isn't the provided label and the label is also not in the aliases
    elif item.get_label(lang="en") != b.row["label"] and b.row["label"] not in item.get_aliases():
      aliases = item.get_aliases(lang="en")
      aliases.append(b.row["label"])
      item.set_aliases(aliases=aliases,lang="en")
    print(item.write(login))
    
i=1
for index, row in results.iterrows():
    qid = row['symptom'].replace('http://www.wikidata.org/entity/', '')
    grid[i,0] = widgets.HTML("<a href = '"+row['symptom']+"'>"+qid+ "</a>")
    grid[i,1] = widgets.Label(row["so_uri"])
    grid[i,2] = widgets.Label(row["label"])
    if isinstance(row["aliases"], str):
        grid[i,3] = widgets.Label(row["aliases"])
    grid[i,4] = widgets.Label(row["types"])
    # if isinstance(row["soid_x"], str):
    #    grid[i,5] = widgets.Label(row["soid_x"])
    grid[i ,6] = widgets.Label(row["soid"])
    grid[i, 7] = create_button("update", 'warning', row)
    grid[i, 7].on_click(update)
    grid[i, 8] = create_button("create", 'danger', row)
    grid[i, 8].on_click(create)
    i=i+1
    

    

In [None]:
WDUSER = "Andrawaag"
WDPASS = <not disclosed>

login = wdi_login.WDLogin(WDUSER, WDPASS)

In [None]:
qid = "Q1071481"
statements = prepare_statements(row)
item = wdi_core.WDItemEngine(wd_item_id=qid, data=statements)
item.set_label(row["label"], lang="en")
item.set_description("symptom")
if row["aliases"] != "": 
    item.set_aliases("|".split(row["aliases"]))
print(item.write(login))

In [None]:
from wikidataintegrator import wdi_core, wdi_login
query = """
SELECT ?symptom ?symptomLabel ?soid WHERE {
   ?symptom wdt:P8656 ?soid .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
soQids = {}
inwikidata=wdi_core.WDFunctionsEngine.execute_sparql_query(query, as_dataframe=True)
for index, row in inwikidata.iterrows():
    soQids[row["soid"]] = row["symptom"]
    

# bot to create Symptom items in Wikidata

In [None]:
WDUSER = ""
WDPASS = ""

login = wdi_login.WDLogin(WDUSER, WDPASS)

In [None]:
for index, row in df.iterrows():
    soid = row["soid"].replace("SYMP:", "")
    print(soid)
    if soid not in soQids.keys(): 
        statements = prepare_statements(row)
        item = wdi_core.WDItemEngine(new_item=True, data=statements)
        item.set_label(row["label"], lang="en")
        item.set_description("symptom")
        if row["aliases"] != "": 
            item.set_aliases("|".split(row["aliases"]))
        #print(item.get_wd_json_representation())
        print(item.write(login))
        

In [None]:
for index, row in df.iterrows():
    soid = row["soid"].replace("SYMP:", "")
    print(soid)
    if soid not in soQids.keys(): 
        statements = prepare_statements(row)
        item = wdi_core.WDItemEngine(new_item=True, data=statements)
        item.set_label(row["label"], lang="en")
        item.set_description("symptom")
        if row["aliases"] != "": 
            item.set_aliases("|".split(row["aliases"]))
        #print(item.get_wd_json_representation())
        print(item.write(login))
        

In [None]:
parents = dict()
for index, row in df.iterrows():
    soid = row["so_uri"].replace("http://purl.obolibrary.org/obo/SYMP_", "")
    if row["so_uri"] != "http://purl.obolibrary.org/obo/SYMP_0000462":
        for subclass in row["subclassof"].split("|"):
            if row["so_uri"] == "http://purl.obolibrary.org/obo/symp#part_of":
                continue
            if soQids[soid] not in parents.keys():
                parents[soQids[soid]] = []
            if soQids[subclass.replace("http://purl.obolibrary.org/obo/SYMP_", "")] not in parents[soQids[soid]]: 
                parents[soQids[soid]].append(soQids[subclass.replace("http://purl.obolibrary.org/obo/SYMP_", "")])
    

In [None]:
QidsSo = dict()
for key in soQids.keys():
    QidsSo[soQids[key]] = key
QidsSo    

In [None]:
import pprint
for qid in parents.keys():
    statements = []
    reference = create_reference(QidsSo[qid] , datetime.now())
    for parent in parents[qid]:
        statements.append(wdi_core.WDItemID(parent.replace("http://www.wikidata.org/entity/", ""), prop_nr="P279", references=[copy.deepcopy(reference)]))
    item = wdi_core.WDItemEngine(wd_item_id=qid.replace("http://www.wikidata.org/entity/", ""), data=statements, append_value=["P279"])
    #pprint.pprint(item.get_wd_json_representation())
    print(item.write(login))
    

In [None]:
for qid in QidsSo.keys():  
    item = wdi_core.WDItemEngine(wd_item_id=qid.replace("http://www.wikidata.org/entity/", ""))
    json = item.get_wd_json_representation()

    if "P31" in json["claims"].keys():
        for claim in json["claims"]["P31"]: 
            print(claim["id"])
            for reference in claim["references"]:
                if "P248" in reference["snaks"].keys():
                    for snakP248 in reference["snaks"]["P248"]:
                        if snakP248["datavalue"]["value"]["id"] == "Q81661810":
                            wdi_core.WDItemEngine.delete_statement(statement_id=claim["id"], revision=item.lastrevid, login=login)


In [None]:
QidsSo