In [3]:
pip install SPARQLWrapper

Collecting SPARQLWrapper
  Obtaining dependency information for SPARQLWrapper from https://files.pythonhosted.org/packages/31/89/176e3db96e31e795d7dfd91dd67749d3d1f0316bb30c6931a6140e1a0477/SPARQLWrapper-2.0.0-py3-none-any.whl.metadata
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Installing collected packages: SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS

from SPARQLWrapper import SPARQLWrapper, JSON

In [30]:
WD = Namespace("http://www.wikidata.org/entity/")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
ex = Namespace("https://www.example.org/")
wdt = Namespace("https://www.wikidata.org/prop/direct/")

In [31]:
sparql_query = """
SELECT DISTINCT ?drug ?drugLabel ?disease ?diseaseLabel ?cause ?causeLabel ?symptom ?symptomLabel ?sideEffect ?sideEffectLabel ?gene ?geneLabel
WHERE {
  ?drug wdt:P31 wd:Q12140 .  # medicine
  ?drug wdt:P2175 ?disease .    # treats disease
  OPTIONAL { ?disease wdt:P828 ?cause } .  # cause of disease
  OPTIONAL { ?disease wdt:P780 ?symptom } .  # symptom/complaint of disease
  OPTIONAL { ?drug wdt:P780 ?sideEffect } .  # potential side effect
  OPTIONAL { ?disease wdt:P1057 ?gene } .  # gene associated with disease
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
"""

In [33]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery(sparql_query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Create an RDF graph
g = Graph()

# Bind namespaces
g.bind("wd", WD)
g.bind("wdt", WDT)
g.bind("rdfs", RDFS)

# Process query results
for result in results["results"]["bindings"]:
    drug = URIRef(result["drug"]["value"])
    disease = URIRef(result["disease"]["value"])

    # Add RDF types
    g.add((drug, RDF.type, ex.Drug))
    g.add((disease, RDF.type, ex.Disease))
    if "cause" in result:
        cause = URIRef(result["cause"]["value"])
        g.add((cause, RDF.type, ex.Cause))
    if "symptom" in result:
        symptom = URIRef(result["symptom"]["value"])
        g.add((symptom, RDF.type, ex.Symptom))
    if "sideEffect" in result:
        side_effect = URIRef(result["sideEffect"]["value"])
        g.add((side_effect, RDF.type, ex.SideEffect))
    if "gene" in result:
        gene = URIRef(result["gene"]["value"])
        g.add((gene, RDF.type, ex.Gene))

    # Add labels
    g.add((drug, RDFS.label, Literal(result["drugLabel"]["value"])))
    g.add((disease, RDFS.label, Literal(result["diseaseLabel"]["value"])))
    if "cause" in result:
        g.add((cause, RDFS.label, Literal(result["causeLabel"]["value"])))
    if "symptom" in result:
        g.add((symptom, RDFS.label, Literal(result["symptomLabel"]["value"])))
    if "sideEffect" in result:
        g.add((side_effect, RDFS.label, Literal(result["sideEffectLabel"]["value"])))
    if "gene" in result:
        g.add((gene, RDFS.label, Literal(result["geneLabel"]["value"])))

    # Add relationships
    g.add((drug, ex.treats, disease))
    if "cause" in result:
        g.add((disease, ex.hasCause, cause))
    if "symptom" in result:
        g.add((disease, ex.hasSymptom, symptom))
    if "sideEffect" in result:
        g.add((drug, ex.hasSideEffect, side_effect))
    if "gene" in result:
        g.add((disease, ex.associatedGene, gene))

# Serialize the graph to a TTL file
g.serialize(destination='medical_graph.ttl', format='turtle')


<Graph identifier=N649d76492c0240779289d23fcdd8f760 (<class 'rdflib.graph.Graph'>)>

In [25]:
g = Graph()
g.parse("wikidata_drug_disease_graph.ttl", format="turtle")

# Define a namespace for our ontology
ONT = Namespace("http://example.com/ontology#")

# Create a new RDF graph for the ontology
ontology_graph = Graph()

# Iterate over triples to identify classes and properties
classes = set()
properties = set()
for s, p, o in g:
    if isinstance(s, URIRef):
        classes.add(s)
    if isinstance(p, URIRef):
        properties.add(p)
    if isinstance(o, URIRef):
        classes.add(o)

# Define classes in the ontology
for class_uri in classes:
    ontology_graph.add((class_uri, RDF.type, RDFS.Class))
    ontology_graph.add((class_uri, RDFS.label, class_uri))

# Define properties in the ontology
for property_uri in properties:
    ontology_graph.add((property_uri, RDF.type, RDF.Property))
    ontology_graph.add((property_uri, RDFS.label, property_uri))

# Serialize the ontology to a Turtle file
ontology_graph.serialize(destination="wikidata_ontology.ttl", format="turtle"

(True, 'ontology.txt')