In [3]:
pip install SPARQLWrapper

Collecting SPARQLWrapper
  Obtaining dependency information for SPARQLWrapper from https://files.pythonhosted.org/packages/31/89/176e3db96e31e795d7dfd91dd67749d3d1f0316bb30c6931a6140e1a0477/SPARQLWrapper-2.0.0-py3-none-any.whl.metadata
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Installing collected packages: SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS

from SPARQLWrapper import SPARQLWrapper, JSON

In [5]:
# Wikidata namespaces used throughout the notebook:
#   WD  - entity IRIs (Q-ids), e.g. WD.Q12140
#   WDT - "direct" (truthy) property IRIs (P-ids), e.g. WDT.P2175
WD = Namespace("http://www.wikidata.org/entity/")
WDT = Namespace("http://www.wikidata.org/prop/direct/")
# RDFS is intentionally not rebound here: the import cell already provides
# rdflib.namespace.RDFS, and redefining it only shadowed that import.

In [26]:
# SPARQL text sent to the Wikidata endpoint by the cells below. It selects
# items that are an instance of wd:Q12140 together with what they treat
# (wdt:P2175); cause, symptom, side effect and gene are each OPTIONAL, so
# those variables (and their ?...Label companions) may be missing from a row.
#
# The wd:, wdt:, wikibase: and bd: prefixes are not declared in the string, so
# the query depends on the endpoint predefining them.
#
# NOTE(review): the four OPTIONAL patterns are independent of each other, so
# one drug/disease pair presumably expands into the cross product of its
# causes, symptoms, side effects and genes - expect a large, slow result set.
#
# NOTE(review): wdt:P780 is used both for the disease's symptoms and for the
# drug's "side effect", and wdt:P1057 for "gene". Wikidata documents P780 as
# "symptoms and signs" and P1057 as "chromosome"; the dedicated properties
# appear to be P1909 ("side effect") and P2293 ("genetic association").
# Confirm the intended properties before relying on those columns, and keep
# the predicates written by the graph-building cell in sync with any change.
sparql_query = """
SELECT DISTINCT ?drug ?drugLabel ?disease ?diseaseLabel ?cause ?causeLabel ?symptom ?symptomLabel ?sideEffect ?sideEffectLabel ?gene ?geneLabel
WHERE {
  ?drug wdt:P31 wd:Q12140 .  # medicine
  ?drug wdt:P2175 ?disease .    # treats disease
  OPTIONAL { ?disease wdt:P828 ?cause } .  # cause of disease
  OPTIONAL { ?disease wdt:P780 ?symptom } .  # symptom/complaint of disease
  OPTIONAL { ?drug wdt:P780 ?sideEffect } .  # potential side effect
  OPTIONAL { ?disease wdt:P1057 ?gene } .  # gene associated with disease
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
"""

In [27]:
def build_drug_disease_graph(bindings):
    """Turn SPARQL JSON result rows into an rdflib graph.

    Args:
        bindings: iterable of result rows as found under
            ``results["results"]["bindings"]``; each row maps a variable name
            to a dict carrying at least a ``"value"`` entry. ``drug``,
            ``drugLabel``, ``disease`` and ``diseaseLabel`` are required;
            ``cause``, ``symptom``, ``sideEffect``, ``gene`` and their
            ``...Label`` companions are optional.

    Returns:
        A ``Graph`` with the ``wd``, ``wdt`` and ``rdfs`` prefixes bound,
        holding the type/label/treats triples for every row plus one link
        (and label) per optional entity that is actually bound.

    Raises:
        KeyError: if a row lacks one of the required variables.
    """
    graph = Graph()
    graph.bind("wd", WD)
    graph.bind("wdt", WDT)
    graph.bind("rdfs", RDFS)

    # (result variable, variable of the subject it hangs off, predicate).
    # The predicates mirror the ones used in the query, which is why both
    # symptom and sideEffect are written with WDT.P780.
    optional_links = (
        ("cause", "disease", WDT.P828),
        ("symptom", "disease", WDT.P780),
        ("sideEffect", "drug", WDT.P780),
        ("gene", "disease", WDT.P1057),
    )

    for row in bindings:
        subjects = {
            "drug": URIRef(row["drug"]["value"]),
            "disease": URIRef(row["disease"]["value"]),
        }
        drug, disease = subjects["drug"], subjects["disease"]

        graph.add((drug, RDF.type, WD.Q12140))
        graph.add((drug, RDFS.label, Literal(row["drugLabel"]["value"])))
        graph.add((drug, WDT.P2175, disease))
        graph.add((disease, RDFS.label, Literal(row["diseaseLabel"]["value"])))

        for variable, subject_key, predicate in optional_links:
            entity_value = row.get(variable, {}).get("value")
            if not entity_value:
                # Unbound OPTIONAL: skip it entirely instead of creating an
                # empty-IRI placeholder node.
                continue
            entity = URIRef(entity_value)
            graph.add((subjects[subject_key], predicate, entity))

            # A label is only meaningful when its entity exists, so it is
            # attached here rather than guarded on its own.
            label_value = row.get(f"{variable}Label", {}).get("value")
            if label_value:
                graph.add((entity, RDFS.label, Literal(label_value)))

    return graph


# Run the query against the Wikidata endpoint and decode the JSON response.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery(sparql_query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Build the RDF graph from the result rows.
g = build_drug_disease_graph(results["results"]["bindings"])

# Save the graph to a TTL file
output_file = "wikidata_drug_disease_graph.ttl"
g.serialize(destination=output_file, format="turtle")

print(f"Graph saved to {output_file}")

Graph saved to wikidata_drug_disease_graph.ttl


In [17]:
# Fetch the drug/disease rows from Wikidata and serialise them as Turtle.
# (This cell previously contained the same pipeline pasted into the middle of
# its own output_file string, which left it syntactically invalid; it now
# holds a single copy.)
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery(sparql_query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Create an RDF graph
g = Graph()

# Bind namespaces
g.bind("wd", WD)
g.bind("wdt", WDT)
g.bind("rdfs", RDFS)

# Process SPARQL results and add triples to the graph
for result in results["results"]["bindings"]:
    drug = URIRef(result["drug"]["value"])
    disease = URIRef(result["disease"]["value"])

    g.add((drug, RDF.type, WD.Q12140))
    g.add((drug, RDFS.label, Literal(result["drugLabel"]["value"])))
    g.add((drug, WDT.P2175, disease))
    g.add((disease, RDFS.label, Literal(result["diseaseLabel"]["value"])))

    # Optional variables: (result key, subject the link hangs off, predicate).
    # Predicates match the ones used in sparql_query.
    for key, subject, predicate in (
        ("cause", disease, WDT.P828),
        ("symptom", disease, WDT.P780),
        ("sideEffect", drug, WDT.P780),
        ("gene", disease, WDT.P1057),
    ):
        value = result.get(key, {}).get("value")
        if not value:
            # OPTIONAL pattern did not match for this row.
            continue
        entity = URIRef(value)
        g.add((subject, predicate, entity))

        # Only label entities that are actually present in the row.
        label = result.get(key + "Label", {}).get("value")
        if label:
            g.add((entity, RDFS.label, Literal(label)))

# Save the graph to a TTL file
output_file = "wikidata_drug_disease_graph.ttl"
g.serialize(destination=output_file, format="turtle")

print(f"Graph saved to {output_file}")

Graph saved to wikidata_drug_disease_graph.ttl


In [25]:
# Reload the serialised drug/disease graph from disk so this cell does not
# depend on the in-memory graph left behind by an earlier cell.
g = Graph()
g.parse("wikidata_drug_disease_graph.ttl", format="turtle")

# Define a namespace for our ontology
# NOTE(review): ONT is not used below; kept in case later cells rely on it.
ONT = Namespace("http://example.com/ontology#")

# Create a new RDF graph for the ontology
ontology_graph = Graph()

# Collect every IRI seen in subject/object position as a "class" and every
# predicate IRI as a "property". Literal objects are skipped by the URIRef
# check.
# NOTE(review): this declares individual drugs/diseases as rdfs:Class as
# well - confirm that is the intended modelling.
classes = set()
properties = set()
for s, p, o in g:
    if isinstance(s, URIRef):
        classes.add(s)
    properties.add(p)
    if isinstance(o, URIRef):
        classes.add(o)


def _label_for(term):
    """Return an rdfs:label Literal for ``term``.

    Uses a label already present in the source graph when there is one (any
    one of them if several exist); otherwise falls back to the IRI text so
    that the label is always a Literal rather than the IRI node itself.
    """
    existing = g.value(term, RDFS.label)
    if isinstance(existing, Literal):
        return existing
    return Literal(str(term))


# Define classes in the ontology
for class_uri in classes:
    ontology_graph.add((class_uri, RDF.type, RDFS.Class))
    ontology_graph.add((class_uri, RDFS.label, _label_for(class_uri)))

# Define properties in the ontology
for property_uri in properties:
    ontology_graph.add((property_uri, RDF.type, RDF.Property))
    ontology_graph.add((property_uri, RDFS.label, _label_for(property_uri)))

# Serialize the ontology to a Turtle file
ontology_graph.serialize(destination="wikidata_ontology.ttl", format="turtle")

(True, 'ontology.txt')