In [25]:
from collections import deque

import networkx as nx
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph

In [26]:
# Load the Turtle file into a fresh in-memory rdflib graph.
g = rdflib.Graph()
g.parse(source='medical_graph.ttl', format='ttl')

<Graph identifier=N735746bf0d9f4be99758700e3d5e9ec3 (<class 'rdflib.graph.Graph'>)>

In [27]:
import sys
#!{sys.executable} -m pip install pandas oxrdflib Pygments

import pandas as pd
from IPython.display import display, HTML
from pygments import highlight
from pygments.lexers import SparqlLexer
from pygments.formatters import HtmlFormatter
from rdflib import Graph

def run_query(graph, query_path):
    """Run the SPARQL query stored in ``query_path`` against ``graph``.

    The query text is shown as syntax-highlighted HTML and the result rows
    are returned as a DataFrame whose cells are all strings.

    Args:
        graph: Object exposing ``query(sparql_text)`` (here an rdflib graph).
        query_path: Path of a text file containing the SPARQL query.

    Returns:
        ``None`` when the query file cannot be read (a message is printed);
        otherwise a ``pandas.DataFrame`` with one row per result row. The
        columns are named after the result variables even when no row
        matched, so an empty result still shows what was selected.
    """
    try:
        # Read as UTF-8 rather than whatever the platform default encoding is.
        with open(query_path, 'r', encoding='utf-8') as file:
            query = file.read()
    except (OSError, UnicodeError):
        # Best effort: an unreadable query file is reported, not raised.
        print(f"No file for {query_path}")
        return None

    results = graph.query(query)

    # Display the SPARQL query
    formatter = HtmlFormatter(style='solarized-dark', full=True, nobackground=True)
    display(HTML(highlight(query, SparqlLexer(), formatter)))

    # Convert results to a Pandas DataFrame
    rows = [[str(item) for item in row] for row in results]
    # `vars` may be None (presumably for queries that bind no variables);
    # in that case let pandas fall back to its default integer column labels.
    variables = results.vars
    columns = [str(var) for var in variables] if variables is not None else None
    return pd.DataFrame(rows, columns=columns)

In [28]:
# Run the SPARQL query stored on disk against the loaded graph.
# `df` is None when the query file cannot be read.
query_path = 'query.sparql'
df = run_query(graph=g, query_path=query_path)

No file for query.sparql


In [29]:
# Convert the RDF graph to networkx: `dg` via the digraph converter,
# `mdg` via the multidigraph converter.
dg = rdflib_to_networkx_digraph(g)
mdg = rdflib_to_networkx_multidigraph(g)

In [30]:
# number of nodes in the directed graph
print(dg.number_of_nodes())

1160


In [31]:
# number of edges in the directed graph
print(dg.number_of_edges())

1873


In [32]:
import pandas as pd
# One row per node: column 0 holds the node, column 1 its attribute dict.
nodes = pd.DataFrame(data=dg.nodes(data=True))
nodes

Unnamed: 0,0,1
0,http://www.wikidata.org/entity/Q131742,{}
1,http://www.wikidata.org/entity/Q6844,{}
2,http://www.wikidata.org/entity/Q19597688,{}
3,https://www.example.org/Drug,{}
4,http://www.wikidata.org/entity/Q842428,{}
...,...,...
1155,opportunistic infection,{}
1156,food,{}
1157,food allergy,{}
1158,migraine without aura,{}


In [33]:
# One row per edge: columns 0 and 1 hold the two end nodes, column 2 the edge
# attribute dict (its 'triples' entry lists the RDF triples behind the edge).
edges = pd.DataFrame(data=dg.edges(data=True))
edges

Unnamed: 0,0,1,2
0,http://www.wikidata.org/entity/Q131742,http://www.wikidata.org/entity/Q6844,{'triples': [('http://www.wikidata.org/entity/...
1,http://www.wikidata.org/entity/Q131742,http://www.wikidata.org/entity/Q1607636,{'triples': [('http://www.wikidata.org/entity/...
2,http://www.wikidata.org/entity/Q131742,hepatitis,{'triples': [('http://www.wikidata.org/entity/...
3,http://www.wikidata.org/entity/Q131742,http://www.wikidata.org/entity/Q192360,{'triples': [('http://www.wikidata.org/entity/...
4,http://www.wikidata.org/entity/Q131742,http://www.wikidata.org/entity/Q183425,{'triples': [('http://www.wikidata.org/entity/...
...,...,...,...
1868,http://www.wikidata.org/entity/Q107359303,http://www.wikidata.org/entity/Q953638,{'triples': [('http://www.wikidata.org/entity/...
1869,http://www.wikidata.org/entity/Q3316140,https://www.example.org/Symptom,{'triples': [('http://www.wikidata.org/entity/...
1870,http://www.wikidata.org/entity/Q3316140,emotional dysregulation,{'triples': [('http://www.wikidata.org/entity/...
1871,http://www.wikidata.org/entity/Q4340209,mental depression,{'triples': [('http://www.wikidata.org/entity/...


In [34]:
count = 0  # NOTE(review): not used by any visible cell
# Scratch notes for the question: ephedra treats -> rhinitis -> what are the symptoms?

# http://www.wikidata.org/entity/Q13530468,ephedra,"{'triples': [('http://www.wikidata.org/entity/Q13530468', 'http://www.w3.org/2000/01/rdf-schema#label', 'ephedra')], 'weight': 1}"

# http://www.wikidata.org/entity/Q114085,rhinitis
# treated by http://www.wikidata.org/entity/Q13530468,http://www.wikidata.org/entity/Q114085,"{'triples': [('http://www.wikidata.org/entity/Q13530468', 'https://www.example.org/treats', 'http://www.wikidata.org/entity/Q114085')], 'weight': 1}"

# http://www.wikidata.org/entity/Q114085,http://www.wikidata.org/entity/Q3245488,"{'triples': [('http://www.wikidata.org/entity/Q114085', 'https://www.example.org/hasSymptom', 'http://www.wikidata.org/entity/Q3245488')], 'weight': 1}"

# http://www.wikidata.org/entity/Q13530468,http://www.wikidata.org/entity/Q114085,"{'triples': [('http://www.wikidata.org/entity/Q13530468', 'https://www.example.org/treats', 'http://www.wikidata.org/entity/Q114085')], 'weight': 1}"


# {'triples': [('http://www.wikidata.org/entity/Q8349689', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'https://www.example.org/Cause')], 'weight': 1}
# {'triples': [('http://www.wikidata.org/entity/Q8349689', 'http://www.w3.org/2000/01/rdf-schema#label', 'physical condition')], 'weight': 1}
# http://www.wikidata.org/entity/Q147778 treats it
# 'http://www.wikidata.org/entity/Q19597688'): # drug?
# label ledipasvir/sofosbuvir

# Look the node up by key instead of scanning every node of the graph.
# NOTE(review): the notes above label Q19597688 as "ledipasvir/sofosbuvir"
# (a drug?), not a disease - confirm which one is meant.
node_uri = rdflib.term.URIRef('http://www.wikidata.org/entity/Q19597688')
if node_uri in dg:
    # Same (node, attribute-dict) tuple that iterating dg.nodes(data=True) yields.
    print((node_uri, dg.nodes[node_uri]))

(rdflib.term.URIRef('http://www.wikidata.org/entity/Q19597688'), {})


In [96]:
def multi_hop_reasoning(query_entity, target_relation, graph, relation):
    """Breadth-first walk from ``query_entity`` collecting ``relation`` targets.

    Nodes are expanded in breadth-first order starting at ``query_entity``.
    Every traversed edge that carries a triple whose predicate equals
    ``relation`` contributes its end node to the result. The walk stops as
    soon as ``target_relation`` is taken from the queue.

    Args:
        query_entity: Node at which the traversal starts.
        target_relation: Node at which the traversal stops. Despite the name
            it is compared against nodes, not against predicates.
        graph: Graph exposing ``neighbors(node)`` and ``get_edge_data(u, v)``,
            where the edge data has a ``'triples'`` list of
            ``(subject, predicate, object)`` tuples.
        relation: IRI string of the predicate to collect.

    Returns:
        List of ``str(node)`` for each matching edge end seen before the
        target was reached (discovery order, duplicates kept), or ``None``
        when the target is never reached.
    """
    wanted_predicate = rdflib.term.URIRef(relation)  # build once, not per edge
    visited = set()
    queue = deque([query_entity])
    matches = []
    while queue:
        entity = queue.popleft()
        if entity == target_relation:
            return matches
        if entity in visited:
            continue
        visited.add(entity)
        for neighbor in graph.neighbors(entity):
            edge = graph.get_edge_data(entity, neighbor)
            # The 'triples' list may hold more than one triple for the same
            # pair of nodes; check all of them, not only the first.
            if any(triple[1] == wanted_predicate for triple in edge['triples']):
                matches.append(str(neighbor))
            # An already expanded node is skipped when dequeued anyway, so
            # there is no point in queueing it again.
            if neighbor not in visited:
                queue.append(neighbor)
    return None  # target never reached

In [97]:
# Walk the graph from ephedra (Q13530468) until node Q3245488 is reached,
# collecting the targets of hasSymptom edges met on the way.
query_entity = rdflib.term.URIRef('http://www.wikidata.org/entity/Q13530468')
# Despite its name, `target_relation` is the node at which the walk stops.
target_relation = rdflib.term.URIRef('http://www.wikidata.org/entity/Q3245488')
result = multi_hop_reasoning(
    query_entity=query_entity,
    target_relation=target_relation,
    graph=dg,
    relation='https://www.example.org/hasSymptom',
)
if not result:
    # Covers both None (target never reached) and an empty list.
    print("No relation path found between the entities.")
else:
    # NOTE(review): `result` is the list of collected nodes, not an edge path.
    print("Relation path:", result)

Relation path: ['http://www.wikidata.org/entity/Q3245488', 'http://www.wikidata.org/entity/Q21118859', 'http://www.wikidata.org/entity/Q86', 'http://www.wikidata.org/entity/Q35830', 'http://www.wikidata.org/entity/Q1115038', 'http://www.wikidata.org/entity/Q517104', 'http://www.wikidata.org/entity/Q35805', 'http://www.wikidata.org/entity/Q101991', 'http://www.wikidata.org/entity/Q693058']
