# RDF Graph Exploration

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)  ## remove pandas future warning
from ipysigma import Sigma
import networkx as nx
import pandas as pd
import pydantic
# import torch
from pygraphml import GraphMLParser
from pygraphml import Graph as GraphML
from SPARQLWrapper import SPARQLWrapper, JSON

# from relik import Relik
# from relik.inference.data.objects import RelikOutput
from tqdm import tqdm

import logging
# Emit INFO-level log records in the notebook output. The 'relik' logger is set
# explicitly even though the relik imports above are currently commented out.
logging.basicConfig(level=logging.INFO)
logging.getLogger('relik').setLevel(logging.INFO)


In [5]:
# SPARQL endpoint that the queries below are sent to (a server on localhost, port 7007).
ENDPOINT_URL = "http://localhost:7007/sparql"

In [57]:
def pygraphml_to_nx(g_py):
    """
    Convert a pygraphml.Graph (GraphML) to a NetworkX graph.

    Parameters
    ----------
    g_py : pygraphml.Graph
        Source graph. Its ``directed`` flag, ``nodes()`` and ``edges()`` are
        read; every node must expose ``id`` and ``attributes()``, every edge
        ``node1``, ``node2`` and ``attributes()``.

    Returns
    -------
    networkx.Graph or networkx.DiGraph
        ``nx.DiGraph`` when ``g_py.directed`` is truthy, ``nx.Graph`` otherwise,
        so edge direction is no longer dropped for directed inputs. The flag is
        also kept under ``graph["directed"]``. Neither class stores parallel
        edges: a repeated (u, v) pair updates the attributes of the existing
        edge instead of adding a second one.
    """
    G_nx = nx.DiGraph() if g_py.directed else nx.Graph()
    G_nx.graph["directed"] = g_py.directed

    def _plain(attrs):
        # attributes() may map names to wrapper objects that carry the real
        # value in ``.value``; unwrap those and pass plain values through as-is.
        return {
            name: getattr(value, "value", value)
            for name, value in dict(attrs).items()
        }

    # Add nodes
    for node in g_py.nodes():
        G_nx.add_node(node.id, **_plain(node.attributes()))

    # Add edges
    for edge in g_py.edges():
        G_nx.add_edge(edge.node1.id, edge.node2.id, **_plain(edge.attributes()))

    return G_nx

## Query one: type to type

In [58]:
# SPARQL query: every triple (?source ?type ?target) in which both the subject
# and the object have an rdf:type, returned together with those types
# (?sType, ?tType). A resource with several types yields one row per combination.
qtypetype = """	PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX schema: <https://schema.org/>

SELECT DISTINCT ?source ?type ?target ?sType ?tType 
WHERE {
    ?source a ?sType .
    ?target a ?tType .
    ?source ?type ?target . 
}
"""


In [59]:
# Send the query to the endpoint and unpack the SPARQL JSON response.
ts = SPARQLWrapper(ENDPOINT_URL)
ts.setReturnFormat(JSON)

ts.setQuery(qtypetype)
r = ts.query().convert()
qr = r["results"]["bindings"]   # result rows; each is indexed as row[var]["value"] below
# Names of the projected variables. Bound directly to value_list instead of
# going through a global called `vars`, which shadowed the builtin vars()
# for the rest of the kernel session.
value_list = r["head"]["vars"]

In [60]:
g = GraphML()
g.directed = False

# label -> node already added to g. The same resource usually appears in many
# result rows; reusing its node keeps node ids unique in the GraphML file later
# written from g (assumes pygraphml's add_node creates a fresh node on every
# call — TODO confirm; the cache is harmless if it does not).
nodes_by_label = {}

for row in qr:
    # Each row[var]["value"] gives the string value
    source = row["source"]["value"]
    target = row["target"]["value"]
    rel_type = row["type"]["value"]
    sType = row["sType"]["value"]
    tType = row["tType"]["value"]

    n1 = nodes_by_label.get(source)
    if n1 is None:
        n1 = nodes_by_label[source] = g.add_node(source)
    # A node has a single "type" slot: when a resource has several rdf:types,
    # the value from the last row processed wins.
    n1["type"] = sType

    n2 = nodes_by_label.get(target)
    if n2 is None:
        n2 = nodes_by_label[target] = g.add_node(target)
    n2["type"] = tType

    # One edge per result row, labelled with the predicate IRI.
    e1 = g.add_edge(n1, n2)
    e1["predicate"] = rel_type

In [None]:
# Serialise the pygraphml graph g to a GraphML file in the working directory;
# the next cell reads the same path back with NetworkX.
fname = "./rdfGraphs.graphml"
parser = GraphMLParser()
parser.write(g, fname)

In [53]:
# Load the GraphML file written above into a NetworkX graph for visualisation.
G = nx.read_graphml("./rdfGraphs.graphml")


In [54]:
# Interactive ipysigma view of G. Node size and label size both follow node
# degree; edge size follows the summed degree of the edge's two endpoints.
Sigma(
    G,
    node_size=G.degree,
    default_edge_type='curve',
    node_border_color_from='node',
    node_metrics={"community": "louvain"},  # requests a louvain metric under the name "community"
    node_color='type',   # colour by the 'type' node attribute; 'community' is presumably the alternative that uses the louvain metric
    start_layout=5,  # NOTE(review): presumably seconds of automatic layout on display — confirm in ipysigma docs
    edge_size=lambda u, v: G.degree(u) + G.degree(v),
    edge_size_range=(0.5, 5),
    label_font='cursive',
    node_label_size=G.degree,
    label_density=0
)

Sigma(nx.MultiGraph with 53,643 nodes and 63,832 edges)

## Query two: infectious disease to spatial coverage

In [6]:
# SPARQL query: distinct (disease name, place name) pairs for every
# schema:InfectiousDisease that has a schema:spatialCoverage with a name.
# NOTE(review): ?additionalproperty is matched but never selected, so that
# pattern only restricts results to diseases that have a
# schema:additionalProperty — confirm this filter is intended.
# The name qtypetype is reused from query one and now holds a different query.
qtypetype = """	PREFIX schema: <https://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT  DISTINCT ?name ?spatial_name
WHERE {
  	?subject a schema:InfectiousDisease .
    ?subject schema:additionalProperty ?additionalproperty .
  ?subject schema:name ?name .


  ?subject schema:spatialCoverage ?spatialcoverage .
  ?spatialcoverage schema:name ?spatial_name .

}

"""

In [7]:
# Send the query to the endpoint and unpack the SPARQL JSON response.
ts = SPARQLWrapper(ENDPOINT_URL)
ts.setReturnFormat(JSON)

ts.setQuery(qtypetype)
r = ts.query().convert()
qr = r["results"]["bindings"]   # result rows; each is indexed as row[var]["value"] below
# Names of the projected variables. Bound directly to value_list instead of
# going through a global called `vars`, which shadowed the builtin vars()
# for the rest of the kernel session.
value_list = r["head"]["vars"]

In [8]:
g = GraphML()
g.directed = False

# label -> node already added to g. A disease or place name usually appears in
# several result rows; reusing its node keeps node ids unique in the GraphML
# file later written from g (assumes pygraphml's add_node creates a fresh node
# on every call — TODO confirm; the cache is harmless if it does not).
nodes_by_label = {}

for row in qr:
    # Each row[var]["value"] gives the string value
    source = row["name"]["value"]
    target = row["spatial_name"]["value"]
    # The query returns names only, so edge and node types are fixed labels.
    rel_type = "spatialCoverage"
    sType = "pathogen"
    tType = "geospatial"

    n1 = nodes_by_label.get(source)
    if n1 is None:
        n1 = nodes_by_label[source] = g.add_node(source)
    n1["type"] = sType

    n2 = nodes_by_label.get(target)
    if n2 is None:
        n2 = nodes_by_label[target] = g.add_node(target)
    # If a name occurs both as a disease and as a place, the last assignment wins.
    n2["type"] = tType

    # One edge per result row.
    e1 = g.add_edge(n1, n2)
    e1["predicate"] = rel_type

In [9]:
# Serialise the pygraphml graph g to GraphML. This is the same path used for
# query one, so that file is overwritten.
fname = "./rdfGraphs.graphml"
parser = GraphMLParser()
parser.write(g, fname)

In [10]:
# Load the GraphML file written above into a NetworkX graph for visualisation.
G = nx.read_graphml("./rdfGraphs.graphml")


In [12]:
# Interactive ipysigma view of G. Node size and label size both follow node
# degree; edge size follows the summed degree of the edge's two endpoints.
Sigma(
    G,
    node_size=G.degree,
    default_edge_type='curve',
    node_border_color_from='node',
    node_metrics={"community": "louvain"},  # requests a louvain metric under the name "community"
    node_color='type',   # colour by the 'type' node attribute; 'community' is presumably the alternative that uses the louvain metric
    start_layout=5,  # NOTE(review): presumably seconds of automatic layout on display — confirm in ipysigma docs
    edge_size=lambda u, v: G.degree(u) + G.degree(v),
    edge_size_range=(0.5, 5),
    label_font='cursive',
    node_label_size=G.degree,
    label_density=0
)

Sigma(nx.Graph with 233 nodes and 254 edges)