In [1]:
from rdflib import Graph

In [2]:
# Relative path of the RadLex ontology file (OWL) that is parsed below.
radlex_owl_path = 'RadLex/data/RadLex.owl'

### Loading of the Ontology and transformation into a Directed Graph

In [None]:
from itertools import islice

# Parse the RadLex ontology into an in-memory rdflib graph.
g = Graph()
g.parse(radlex_owl_path, format="xml")  # OWL is typically RDF/XML

print(f"Number of triples: {len(g)}")
# Preview the first 10 triples. rdflib reads a slice on a Graph as a
# subject:predicate:object pattern, so `g[:10]` looks up triples whose
# predicate is `10` instead of truncating the iteration; islice() performs
# the truncation explicitly.
for subj, pred, obj in islice(g, 10):
    print(subj, pred, obj)

In [None]:
import rdflib
from rdflib import RDF, RDFS, OWL, Namespace
from rdflib.term import BNode
from itertools import islice

# Register readable prefixes on the graph so that qname() output is compact
# (e.g. radlex:RID47279 instead of the full URI).
RADLEX = Namespace("http://www.radlex.org/RID/")
for prefix, namespace in (
    ("radlex", RADLEX),
    ("rdf", RDF),
    ("rdfs", RDFS),
    ("owl", OWL),
):
    g.bind(prefix, namespace)

def format_node(node):
    """Return a short printable form of an RDF term.

    Blank nodes are rendered as ``[BlankNode:<first 8 characters of the id>]``,
    URI references as their qname in ``g`` when one can be computed (the full
    URI otherwise), and every other term as ``str(node)``.
    """
    if isinstance(node, BNode):
        return f"[BlankNode:{str(node)[:8]}]"
    if not isinstance(node, rdflib.URIRef):
        return str(node)
    try:
        # Prefer the prefixed form when a prefix is bound (e.g., radlex:RID47279)
        return g.qname(node)
    except Exception:
        # Best effort: fall back to the full URI when no qname can be computed.
        return str(node)

# Preview ten triples in "subject -- predicate --> object" form.
for triple in islice(g, 10):
    subj_txt, pred_txt, obj_txt = map(format_node, triple)
    print(f"{subj_txt} -- {pred_txt} --> {obj_txt}")

In [None]:
from rdflib import URIRef

RID_PREFIX = "http://www.radlex.org/RID/"


def _is_rid_uri(term):
    """Tell whether ``term`` is a URIRef located under ``RID_PREFIX``."""
    if not isinstance(term, URIRef):
        return False
    uri = str(term)
    # NOTE(review): RID_PREFIX itself contains "RID", so the substring test can
    # never fail once the prefix test has passed. It is kept unchanged here;
    # confirm whether the local name was meant to be checked instead.
    return uri.startswith(RID_PREFIX) and "RID" in uri


rid_codes = set()
rid_pairs = set()
relations_between_rids = set()

for subj, pred, obj in g:
    # RID terms found on the subject and/or object side of this triple
    rid_terms = [term for term in (subj, obj) if _is_rid_uri(term)]
    rid_codes.update(rid_terms)

    # Both ends are RIDs: record the pair and the relation linking them
    if len(rid_terms) == 2:
        rid_pairs.add((subj, obj))
        relations_between_rids.add(pred)

# Summary of what was found
print(f"Number of unique RID codes: {len(rid_codes)}")
print(f"Number of RID-to-RID pairs: {len(rid_pairs)}")
print(f"Number of different relations between RID codes: {len(relations_between_rids)}")
print("\nRelations used between RID codes:")
for rel in relations_between_rids:
    print(f" - {g.qname(rel)}")


In [None]:
import networkx as nx
from rdflib import URIRef

from itertools import islice

RID_PREFIX = "http://www.radlex.org/RID/"

# Create a directed multigraph: the same two RID codes may be linked by
# several different relations, each stored as its own edge.
G = nx.MultiDiGraph()

for s, p, o in g:
    s_is_rid = isinstance(s, URIRef) and str(s).startswith(RID_PREFIX) and "RID" in str(s)
    o_is_rid = isinstance(o, URIRef) and str(o).startswith(RID_PREFIX) and "RID" in str(o)

    if s_is_rid and o_is_rid:
        # Add edge with the predicate's qname as label
        G.add_edge(str(s), str(o), label=g.qname(p))

# Summary info
print(f"Number of RID nodes: {G.number_of_nodes()}")
print(f"Number of RID-to-RID edges: {G.number_of_edges()}")

# Optional: show first 5 edges. islice() stops after five items instead of
# first copying every edge of the graph into a list.
for u, v, d in islice(G.edges(data=True), 5):
    print(f"{u} --[{d['label']}]--> {v}")

In [7]:
# Shorten node ids to the last "/"-separated segment of their URI.
G = nx.relabel_nodes(G, lambda uri: uri.rsplit("/", 1)[-1])

In [8]:
def format_relation_label(label):
    """Turn a prefixed relation qname into a human-readable edge label.

    The first matching namespace prefix (``radlex:``, ``RID:``, ``rdf:``,
    ``rdfs:`` or ``owl:``) is removed, underscores become spaces and the
    result is title-cased, e.g. ``"rdfs:subClassOf"`` -> ``"Subclassof"`` and
    ``"RID:Has_Part"`` -> ``"Has Part"``.

    Args:
        label: Relation qname, or ``None``.

    Returns:
        The formatted label; an empty string when ``label`` is ``None``.
    """
    if label is None:
        return ""
    # 'radlex:' is the prefix this notebook binds to the RadLex namespace, so
    # qnames produced after that binding start with it rather than 'RID:'.
    for prefix in ('radlex:', 'RID:', 'rdf:', 'rdfs:', 'owl:'):
        if label.startswith(prefix):
            label = label[len(prefix):]
            break
    return label.replace('_', ' ').title()

# MultiDiGraph edges carry a key; rewrite each edge's label in readable form.
for _src, _dst, _key, attrs in G.edges(keys=True, data=True):
    attrs['label'] = format_relation_label(attrs.get('label', ''))

In [None]:
# Distinct, non-empty relation labels found on the edges of G
relation_types = {
    attrs['label']
    for *_, attrs in G.edges(keys=True, data=True)
    if attrs.get('label')
}

# Show them in alphabetical order
print("Unique edge relation types:")
for rel in sorted(relation_types):
    print("-", rel)

### Fix issue with orientation of edge "subClassOf"
We want each such edge to point from the parent node to the child node. We therefore replace it with an edge in the opposite direction and change its label accordingly, so that the label still matches the direction of the relation.

In [None]:
# Graph size (nodes, edges) before the subclass edges are reversed
print(G.number_of_nodes(), G.number_of_edges())

In [11]:
# Flip every 'Subclassof' edge and relabel it 'Has Subclass'.

# Snapshot the matching edges (source, target, key) first, so that G is not
# modified while its edges are being iterated.
edges_to_reverse = [
    (child, parent, key)
    for child, parent, key, attrs in G.edges(keys=True, data=True)
    if attrs.get('label') == 'Subclassof'
]

# Replace each of them by the opposite edge carrying the new label
for child, parent, key in edges_to_reverse:
    G.remove_edge(child, parent, key=key)
    G.add_edge(parent, child, label='Has Subclass')

In [None]:
# Graph size (nodes, edges) after the subclass edges were reversed
print(G.number_of_nodes(), G.number_of_edges())

In [None]:
# Follow only 'Has Subclass' edges so that the number reported really is the
# number of subclass descendants: G also contains the other RID-to-RID
# relations, which nx.descendants(G, ...) would traverse as well.
subclass_hierarchy = nx.subgraph_view(
    G,
    filter_edge=lambda u, v, key: G[u][v][key].get('label') == 'Has Subclass',
)
descendants = nx.descendants(subclass_hierarchy, 'RID1')
print(f"RID1 has {len(descendants)} subclass descendants.")
# Show a small sorted sample rather than dumping the whole set.
print(sorted(descendants)[:10])

### Add code description for each RIDXXXX node

In [14]:
import networkx as nx
import pandas as pd

# Read the RadLex entity table (adjust the path if necessary)
df = pd.read_csv("data/d_radlex_entities.csv")

# Lookup table: code -> preferred description
desc_map = dict(zip(df['radlex_code'], df['preferred_description']))

# Attach a description to every node; nodes missing from the table get a placeholder
for node, attrs in G.nodes(data=True):
    attrs['description'] = desc_map.get(node, "(no description)")

In [None]:
# Graph size (nodes, edges) after the descriptions were attached
print(G.number_of_nodes(), G.number_of_edges())

In [19]:
# Save graph
import pickle

# Serialize the annotated graph to disk with pickle.
with open("data/RadLex_graph.gpickle", "wb") as graph_file:
    pickle.dump(G, graph_file)

### TEST: Try to extract subgraph corresponding to codes from a sample report
Note to self: make sure that the differences between the output in ``graph_sample.html`` and in ``with_pahts_to_RID1.html`` from the ``ontology_exploration.ipynb`` code are OK.

In [8]:
import networkx as nx
import pickle

# Reload the pickled graph.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open("data/RadLex_graph.gpickle", "rb") as graph_file:
    G = pickle.load(graph_file)

G

<networkx.classes.multidigraph.MultiDiGraph at 0x1171e8070>

In [9]:
# Graph size (nodes, edges) of the reloaded graph
print(G.number_of_nodes(), G.number_of_edges())

46899 154615


In [10]:
# RadLex codes of the sample report, and the node used as root
codes = [
    'RID908',
    'RID5619',
    'RID5622',
    'RID10404',
    'RID10326',
    'RID29991',
    'RID5611',
    'RID5576',
    'RID1543',
]
root = 'RID1'

In [11]:
import networkx as nx
from pyvis.network import Network
from itertools import combinations

codes = ['RID908', 'RID5619', 'RID5622', 'RID10404', 'RID10326', 'RID29991', 'RID5611', 'RID5576', 'RID1543']
root = 'RID1'

# --- Step 1: Extract the subgraph spanned by one shortest path per ordered pair of codes ---

nodes_to_include = set(codes)

# nx.shortest_path returns a single shortest path (not every path), so for
# each pair of codes the nodes of one shortest path per direction are added,
# when such a path exists.
for u, v in combinations(codes, 2):
    for source, target in ((u, v), (v, u)):
        try:
            # One search per direction: a missing path is signalled by
            # NetworkXNoPath, so no separate has_path() probe is needed.
            path_nodes = nx.shortest_path(G, source, target)
        except nx.NetworkXNoPath:
            continue
        nodes_to_include.update(path_nodes)

H_sub = G.subgraph(nodes_to_include).copy()

# --- Step 2: Clean graph by removing self-loops and reciprocal edges ---

def simplify_graph_for_display(G):
    """Build a DiGraph holding at most one edge per unordered pair of nodes.

    Self-loops are dropped. For each pair of distinct nodes only the first
    edge encountered is kept, with its direction and its ``label`` attribute;
    any later edge between the same two nodes (in either direction) is ignored.
    """
    simplified = nx.DiGraph()
    handled_pairs = set()
    for src, dst, attrs in G.edges(data=True):
        if src == dst:
            continue  # self-loop
        unordered = tuple(sorted((src, dst)))
        if unordered not in handled_pairs:
            # First edge seen between these two nodes: keep it
            handled_pairs.add(unordered)
            simplified.add_edge(src, dst, label=attrs.get('label'))
    return simplified

# Simplified DiGraph version of the extracted subgraph, used for display.
cleaned_H = simplify_graph_for_display(H_sub)

# --- Step 3: Add shortest paths from RID1 to each code (from full ontology G) ---

highlight_edges = set()

for code in codes:
    if not nx.has_path(G, root, code):
        continue
    path = nx.shortest_path(G, root, code)
    for u, v in zip(path, path[1:]):
        # G is a multigraph: use the label of the first edge stored for u -> v
        parallel_edges = G.get_edge_data(u, v)
        label = next(iter(parallel_edges.values())).get('label', '')
        cleaned_H.add_edge(u, v, label=label)
        highlight_edges.add((u, v))
    # Ensure all path nodes are included, even when the path has no edge
    cleaned_H.add_nodes_from(path)

# --- Step 4: Visualize with pyvis ---

net = Network(height="700px", width="100%", directed=True, notebook=True)

for node in cleaned_H.nodes():
    # The root, the requested codes and the remaining nodes each get their own look
    if node == root:
        color, size = 'orange', 30
    elif node in codes:
        color, size = 'skyblue', 25
    else:
        color, size = 'lightgray', 10
    net.add_node(
        node,
        label=node,
        title=G.nodes[node].get('description', ''),  # Tooltip on hover
        color=color,
        size=size,
    )

for u, v, d in cleaned_H.edges(data=True):
    # Edges recorded in highlight_edges are drawn in black, the others in light gray
    edge_color = 'black' if (u, v) in highlight_edges else '#cccccc'
    net.add_edge(u, v, label=d.get('label', ''), color=edge_color)

net.repulsion(node_distance=150, spring_length=200)

output_path = "data/output/graph_sample.html"
net.show(output_path)

# --- Step 5: Add legend to the saved HTML ---

# Read back the HTML file
# Imported by name because the variable `html` below holds the page text.
from html import escape

# Read back the HTML file
with open(output_path, 'r', encoding='utf-8') as f:
    html = f.read()

# Build legend HTML snippet
legend_items = []
for code in codes:
    desc = G.nodes[code].get('description', '')
    # Codes and descriptions are plain text: escape them so that characters
    # such as "<" or "&" cannot break the injected markup.
    legend_items.append(f"<li><b>{escape(str(code))}</b>: {escape(str(desc))}</li>")

legend_html = f"""
<div style="position: fixed; bottom: 20px; left: 20px; background: white; 
            border: 2px solid black; padding: 10px; max-width: 300px; 
            font-family: Arial, sans-serif; font-size: 12px; overflow-y: auto; max-height: 200px; z-index:9999;">
  <h4>Extracted RadLex codes</h4>
  <ul style="padding-left: 1em; margin: 0;">{''.join(legend_items)}</ul>
</div>
"""

# Inject legend just before closing </body> tag
html = html.replace("</body>", legend_html + "</body>")

# Write back modified HTML
with open(output_path, 'w', encoding='utf-8') as f:
    f.write(html)

data/output/graph_sample.html
