# ontology

> RDF ontology loading and meta-graph navigation for RLM

In [None]:
#| default_exp ontology

## Overview

This module implements Stage 1 of the trajectory: Define the Ontology "Context Model".

### Design Principles

- **Handles, not dumps**: Return graph handles with bounded view operations
- **Meta-graph scaffolding**: Build navigation indexes (labels, hierarchy, properties)
- **Progressive disclosure**: Small summaries guide exploration
- **RLM-compatible**: Works with namespace-explicit `rlm_run()`

### Context Model

From the trajectory document:
> The *root model never gets a graph dump*. It gets a handle name (e.g. `ont`, `res_0`) and uses bounded view operations.

## Imports

In [None]:
#| export
from rdflib import Graph, Namespace, RDF, RDFS, OWL
from pathlib import Path
from collections import Counter, defaultdict
from dataclasses import dataclass, field

## Graph Loading

In [None]:
#| export
def load_ontology(path: str | Path, ns: dict, name: str = 'ont') -> str:
    """Parse an RDF ontology file and register the resulting Graph in `ns`.

    The caller gets back only a short status line; the Graph itself is
    reachable through ``ns[name]``.

    Args:
        path: Location of the ontology file (.ttl, .rdf, .owl)
        ns: Namespace dict that receives the Graph
        name: Key under which the Graph is stored in `ns`

    Returns:
        One-line summary with the triple count, file name and handle name
    """
    graph = Graph()
    # No explicit format is passed, so rdflib picks the parser itself.
    graph.parse(path)
    ns[name] = graph

    filename = Path(path).name
    return f"Loaded {len(graph)} triples from {filename} into '{name}'"

In [None]:
# Test loading prov.ttl
# Load into a fresh namespace dict and check that the handle was registered.
test_ns = {}
result = load_ontology('ontology/prov.ttl', test_ns, name='prov_ont')
print(result)
assert 'prov_ont' in test_ns
assert isinstance(test_ns['prov_ont'], Graph)
assert len(test_ns['prov_ont']) > 0
# The check mark was previously stored as mis-decoded bytes ("âœ“").
print(f"✓ Loaded {len(test_ns['prov_ont'])} triples")

## Meta-Graph Navigation

Build navigation scaffolding from a Graph to enable progressive disclosure.
This is what goes in the REPL environment, not the graph itself.

In [None]:
#| export
@dataclass
class GraphMeta:
    """Meta-graph navigation scaffolding for an RDF Graph.

    This is REPL-resident and provides bounded views over the graph.
    The indexes (namespaces, classes, properties, labels) are built on
    first access and cached on the instance; they are not refreshed if
    the underlying graph is modified afterwards.
    """
    graph: Graph
    name: str = 'ont'

    # Lazily computed caches; None means "not built yet".
    _namespaces: dict | None = field(default=None, init=False, repr=False)
    _classes: list | None = field(default=None, init=False, repr=False)
    _properties: list | None = field(default=None, init=False, repr=False)
    _labels: dict | None = field(default=None, init=False, repr=False)

    def _named_subjects(self, *rdf_types) -> list:
        """Return sorted URI strings of subjects typed with any of `rdf_types`.

        Only URIRef subjects are kept.  Blank-node subjects (e.g. anonymous
        class expressions) have no URI and their generated ids are not
        stable, so they are left out of the "URIs only" indexes.
        """
        # Local import: URIRef is not part of the module-level rdflib import.
        from rdflib import URIRef

        found = set()
        for rdf_type in rdf_types:
            found.update(
                subject
                for subject in self.graph.subjects(RDF.type, rdf_type)
                if isinstance(subject, URIRef)
            )
        return sorted(str(subject) for subject in found)

    @property
    def triple_count(self) -> int:
        """Total number of triples in graph."""
        return len(self.graph)

    @property
    def namespaces(self) -> dict:
        """Get namespace prefix bindings as a prefix -> namespace URI dict."""
        if self._namespaces is None:
            self._namespaces = {prefix: str(ns) for prefix, ns in self.graph.namespaces()}
        return self._namespaces

    @property
    def classes(self) -> list:
        """Get all OWL/RDFS classes (URIs only, sorted).

        A subject counts as a class when it has rdf:type owl:Class or
        rdfs:Class; blank-node subjects are excluded.
        """
        if self._classes is None:
            self._classes = self._named_subjects(OWL.Class, RDFS.Class)
        return self._classes

    @property
    def properties(self) -> list:
        """Get all properties (URIs only, sorted).

        Covers owl:ObjectProperty, owl:DatatypeProperty,
        owl:AnnotationProperty and rdf:Property; blank-node subjects are
        excluded.
        """
        if self._properties is None:
            self._properties = self._named_subjects(
                OWL.ObjectProperty,
                OWL.DatatypeProperty,
                OWL.AnnotationProperty,
                RDF.Property,
            )
        return self._properties

    @property
    def labels(self) -> dict:
        """Get label index: URI -> label string.

        Built from rdfs:label triples.  When a subject carries several
        labels, each one overwrites the previous, so only the last label
        yielded by the graph is kept.
        """
        if self._labels is None:
            self._labels = {}
            for s, o in self.graph.subject_objects(RDFS.label):
                self._labels[str(s)] = str(o)
        return self._labels

    def summary(self) -> str:
        """Generate a summary of the graph for display.

        Returns:
            Four lines: triple count, class count, property count and the
            comma-separated namespace prefixes.
        """
        lines = [
            f"Graph '{self.name}': {self.triple_count:,} triples",
            f"Classes: {len(self.classes)}",
            f"Properties: {len(self.properties)}",
            f"Namespaces: {', '.join(self.namespaces.keys())}"
        ]
        return '\n'.join(lines)

In [None]:
# Test GraphMeta with prov ontology
# Reuse the graph loaded by the earlier load test.
prov_g = test_ns['prov_ont']
meta = GraphMeta(graph=prov_g, name='prov')

# Summary followed by one blank line, then small samples of each index.
print(meta.summary(), end='\n\n')
print(f"Sample classes (first 5): {meta.classes[:5]}")
print(f"Sample properties (first 5): {meta.properties[:5]}")
print(f"Namespaces: {list(meta.namespaces.keys())}")

## Bounded View Functions

These operate on GraphMeta and return small, bounded summaries.

In [None]:
#| export
def graph_stats(meta: GraphMeta) -> str:
    """Return the summary text produced by `meta.summary()`.

    Args:
        meta: GraphMeta whose statistics are reported

    Returns:
        The summary string, unchanged
    """
    stats = meta.summary()
    return stats

In [None]:
#| export
def search_by_label(meta: GraphMeta, search: str, limit: int = 10) -> list:
    """Search for entities by label substring (case-insensitive).

    Labels are scanned in the iteration order of `meta.labels`, and the
    scan stops as soon as `limit` matches have been collected.

    Args:
        meta: GraphMeta to search
        search: Substring to search for in labels
        limit: Maximum results to return. Zero or a negative value yields
            an empty list; None returns every match.

    Returns:
        List of (URI, label) tuples, at most `limit` long
    """
    matches = []
    # A non-positive cap can never be satisfied by any result; return early
    # instead of letting a negative value act as a "drop the tail" slice.
    if limit is not None and limit <= 0:
        return matches

    search_lower = search.lower()
    for uri, label in meta.labels.items():
        if search_lower in label.lower():
            matches.append((uri, label))
            # Stop scanning once the cap is reached.
            if limit is not None and len(matches) >= limit:
                break
    return matches

In [None]:
# Test search_by_label
# Case-insensitive substring search over the label index of `meta`
# (the GraphMeta built in the earlier test cell), capped at 5 results.
results = search_by_label(meta, 'activity', limit=5)
print(f"Found {len(results)} matches for 'activity':")
# Each result is a (URI, label) tuple; the label is printed first.
for uri, label in results:
    print(f"  {label}: {uri}")

In [None]:
#| export
def describe_entity(meta: GraphMeta, uri: str, limit: int = 20) -> dict:
    """Get bounded description of an entity.

    Args:
        meta: GraphMeta containing the entity
        uri: URI of entity to describe
        limit: Max number of outgoing triples to include. Negative values
            are treated as 0; None removes the cap.

    Returns:
        Dict with keys:
            'uri': the URI as passed in
            'label': label from `meta.labels`, or the URI itself if none
            'types': list of rdf:type values as strings
            'comment': first rdfs:comment as a string, or None
            'outgoing_sample': up to `limit` (predicate, object) string pairs
    """
    from itertools import islice
    from rdflib import URIRef

    entity = URIRef(uri)

    # islice rejects negative stops, so clamp; None means "take everything".
    max_triples = None if limit is None else max(limit, 0)

    # Get label, falling back to the URI when the entity has none
    label = meta.labels.get(uri, uri)

    # Get types
    types = [str(t) for t in meta.graph.objects(entity, RDF.type)]

    # Get sample of outgoing triples.  The iterator is consumed only up to
    # the cap, and the cap is the caller's `limit` rather than a fixed 10.
    outgoing = [
        (str(p), str(o))
        for p, o in islice(meta.graph.predicate_objects(entity), max_triples)
    ]

    # Get the first comment if available, without collecting all of them
    first_comment = next(iter(meta.graph.objects(entity, RDFS.comment)), None)
    comment = None if first_comment is None else str(first_comment)

    return {
        'uri': uri,
        'label': label,
        'types': types,
        'comment': comment,
        'outgoing_sample': outgoing
    }

In [None]:
# Test describe_entity
# Describe the PROV Activity class, addressed by its full URI
activity_uri = 'http://www.w3.org/ns/prov#Activity'
desc = describe_entity(meta, activity_uri)

print(f"Label: {desc['label']}")
print(f"Types: {desc['types']}")
# Show only the first 100 characters of the comment, when there is one
if desc['comment']:
    print(f"Comment: {desc['comment'][:100]}...")
else:
    print("No comment")
print(f"Outgoing triples: {len(desc['outgoing_sample'])}")

## Integration with RLM

Helper to set up the ontology context for `rlm_run()`.

In [None]:
#| export
def setup_ontology_context(path: str | Path, ns: dict, name: str = 'ont') -> str:
    """Load an ontology and populate `ns` with everything needed to explore it.

    After the call `ns` holds:
      * ``ns[name]``: the loaded Graph
      * ``ns[f"{name}_meta"]``: a GraphMeta wrapping that Graph
      * ``ns['graph_stats']``, ``ns['search_by_label']``,
        ``ns['describe_entity']``: the helper functions with the new
        GraphMeta already bound as their first argument

    The helper keys do not include `name`, so calling this again on the same
    `ns` rebinds them to the most recently created GraphMeta.

    Args:
        path: Path to ontology file
        ns: Namespace dict
        name: Base name for graph handle

    Returns:
        Two-line summary: the load message, then the meta-graph name with
        its class and property counts
    """
    from functools import partial

    # Load the graph; this also stores it under ns[name].
    load_msg = load_ontology(path, ns, name=name)

    # Wrap the stored graph in its navigation scaffolding.
    meta_key = f"{name}_meta"
    graph_meta = GraphMeta(ns[name], name=name)
    ns[meta_key] = graph_meta

    # Register each bounded-view helper pre-bound to this GraphMeta.
    helpers = {
        'graph_stats': graph_stats,
        'search_by_label': search_by_label,
        'describe_entity': describe_entity,
    }
    for helper_name, helper in helpers.items():
        ns[helper_name] = partial(helper, graph_meta)

    n_classes = len(graph_meta.classes)
    n_properties = len(graph_meta.properties)
    return f"{load_msg}\nCreated meta-graph '{meta_key}' with {n_classes} classes, {n_properties} properties"

In [None]:
# Test setup for RLM
# Start from an empty namespace so the listing below shows only what
# setup_ontology_context adds.
test_ns = {}
result = setup_ontology_context('ontology/prov.ttl', test_ns, name='prov')
print(result, end='\n\n')
print("Namespace contains:")
# One line per registered key with the type name of its value.
for k in test_ns:
    print(f"  {k}: {type(test_ns[k]).__name__}")

## Test with RLM

Now let's test asking a question about the PROV ontology using `rlm_run()`.

In [None]:
#| eval: false
from rlm.core import rlm_run

# Set up a fresh namespace holding the PROV graph, its GraphMeta
# ('prov_meta') and the bound helper functions.
ns = {}
setup_ontology_context('ontology/prov.ttl', ns, name='prov')

# Ask a question
# The context is the GraphMeta summary, not the full graph
context = ns['prov_meta'].summary()

# The result is unpacked as (answer, iterations, namespace); `ns` is rebound
# to the third element.
# NOTE(review): rlm_run's signature and return contract are not visible in
# this file - confirm against rlm.core.
answer, iterations, ns = rlm_run(
    "What is the Activity class in the PROV ontology?",
    context,
    ns=ns,
    max_iters=3
)

print(f"Answer: {answer}")
# `iterations` is assumed to be a sized collection, since len() is applied.
print(f"Iterations: {len(iterations)}")