diff --git a/flake.lock b/flake.lock index 994c735..c7a9ba8 100644 --- a/flake.lock +++ b/flake.lock @@ -23,17 +23,17 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1767941162, - "narHash": "sha256-7qJDycrXto4xrQWHbj5BkrRWt/hcfZtjlCstEJTyfJ8=", + "lastModified": 1775029908, + "narHash": "sha256-QuPn+EN/097aBLeSqbQ7vOwc5TSOb68bAxg1+mknfmw=", "owner": "nix-community", "repo": "fenix", - "rev": "80b1a19a713e2558c411f3259fecb1edd4b5b327", + "rev": "380f1969f440e683333af5746caac76811b4a1a8", "type": "github" }, "original": { "owner": "nix-community", "repo": "fenix", - "rev": "80b1a19a713e2558c411f3259fecb1edd4b5b327", + "rev": "380f1969f440e683333af5746caac76811b4a1a8", "type": "github" } }, diff --git a/flake.nix b/flake.nix index fef7f44..758b6a0 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ crane.url = "github:ipetkov/crane"; - fenix.url = "github:nix-community/fenix/80b1a19a713e2558c411f3259fecb1edd4b5b327"; + fenix.url = "github:nix-community/fenix/380f1969f440e683333af5746caac76811b4a1a8"; fenix.inputs.nixpkgs.follows = "nixpkgs"; }; diff --git a/lib/maplib/src/model.rs b/lib/maplib/src/model.rs index d2eb8f4..c6f78a1 100644 --- a/lib/maplib/src/model.rs +++ b/lib/maplib/src/model.rs @@ -20,6 +20,7 @@ use std::fs; use std::io::Write; use std::path::Path; use std::sync::Arc; +use templates::as_rdf::templates_to_triples; use templates::ast::{ConstantTermOrList, PType, Template}; use templates::dataset::TemplateDataset; use templates::document::document_from_str; @@ -30,7 +31,7 @@ use triplestore::{IndexingOptions, NewTriples, Triplestore}; use chrontext::engine::{ChrontextSettings, Engine}; use datalog::ast::DatalogRuleset; -use representation::constants::{FX_PREFIX, FX_PREFIX_IRI, XYZ_PREFIX, XYZ_PREFIX_IRI}; +use representation::constants::{FX_PREFIX, FX_PREFIX_IRI, OTTR_TRIPLE, XYZ_PREFIX, XYZ_PREFIX_IRI}; use representation::dataset::NamedGraph; use representation::prefixes::get_default_prefixes; use tracing::instrument; 
@@ -338,6 +339,30 @@ impl Model {
             .map_err(MaplibError::TriplestoreError)
     }
 
+    /// Returns references to the user-defined templates of this model. The built-in
+    /// `ottr:Triple` template, which every template ultimately expands to, is left out.
+    pub fn get_templates(&self) -> Vec<&Template> {
+        let mut user_templates = Vec::new();
+        for template in self.template_dataset.templates.iter() {
+            if template.signature.iri.as_str() != OTTR_TRIPLE {
+                user_templates.push(template);
+            }
+        }
+        user_templates
+    }
+
+    /// Writes an RDF description of the model's OTTR templates into `graph`, expressed in the
+    /// flattened maplib template vocabulary (prefix `mtpl`). Existing triples in `graph` are
+    /// kept; the template triples are added next to them.
+    #[instrument(skip_all)]
+    pub fn templates_to_graph(&mut self, graph: &NamedGraph) -> Result<(), MaplibError> {
+        let template_triples = templates_to_triples(&self.template_dataset.templates);
+        match self.triplestore.add_triples(template_triples, graph, false) {
+            Ok(_) => Ok(()),
+            Err(err) => Err(MaplibError::TriplestoreError(err)),
+        }
+    }
+
     #[instrument(skip_all)]
     pub fn query(
         &mut self,
diff --git a/lib/representation/src/constants.rs b/lib/representation/src/constants.rs
index f9fd796..6966ce7 100644
--- a/lib/representation/src/constants.rs
+++ b/lib/representation/src/constants.rs
@@ -20,6 +20,10 @@ pub const DEFAULT_PREFIX_IRI: &str = "urn:maplib_default:";
 pub const OTTR_IRI: &str = "http://ns.ottr.xyz/0.4/IRI";
 pub const OTTR_BLANK_NODE: &str = "http://ns.ottr.xyz/0.4/BlankNode";
 
+// Flattened maplib vocabulary used to expose OTTR templates as a queryable named graph.
+pub const MAPLIB_TEMPLATE_PREFIX: &str = "mtpl";
+pub const MAPLIB_TEMPLATE_PREFIX_IRI: &str = "https://datatreehouse.github.io/maplib/vocab#";
+
 pub const OWL_PREFIX: &str = "owl";
 pub const OWL_PREFIX_IRI: &str = "http://www.w3.org/2002/07/owl#";
 
diff --git a/lib/representation/src/prefixes.rs b/lib/representation/src/prefixes.rs
index 05a4495..b9d3832 100644
--- a/lib/representation/src/prefixes.rs
+++ b/lib/representation/src/prefixes.rs
@@ -1,7 +1,8 @@
 use crate::constants::{
-    DEFAULT_PREFIX, DEFAULT_PREFIX_IRI, FOAF_PREFIX, FOAF_PREFIX_IRI, OTTR_PREFIX, OTTR_PREFIX_IRI,
-    OWL_PREFIX, OWL_PREFIX_IRI, RDFS_PREFIX, RDFS_PREFIX_IRI, RDF_PREFIX, RDF_PREFIX_IRI,
-    SHACL_PREFIX, SHACL_PREFIX_IRI, XSD_PREFIX, XSD_PREFIX_IRI,
+    DEFAULT_PREFIX, DEFAULT_PREFIX_IRI, FOAF_PREFIX, FOAF_PREFIX_IRI, MAPLIB_TEMPLATE_PREFIX,
+    MAPLIB_TEMPLATE_PREFIX_IRI, OTTR_PREFIX, OTTR_PREFIX_IRI, OWL_PREFIX, OWL_PREFIX_IRI,
+    RDFS_PREFIX, RDFS_PREFIX_IRI, RDF_PREFIX, RDF_PREFIX_IRI, SHACL_PREFIX, SHACL_PREFIX_IRI,
+    XSD_PREFIX, XSD_PREFIX_IRI,
 };
 use oxrdf::NamedNode;
 use std::collections::HashMap;
@@ -16,6 +17,7 @@ pub fn get_default_prefixes() -> HashMap {
         (SHACL_PREFIX, SHACL_PREFIX_IRI),
         (DEFAULT_PREFIX, DEFAULT_PREFIX_IRI),
         (FOAF_PREFIX, FOAF_PREFIX_IRI),
+        (MAPLIB_TEMPLATE_PREFIX, MAPLIB_TEMPLATE_PREFIX_IRI),
     ];
     HashMap::from_iter(
         predefined
diff --git a/lib/templates/src/as_rdf.rs b/lib/templates/src/as_rdf.rs
new file mode 100644
index 0000000..7702788
--- /dev/null
+++ b/lib/templates/src/as_rdf.rs
@@ -0,0 +1,397 @@
+//! Serialize OTTR templates into a flattened, SPARQL-friendly RDF representation.
+//!
+//! This is not the standard OTTR RDF (rOTTR) list encoding but a denormalized vocabulary
+//! (prefix `mtpl`, base `https://datatreehouse.github.io/maplib/vocab#`) so that template
+//! structure and inter-template/IRI relationships can be queried with one-hop SPARQL and used
+//! to derive SHACL shapes.
+
+use crate::ast::{
+    ConstantTerm, ConstantTermOrList, Instance, PType, Parameter, StottrTerm, Template,
+};
+use oxrdf::vocab::{rdf, xsd};
+use oxrdf::{BlankNode, Literal, NamedNode, NamedOrBlankNode, Term, Triple};
+use representation::constants::{MAPLIB_TEMPLATE_PREFIX_IRI, OTTR_TRIPLE};
+use std::collections::HashSet;
+
+/// Build a `NamedNode` in the maplib template vocabulary.
+fn vocab(local: &str) -> NamedNode {
+    NamedNode::new_unchecked(format!("{MAPLIB_TEMPLATE_PREFIX_IRI}{local}"))
+}
+
+/// Build an `xsd:boolean` literal with lexical form `true` or `false`.
+fn bool_lit(b: bool) -> Literal {
+    Literal::new_typed_literal(if b { "true" } else { "false" }, xsd::BOOLEAN.into_owned())
+}
+
+/// Build an `xsd:integer` literal from a zero-based position.
+fn int_lit(i: usize) -> Literal {
+    Literal::new_typed_literal(i.to_string(), xsd::INTEGER.into_owned())
+}
+
+/// Build a simple literal holding `s`.
+fn string_lit(s: &str) -> Literal {
+    Literal::new_simple_literal(s)
+}
+
+/// Mint the blank node `_:mtpl<counter>` and advance `counter`, so that labels are unique
+/// within one serialization run.
+fn fresh_bnode(counter: &mut usize) -> BlankNode {
+    let b = BlankNode::new_unchecked(format!("mtpl{counter}"));
+    *counter += 1;
+    b
+}
+
+/// Convert a constant stOTTR term into an RDF term. `ConstantTerm::None` has no RDF
+/// counterpart and yields `None`.
+fn constant_term_to_term(ct: &ConstantTerm) -> Option<Term> {
+    match ct {
+        ConstantTerm::Iri(i) => Some(Term::NamedNode(i.clone())),
+        ConstantTerm::BlankNode(b) => Some(Term::BlankNode(b.clone())),
+        ConstantTerm::Literal(l) => Some(Term::Literal(l.clone())),
+        ConstantTerm::None => None,
+    }
+}
+
+/// Reduce a (possibly nested) `PType` to (innermost base type, outermost cardinality, lub?).
+/// Nested lists collapse to the outermost cardinality; `None` types yield `None`.
+fn decompose_ptype(p: &PType) -> Option<(NamedNode, &'static str, bool)> {
+    match p {
+        PType::None => None,
+        PType::Basic(nn) => Some((nn.clone(), "single", false)),
+        PType::Lub(inner) => decompose_ptype(inner).map(|(b, c, _)| (b, c, true)),
+        PType::List(inner) => decompose_ptype(inner).map(|(b, _, l)| (b, "list", l)),
+        PType::NEList(inner) => decompose_ptype(inner).map(|(b, _, l)| (b, "nelist", l)),
+    }
+}
+
+/// The predicate IRI of an `ottr:Triple` instance, if its predicate argument (index 1)
+/// is a constant IRI.
+fn ottr_triple_predicate_iri(inst: &Instance) -> Option<NamedNode> {
+    let arg = inst.argument_list.get(1)?;
+    if let StottrTerm::ConstantTerm(ConstantTermOrList::ConstantTerm(ConstantTerm::Iri(i))) =
+        &arg.term
+    {
+        Some(i.clone())
+    } else {
+        None
+    }
+}
+
+/// Serialize a slice of templates to triples in the flattened maplib vocabulary.
+/// The built-in `ottr:Triple` template is skipped.
+pub fn templates_to_triples(templates: &[Template]) -> Vec<Triple> {
+    let mut triples = vec![];
+    let mut counter = 0usize;
+    for t in templates {
+        if t.signature.iri.as_str() == OTTR_TRIPLE {
+            continue;
+        }
+        template_to_triples(t, &mut triples, &mut counter);
+    }
+    triples
+}
+
+/// Emit all triples describing one template: its `mtpl:Template` typing, one blank node per
+/// parameter and per pattern instance, plus the `usesPredicate` and `referencesIri` shortcuts.
+fn template_to_triples(t: &Template, triples: &mut Vec<Triple>, counter: &mut usize) {
+    let subj = NamedOrBlankNode::NamedNode(t.signature.iri.clone());
+    triples.push(Triple::new(
+        subj.clone(),
+        rdf::TYPE.into_owned(),
+        vocab("Template"),
+    ));
+
+    for (idx, p) in t.signature.parameter_list.iter().enumerate() {
+        let pb = fresh_bnode(counter);
+        triples.push(Triple::new(subj.clone(), vocab("hasParameter"), pb.clone()));
+        parameter_to_triples(p, idx, &pb, triples);
+    }
+
+    // Predicates already linked through `usesPredicate`, so each one is emitted only once.
+    let mut used_predicates = HashSet::new();
+    for (idx, inst) in t.pattern_list.iter().enumerate() {
+        let ib = fresh_bnode(counter);
+        triples.push(Triple::new(subj.clone(), vocab("hasInstance"), ib.clone()));
+        instance_to_triples(inst, idx, &ib, triples, counter);
+        if inst.template_iri.as_str() == OTTR_TRIPLE {
+            if let Some(pred) = ottr_triple_predicate_iri(inst) {
+                if used_predicates.insert(pred.as_str().to_string()) {
+                    triples.push(Triple::new(subj.clone(), vocab("usesPredicate"), pred));
+                }
+            }
+        }
+    }
+
+    // Every distinct IRI referenced by the template, excluding the template's own IRI.
+    let mut nns = vec![];
+    t.find_named_nodes(&mut nns);
+    let mut seen = HashSet::new();
+    for nn in nns {
+        if nn.as_str() == t.signature.iri.as_str() {
+            continue;
+        }
+        if seen.insert(nn.as_str().to_string()) {
+            triples.push(Triple::new(subj.clone(), vocab("referencesIri"), nn));
+        }
+    }
+}
+
+/// Describe one signature parameter on the blank node `pb`: position, variable name, flags and,
+/// when present, its decomposed type and constant (non-list) default value.
+fn parameter_to_triples(p: &Parameter, idx: usize, pb: &BlankNode, triples: &mut Vec<Triple>) {
+    let s = NamedOrBlankNode::BlankNode(pb.clone());
+    triples.push(Triple::new(s.clone(), rdf::TYPE.into_owned(), vocab("Parameter")));
+    triples.push(Triple::new(s.clone(), vocab("index"), int_lit(idx)));
+    triples.push(Triple::new(
+        s.clone(),
+        vocab("variableName"),
+        string_lit(p.variable.as_str()),
+    ));
+    triples.push(Triple::new(s.clone(), vocab("optional"), bool_lit(p.optional)));
+    triples.push(Triple::new(s.clone(), vocab("nonBlank"), bool_lit(p.non_blank)));
+    if let Some(pt) = &p.ptype {
+        if let Some((base, card, lub)) = decompose_ptype(pt) {
+            triples.push(Triple::new(s.clone(), vocab("type"), base));
+            triples.push(Triple::new(s.clone(), vocab("cardinality"), string_lit(card)));
+            triples.push(Triple::new(s.clone(), vocab("lub"), bool_lit(lub)));
+        }
+    }
+    if let Some(ConstantTermOrList::ConstantTerm(ct)) = &p.default_value {
+        if let Some(term) = constant_term_to_term(ct) {
+            triples.push(Triple::new(s.clone(), vocab("defaultValue"), term));
+        }
+    }
+}
+
+/// Describe one pattern instance on the blank node `ib`, minting a further blank node per
+/// argument; list-valued arguments are only flagged with `isList`, their items are not emitted.
+fn instance_to_triples(
+    inst: &Instance,
+    idx: usize,
+    ib: &BlankNode,
+    triples: &mut Vec<Triple>,
+    counter: &mut usize,
+) {
+    let s = NamedOrBlankNode::BlankNode(ib.clone());
+    triples.push(Triple::new(s.clone(), rdf::TYPE.into_owned(), vocab("Instance")));
+    triples.push(Triple::new(s.clone(), vocab("index"), int_lit(idx)));
+    triples.push(Triple::new(
+        s.clone(),
+        vocab("callsTemplate"),
+        inst.template_iri.clone(),
+    ));
+    if let Some(le) = &inst.list_expander {
+        triples.push(Triple::new(
+            s.clone(),
+            vocab("listExpander"),
+            string_lit(&le.to_string()),
+        ));
+    }
+    for (aidx, arg) in inst.argument_list.iter().enumerate() {
+        let ab = fresh_bnode(counter);
+        triples.push(Triple::new(s.clone(), vocab("hasArgument"), ab.clone()));
+        let sa = NamedOrBlankNode::BlankNode(ab);
+        triples.push(Triple::new(
+            sa.clone(),
+            rdf::TYPE.into_owned(),
+            vocab("Argument"),
+        ));
+        triples.push(Triple::new(sa.clone(), vocab("index"), int_lit(aidx)));
+        triples.push(Triple::new(
+            sa.clone(),
+            vocab("listExpand"),
+            bool_lit(arg.list_expand),
+        ));
+        match &arg.term {
+            StottrTerm::Variable(v) => {
+                triples.push(Triple::new(
+                    sa.clone(),
+                    vocab("variableName"),
+                    string_lit(v.as_str()),
+                ));
+            }
+            StottrTerm::ConstantTerm(ConstantTermOrList::ConstantTerm(ct)) => {
+                if let Some(term) = constant_term_to_term(ct) {
+                    triples.push(Triple::new(sa.clone(), vocab("constantValue"), term));
+                }
+            }
+            StottrTerm::ConstantTerm(ConstantTermOrList::ConstantList(_)) | StottrTerm::List(_) => {
+                triples.push(Triple::new(sa.clone(), vocab("isList"), bool_lit(true)));
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::dataset::TemplateDataset;
+    use crate::document::document_from_str;
+    use representation::prefixes::get_default_prefixes;
+
+    fn build(s: &str) -> Vec<Triple> {
+        let doc = document_from_str(s, Some(&get_default_prefixes())).unwrap();
+        let ds = TemplateDataset::from_documents(vec![doc]).unwrap();
+        templates_to_triples(&ds.templates)
+    }
+
+    fn subject_str(s: &NamedOrBlankNode) -> String {
+        match s {
+            NamedOrBlankNode::NamedNode(n) => n.as_str().to_string(),
+            NamedOrBlankNode::BlankNode(b) => format!("_:{}", b.as_str()),
+            #[allow(unreachable_patterns)]
+            _ => unreachable!(),
+        }
+    }
+
+    fn term_str(t: &Term) -> String {
+        match t {
+            Term::NamedNode(n) => n.as_str().to_string(),
+            Term::BlankNode(b) => format!("_:{}", b.as_str()),
+            Term::Literal(l) => l.value().to_string(),
+            #[allow(unreachable_patterns)]
+            _ => unreachable!(),
+        }
+    }
+
+    fn pred(local: &str) -> String {
+        format!("{MAPLIB_TEMPLATE_PREFIX_IRI}{local}")
+    }
+
+    ///
Objects of (subject, mtpl:local) triples.
+    fn objs(ts: &[Triple], subj: &str, local: &str) -> Vec<String> {
+        let p = pred(local);
+        ts.iter()
+            .filter(|t| subject_str(&t.subject) == subj && t.predicate.as_str() == p)
+            .map(|t| term_str(&t.object))
+            .collect()
+    }
+
+    /// All blank-node subjects reachable from `subj` via mtpl:local.
+    fn child_bnodes(ts: &[Triple], subj: &str, local: &str) -> Vec<String> {
+        objs(ts, subj, local)
+    }
+
+    const PERSON: &str = "\
+@prefix ex: <http://example.org/> .
+ex:Person [ ottr:IRI ?p, xsd:string ?name ] :: {
+    ottr:Triple(?p, rdf:type, ex:Person) ,
+    ottr:Triple(?p, ex:hasName, ?name)
+} .";
+
+    #[test]
+    fn template_is_typed_and_skips_ottr_triple() {
+        let ts = build(PERSON);
+        // Person is typed mtpl:Template
+        let types: Vec<_> = ts
+            .iter()
+            .filter(|t| {
+                subject_str(&t.subject) == "http://example.org/Person"
+                    && t.predicate.as_str() == rdf::TYPE.as_str()
+            })
+            .map(|t| term_str(&t.object))
+            .collect();
+        assert_eq!(types, vec![pred("Template")]);
+        // ottr:Triple builtin is never emitted as a subject
+        assert!(!ts.iter().any(|t| subject_str(&t.subject) == OTTR_TRIPLE));
+    }
+
+    #[test]
+    fn parameters_have_index_type_and_flags() {
+        let ts = build(PERSON);
+        let params = child_bnodes(&ts, "http://example.org/Person", "hasParameter");
+        assert_eq!(params.len(), 2);
+
+        // Find the parameter named "name" and check its type is xsd:string, single, non-lub.
+        let name_param = params
+            .iter()
+            .find(|b| objs(&ts, b, "variableName") == vec!["name".to_string()])
+            .expect("name parameter");
+        assert_eq!(
+            objs(&ts, name_param, "type"),
+            vec!["http://www.w3.org/2001/XMLSchema#string".to_string()]
+        );
+        assert_eq!(objs(&ts, name_param, "cardinality"), vec!["single".to_string()]);
+        assert_eq!(objs(&ts, name_param, "optional"), vec!["false".to_string()]);
+
+        let p_param = params
+            .iter()
+            .find(|b| objs(&ts, b, "variableName") == vec!["p".to_string()])
+            .expect("p parameter");
+        assert_eq!(
+            objs(&ts, p_param, "type"),
+            vec!["http://ns.ottr.xyz/0.4/IRI".to_string()]
+        );
+    }
+
+    #[test]
+    fn instances_call_ottr_triple() {
+        let ts = build(PERSON);
+        let instances = child_bnodes(&ts, "http://example.org/Person", "hasInstance");
+        assert_eq!(instances.len(), 2);
+        for i in &instances {
+            assert_eq!(objs(&ts, i, "callsTemplate"), vec![OTTR_TRIPLE.to_string()]);
+        }
+    }
+
+    #[test]
+    fn uses_predicate_denormalized() {
+        let ts = build(PERSON);
+        let mut preds = objs(&ts, "http://example.org/Person", "usesPredicate");
+        preds.sort();
+        assert_eq!(
+            preds,
+            vec![
+                "http://example.org/hasName".to_string(),
+                "http://www.w3.org/1999/02/22-rdf-syntax-ns#type".to_string(),
+            ]
+        );
+    }
+
+    #[test]
+    fn references_iri_excludes_self() {
+        let ts = build(PERSON);
+        let refs = objs(&ts, "http://example.org/Person", "referencesIri");
+        assert!(refs.contains(&"http://example.org/hasName".to_string()));
+        // self is excluded
+        assert!(!refs.contains(&"http://example.org/Person".to_string()));
+    }
+
+    #[test]
+    fn list_expander_default_and_cardinality() {
+        let s = "\
+@prefix ex: <http://example.org/> .
+ex:Tagged [ ? ! ottr:IRI ?c = ex:Default , NEList<xsd:string> ?xs ] :: {
+    cross | ottr:Triple(?c, ex:tag, ++ ?xs)
+} .";
+        let ts = build(s); // NOTE(review): the element type of ?xs (xsd:string) is assumed — confirm
+        let params = child_bnodes(&ts, "http://example.org/Tagged", "hasParameter");
+
+        let c = params
+            .iter()
+            .find(|b| objs(&ts, b, "variableName") == vec!["c".to_string()])
+            .unwrap();
+        assert_eq!(objs(&ts, c, "optional"), vec!["true".to_string()]);
+        assert_eq!(objs(&ts, c, "nonBlank"), vec!["true".to_string()]);
+        assert_eq!(
+            objs(&ts, c, "defaultValue"),
+            vec!["http://example.org/Default".to_string()]
+        );
+
+        let xs = params
+            .iter()
+            .find(|b| objs(&ts, b, "variableName") == vec!["xs".to_string()])
+            .unwrap();
+        assert_eq!(objs(&ts, xs, "cardinality"), vec!["nelist".to_string()]);
+
+        // instance carries the list expander, and the ++ argument is flagged
+        let instance = &child_bnodes(&ts, "http://example.org/Tagged", "hasInstance")[0];
+        assert_eq!(objs(&ts, instance, "listExpander"), vec!["cross".to_string()]);
+        let args = child_bnodes(&ts, instance, "hasArgument");
+        let expanded: Vec<_> = args
+            .iter()
+            .filter(|a| objs(&ts, a, "listExpand") == vec!["true".to_string()])
+            .collect();
+        assert_eq!(expanded.len(), 1);
+    }
+}
diff --git a/lib/templates/src/lib.rs b/lib/templates/src/lib.rs
index 819f460..949e65d 100644
--- a/lib/templates/src/lib.rs
+++ b/lib/templates/src/lib.rs
@@ -1,6 +1,7 @@
 use crate::ast::PType;
 use representation::RDFNodeState;
 
+pub mod as_rdf;
 pub mod ast;
 mod compatible;
 pub mod dataset;
diff --git a/lib/triplestore/src/triples_read.rs b/lib/triplestore/src/triples_read.rs
index 5503b31..ffb3036 100644
--- a/lib/triplestore/src/triples_read.rs
+++ b/lib/triplestore/src/triples_read.rs
@@ -1,6 +1,6 @@
 use super::Triplestore;
 use crate::errors::TriplestoreError;
-use crate::TriplesToAdd;
+use crate::{NewTriples, TriplesToAdd};
 use std::cmp;
 
 use cimxml_import::{fix_cim_quad, Remapper};
@@ -388,6 +388,76 @@ impl Triplestore {
         }
         Ok(())
     }
+
+    // Insert already-parsed in-memory triples into a graph, grouping them by
predicate and
+    // subject/object type into the same column representation the RDF readers produce. This
+    // avoids serializing to an RDF string and parsing it back when the triples are already
+    // available as oxrdf Triples (e.g. when materializing templates).
+    #[instrument(skip_all)]
+    pub fn add_triples(
+        &mut self,
+        triples: Vec, // NOTE(review): element type missing here — presumably `Triple`, as destructured below
+        graph: &NamedGraph,
+        transient: bool,
+    ) -> Result, TriplestoreError> { // NOTE(review): Ok type missing here — it is whatever `add_triples_vec` returns
+        let mut subj_type_map: HashMap = HashMap::new();
+        let mut obj_type_map: HashMap = HashMap::new();
+        let mut predicate_map: PredMap = HashMap::new(); // predicate IRI -> subject type -> object type -> (subjects, objects)
+        for Triple {
+            subject,
+            predicate,
+            object,
+        } in triples
+        {
+            let subject_type =
+                get_or_insert_dt(get_subject_datatype_ref(&subject), &mut subj_type_map);
+            let object_type = get_or_insert_dt(get_term_datatype_ref(&object), &mut obj_type_map);
+            let (subjects, objects) = predicate_map
+                .entry(predicate.as_str().to_string())
+                .or_default()
+                .entry(subject_type.clone())
+                .or_default()
+                .entry(object_type.clone())
+                .or_insert_with(|| {
+                    (
+                        SeriesBuilder::new(&subject_type),
+                        SeriesBuilder::new(&object_type),
+                    )
+                });
+            if objects.parse_term(&object).is_ok() { // NOTE(review): a triple whose object fails to parse is dropped with no error or log
+                subjects.push_named_or_blank(&subject); // pushed only after the object, so both columns keep the same length
+            }
+        }
+
+        let mut ttas = vec![];
+        for (predicate, by_subject) in predicate_map {
+            let predicate = NamedNode::new_unchecked(predicate);
+            for (subject_type, by_object) in by_subject {
+                for (object_type, (subjects, objects)) in by_object {
+                    let l = subjects.len(); // row count of this (predicate, subject type, object type) group
+                    let df = DataFrame::new(
+                        l,
+                        vec![
+                            subjects.into_series(SUBJECT_COL_NAME).into_column(),
+                            objects.into_series(OBJECT_COL_NAME).into_column(),
+                        ],
+                    )
+                    .unwrap(); // NOTE(review): panics if the frame cannot be built — consider returning an error
+                    ttas.push(TriplesToAdd {
+                        df,
+                        subject_cat_state: subject_type.default_input_cat_state(),
+                        object_cat_state: object_type.default_input_cat_state(),
+                        subject_type: subject_type.clone(),
+                        object_type,
+                        predicate: Some(predicate.clone()),
+                        graph: graph.clone(),
+                        predicate_cat_state: None,
+                    });
+                }
+            }
+        }
+        self.add_triples_vec(ttas, transient)
+    }
 }
 
 fn
term_to_oxrdf_term(t: Term, parser_call: &str) -> Term {
diff --git a/py_maplib/maplib/__init__.pyi b/py_maplib/maplib/__init__.pyi
index 2db74f6..213463a 100644
--- a/py_maplib/maplib/__init__.pyi
+++ b/py_maplib/maplib/__init__.pyi
@@ -524,6 +524,38 @@ class Model:
         :return:
         """
 
+    def get_templates(self) -> List["Template"]:
+        """
+        Return the OTTR templates currently held by the model (whether added as stOTTR or
+        programmatically). The built-in ``ottr:Triple`` primitive is not included.
+
+        Usage:
+        >>> for t in m.get_templates():
+        ...     print(t)
+
+        :return: A list of Template objects.
+        """
+
+    def templates_to_graph(self, graph: str = None) -> None:
+        """
+        Materialize the model's OTTR templates into a named graph as RDF, using the flattened
+        maplib template vocabulary (prefix ``mtpl``, base
+        ``https://datatreehouse.github.io/maplib/vocab#``). This lets template structure and
+        the interconnectedness of IRIs across templates be inspected with ordinary SPARQL, and
+        used to derive SHACL shapes. The triples are added alongside any existing content of
+        the target graph (the graph is not replaced).
+
+        Usage:
+        >>> m.templates_to_graph("https://example.org/templates")
+        >>> m.query('''
+        ... PREFIX mtpl: <https://datatreehouse.github.io/maplib/vocab#>
+        ... SELECT ?template ?iri WHERE {
+        ...   GRAPH <https://example.org/templates> { ?template mtpl:referencesIri ?iri }
+        ... }''')
+
+        :param graph: The IRI of the graph to add the template triples to. Defaults to the default graph.
+        """
+
     def add_prefixes(self, prefixes: Dict[str, str]):
         """
         Add prefixes that will be used in parsing of SPARQL, Datalog and OTTR.
diff --git a/py_maplib/src/lib.rs b/py_maplib/src/lib.rs
index c503739..63cdc6a 100644
--- a/py_maplib/src/lib.rs
+++ b/py_maplib/src/lib.rs
@@ -813,6 +813,39 @@ impl PyModel {
         })
     }
 
+    // Clones every user-defined template of the model into a `PyTemplate`; the model lock is
+    // taken inside the detached closure and released when it returns.
+    #[instrument(skip_all)]
+    fn get_templates(&self, py: Python<'_>) -> PyResult<Vec<PyTemplate>> {
+        py.detach(|| {
+            let inner = self.inner.lock().unwrap();
+            let templates = inner
+                .get_templates()
+                .into_iter()
+                .map(|t| PyTemplate {
+                    template: t.clone(),
+                })
+                .collect();
+            Ok(templates)
+        })
+    }
+
+    // Parses the optional graph IRI (`None` selects the default graph) and materializes the
+    // template triples into that graph.
+    #[pyo3(signature = (graph=None))]
+    #[instrument(skip_all)]
+    fn templates_to_graph(&self, py: Python<'_>, graph: Option<String>) -> PyResult<()> {
+        py.detach(move || {
+            let mut inner = self.inner.lock().unwrap();
+            let graph = parse_optional_named_node(graph)?;
+            let named_graph = NamedGraph::from_maybe_named_node(graph.as_ref());
+            inner
+                .templates_to_graph(&named_graph)
+                .map_err(PyMaplibError::from)?;
+            Ok(())
+        })
+    }
+
     #[pyo3(signature = (file_path, format=None, graph=None, prefixes=None))]
     #[instrument(skip_all)]
     fn write(
diff --git a/py_maplib/tests/test_templates_to_graph.py b/py_maplib/tests/test_templates_to_graph.py
new file mode 100644
index 0000000..f8dc1b3
--- /dev/null
+++ b/py_maplib/tests/test_templates_to_graph.py
@@ -0,0 +1,248 @@
+import polars as pl
+from polars.testing import assert_frame_equal
+from maplib import Model
+
+MTPL = "https://datatreehouse.github.io/maplib/vocab#"
+GRAPH = "https://example.org/templates"
+
+PERSON = """
+@prefix ex: <http://example.net/ns#> .
+@prefix ottr: <http://ns.ottr.xyz/0.4/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+ex:Person [ ottr:IRI ?p, xsd:string ?name ] :: {
+    ottr:Triple(?p, rdf:type, ex:Person) ,
+    ottr:Triple(?p, ex:hasName, ?name)
+} .
+"""
+
+WIDGET = """
+@prefix ex: <http://example.net/ns#> .
+@prefix ottr: <http://ns.ottr.xyz/0.4/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+ex:Person [ ottr:IRI ?p ] :: {
+    ottr:Triple(?p, rdf:type, ex:Person)
+} .
+
+ex:Widget [ ottr:IRI ?w, xsd:string ?label, ?
xsd:integer ?weight, List<xsd:string> ?tags, ottr:IRI ?owner ] :: {
+    ottr:Triple(?w, rdf:type, ex:Widget) ,
+    ottr:Triple(?w, ex:label, ?label) ,
+    ottr:Triple(?w, ex:weight, ?weight) ,
+    ottr:Triple(?w, ex:ownedBy, ?owner) ,
+    ex:Person(?owner) ,
+    cross | ottr:Triple(?w, ex:tag, ++ ?tags)
+} .
+"""
+
+
+def test_get_templates_excludes_ottr_triple():
+    m = Model()
+    m.add_template(PERSON)
+    templates = m.get_templates()
+    assert len(templates) == 1
+    assert templates[0].iri.iri == "http://example.net/ns#Person"
+
+
+def test_templates_to_graph_emits_vocab():
+    m = Model()
+    m.add_template(PERSON)
+    m.templates_to_graph(GRAPH)
+
+    types = m.query(
+        f"""
+        PREFIX mtpl: <{MTPL}>
+        SELECT ?t WHERE {{
+          GRAPH <{GRAPH}> {{ ?t a mtpl:Template }}
+        }}
+        """
+    )
+    assert types.height == 1
+    assert "Person" in types["t"][0]
+
+
+def test_templates_to_graph_references_iri():
+    m = Model()
+    m.add_template(PERSON)
+    m.templates_to_graph(GRAPH)
+
+    refs = m.query(
+        f"""
+        PREFIX mtpl: <{MTPL}>
+        PREFIX ex: <http://example.net/ns#>
+        SELECT ?iri WHERE {{
+          GRAPH <{GRAPH}> {{ ex:Person mtpl:referencesIri ?iri }}
+        }}
+        ORDER BY ?iri
+        """
+    )
+    iris = list(refs["iri"])
+    assert "<http://example.net/ns#hasName>" in iris
+    assert "<http://example.net/ns#Person>" not in iris
+
+
+def test_templates_to_graph_uses_predicate():
+    m = Model()
+    m.add_template(PERSON)
+    m.templates_to_graph(GRAPH)
+
+    preds = m.query(
+        f"""
+        PREFIX mtpl: <{MTPL}>
+        PREFIX ex: <http://example.net/ns#>
+        SELECT ?p WHERE {{
+          GRAPH <{GRAPH}> {{ ex:Person mtpl:usesPredicate ?p }}
+        }}
+        ORDER BY ?p
+        """
+    )
+    vals = list(preds["p"])
+    assert "<http://example.net/ns#hasName>" in vals
+    assert "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>" in vals
+
+
+def test_templates_to_graph_added_alongside_data():
+    m = Model()
+    m.add_template(PERSON)
+
+    df = pl.DataFrame(
+        {
+            "p": ["http://example.net/ns#alice"],
+            "name": ["Alice"],
+        }
+    )
+    m.map("ex:Person", df)
+    m.templates_to_graph(GRAPH)
+
+    data = m.query(
+        """
+        PREFIX ex: <http://example.net/ns#>
+        SELECT ?name WHERE { ?p ex:hasName ?name }
+        """
+    )
+    assert data.height == 1
+    assert data["name"][0] == "Alice"
+
+    meta = m.query(
+        f"""
+        PREFIX mtpl: <{MTPL}>
+        SELECT ?t WHERE {{ GRAPH <{GRAPH}> {{ ?t a mtpl:Template }} }}
+        """
+    )
+    assert meta.height == 1
+
+
+def test_infer_nodeshape_from_template_graph():
+    m = Model()
+    m.add_template(WIDGET)
+    m.templates_to_graph(GRAPH)
+
+    shapes_graph = "https://example.org/shapes"
+    m.insert(
+        f"""
+        PREFIX mtpl: <{MTPL}>
+        PREFIX sh: <http://www.w3.org/ns/shacl#>
+        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        PREFIX ottr: <http://ns.ottr.xyz/0.4/>
+        CONSTRUCT {{
+          ?template a sh:NodeShape ;
+                    sh:targetClass ?class ;
+                    sh:property _:prop .
+          _:prop sh:path ?p ;
+                 sh:datatype ?datatype ;
+                 sh:nodeKind ?nodeKind ;
+                 sh:class ?objectClass ;
+                 sh:minCount ?minCount ;
+                 sh:maxCount ?maxCount .
+        }} WHERE {{
+          GRAPH <{GRAPH}> {{
+            ?template a mtpl:Template ;
+                      mtpl:hasInstance ?typeInst , ?inst ;
+                      mtpl:hasParameter ?param .
+            ?typeInst mtpl:hasArgument ?tpArg , ?tcArg .
+            ?tpArg mtpl:index 1 ; mtpl:constantValue rdf:type .
+            ?tcArg mtpl:index 2 ; mtpl:constantValue ?class .
+            ?inst mtpl:callsTemplate ottr:Triple ;
+                  mtpl:hasArgument ?predArg , ?objArg .
+            ?predArg mtpl:index 1 ; mtpl:constantValue ?p .
+            FILTER(?p != rdf:type)
+            ?objArg mtpl:index 2 ; mtpl:variableName ?var .
+            ?param mtpl:variableName ?var ;
+                   mtpl:type ?ptype ;
+                   mtpl:cardinality ?card ;
+                   mtpl:optional ?optional .
+            BIND(IF(?optional || ?card = "list", 0, 1) AS ?minCount)
+            OPTIONAL {{
+              FILTER(?card = "single")
+              BIND(1 AS ?maxCount)
+            }}
+            OPTIONAL {{
+              FILTER(?ptype != ottr:IRI)
+              BIND(?ptype AS ?datatype)
+            }}
+            OPTIONAL {{
+              FILTER(?ptype = ottr:IRI)
+              BIND(sh:IRI AS ?nodeKind)
+            }}
+            OPTIONAL {{
+              ?template mtpl:hasInstance ?callInst .
+              ?callInst mtpl:callsTemplate ?callee ;
+                        mtpl:hasArgument ?callArg .
+              FILTER(?callee != ottr:Triple)
+              ?callArg mtpl:variableName ?var .
+              ?callee mtpl:hasInstance ?calleeType .
+              ?calleeType mtpl:hasArgument ?ctPredArg , ?ctClassArg .
+              ?ctPredArg mtpl:index 1 ; mtpl:constantValue rdf:type .
+              ?ctClassArg mtpl:index 2 ; mtpl:constantValue ?objectClass .
+            }}
+          }}
+        }}
+        """,
+        target_graph=shapes_graph,
+    )
+
+    shapes = m.query(
+        f"""
+        PREFIX sh: <http://www.w3.org/ns/shacl#>
+        SELECT ?target ?path ?datatype ?nodeKind ?objectClass ?minCount ?maxCount WHERE {{
+          GRAPH <{shapes_graph}> {{
+            ?shape a sh:NodeShape ;
+                   sh:targetClass ?target ;
+                   sh:property ?prop .
+            ?prop sh:path ?path ;
+                  sh:minCount ?minCount .
+            OPTIONAL {{ ?prop sh:datatype ?datatype }}
+            OPTIONAL {{ ?prop sh:nodeKind ?nodeKind }}
+            OPTIONAL {{ ?prop sh:class ?objectClass }}
+            OPTIONAL {{ ?prop sh:maxCount ?maxCount }}
+          }}
+        }}
+        ORDER BY ?path
+        """
+    )
+    xsd_string = "<http://www.w3.org/2001/XMLSchema#string>"
+    xsd_integer = "<http://www.w3.org/2001/XMLSchema#integer>"
+    sh_iri = "<http://www.w3.org/ns/shacl#IRI>"
+    ex = lambda local: f"<http://example.net/ns#{local}>"
+
+    expected = pl.DataFrame(
+        {
+            "target": [ex("Widget")] * 4,
+            "path": [ex("label"), ex("ownedBy"), ex("tag"), ex("weight")],
+            "datatype": [xsd_string, None, xsd_string, xsd_integer],
+            "nodeKind": [None, sh_iri, None, None],
+            "objectClass": [None, ex("Person"), None, None],
+            "minCount": [1, 1, 0, 0],
+            "maxCount": [1, 1, None, 1],
+        },
+        schema={
+            "target": pl.String,
+            "path": pl.String,
+            "datatype": pl.String,
+            "nodeKind": pl.String,
+            "objectClass": pl.String,
+            "minCount": pl.Int64,
+            "maxCount": pl.Int64,
+        },
+    )
+    assert_frame_equal(shapes, expected)