Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDO HTTPS and DN creator script #1485

Merged
merged 4 commits into from
Dec 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3,949 changes: 2,512 additions & 1,437 deletions rdflib/namespace/_SDO.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rdflib/namespace/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def compute_qname(self, uri, generate=True):
pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
if pl_namespace is not None:
namespace = pl_namespace
name = uri[len(namespace) :]
name = uri[len(namespace):]

namespace = URIRef(namespace)
prefix = self.store.prefix(namespace) # warning multiple prefixes problem
Expand Down
186 changes: 186 additions & 0 deletions rdflib/tools/defined_namespace_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""
This rdflib Python script creates a DefinedNamespace Python file from a given RDF file

It is a very simple script: it finds all things defined in the RDF file within a given
namespace:

<thing> a ?x

where ?x is anything and <thing> starts with the given namespace

Nicholas J. Car, Dec, 2021
"""
import sys
from pathlib import Path
import argparse
import datetime

sys.path.append(str(Path(__file__).parent.absolute().parent.parent))

from rdflib import Graph
from rdflib.namespace import DCTERMS, OWL, RDFS, SKOS
from rdflib.util import guess_format


def validate_namespace(namespace):
    """Check that ``namespace`` ends with one of the two accepted terminators.

    :param namespace: the namespace IRI string supplied by the user.
    :raises ValueError: if the string ends with neither ``/`` nor ``#``.
    """
    # Guard clause: a trailing "/" or "#" is the only accepted shape.
    if namespace.endswith("/") or namespace.endswith("#"):
        return
    raise ValueError("The supplied namespace must end with '/' or '#'")


def validate_object_id(object_id):
    """Check that ``object_id`` is a non-empty, all-capitals string.

    Every character must satisfy ``str.isupper()``, so digits, punctuation
    and lower-case letters are all rejected.

    :param object_id: the identifier string supplied by the user.
    :raises ValueError: if the string is empty or contains any character
        that is not an upper-case letter.
    """
    # An empty string has no offending character, so it must be rejected
    # explicitly rather than slipping through the per-character check.
    if not object_id or not all(c.isupper() for c in object_id):
        raise ValueError("The supplied object_id must be an all-capitals string")


# This function is not used: it was originally written to get classes and to be used
# alongside a method to get properties, but then it was decided that a single function
# to get everything in the namespace, get_target_namespace_elements(), was both simpler
# and better covered all namespace elements, so that function is used instead.
#
# def get_classes(g, target_namespace):
# namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
# q = """
# SELECT DISTINCT ?x ?def
# WHERE {
# # anything that is an instance of owl:Class or rdfs:Class
# # or any subclass of them
# VALUES ?c { owl:Class rdfs:Class }
# ?x rdfs:subClassOf*/a ?c .
#
# # get any definitions, if they have one
# OPTIONAL {
# ?x rdfs:comment|dcterms:description|skos:definition ?def
# }
#
# # only get results for the targetted namespace (supplied by user)
# FILTER STRSTARTS(STR(?x), "xxx")
# }
# """.replace("xxx", target_namespace)
# classes = []
# for r in g.query(q, initNs=namespaces):
# classes.append((str(r[0]), str(r[1])))
#
# classes.sort(key=lambda tup: tup[1])
#
# return classes


def get_target_namespace_elements(g, target_namespace):
    """Find every typed subject in ``g`` whose IRI starts with ``target_namespace``.

    Runs a SPARQL query over ``g`` selecting each subject that has an
    ``rdf:type`` and, optionally, a description taken from
    ``dcterms:description``, ``rdfs:comment`` or ``skos:definition``.

    :param g: the graph to query; must provide ``query(query, initNs=...)``.
    :param target_namespace: namespace IRI string; it is substituted into the
        query's ``STRSTARTS`` filter and stripped from each subject IRI to
        form the element's local name.
    :return: a 2-tuple ``(elements, elements_strs)`` where ``elements`` is a
        list of ``(subject_iri, description)`` string pairs sorted by IRI, and
        ``elements_strs`` is the matching list of annotation lines to write
        into the class body.
    """
    namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
    # The query text is kept flush-left so the string sent to the engine is
    # unchanged; "xxx" is a placeholder swapped for the target namespace.
    q = """
SELECT DISTINCT ?s ?def
WHERE {
# all things in the RDF data (anything RDF.type...)
?s a ?o .

# get any definitions, if they have one
OPTIONAL {
?s dcterms:description|rdfs:comment|skos:definition ?def
}

# only get results for the target namespace (supplied by user)
FILTER STRSTARTS(STR(?s), "xxx")
}
""".replace("xxx", target_namespace)

    # NOTE(review): a row with no description is presumably stringified as
    # "None" by str() below -- confirm against the query result type.
    elements = sorted(
        ((str(r[0]), str(r[1])) for r in g.query(q, initNs=namespaces)),
        key=lambda tup: tup[0],
    )

    elements_strs = []
    for iri, definition in elements:
        # Descriptions may span lines; flatten so each stays a one-line comment.
        desc = definition.replace("\n", " ")
        # Use the function's own parameter, not the script-level ``args``, so
        # the function also works when imported and called directly.
        local_name = iri.replace(target_namespace, "")
        elements_strs.append(f" {local_name}: URIRef # {desc}\n")

    return elements, elements_strs


def make_dn_file(output_file_name, target_namespace, elements_strs, object_id, fail):
    """Write a DefinedNamespace Python module to ``output_file_name``.

    The file contains the rdflib imports, a ``DefinedNamespace`` subclass named
    ``object_id`` with a placeholder docstring, its ``_NS`` attribute, an
    optional ``_fail = True`` attribute, and then the supplied element lines.

    :param output_file_name: path of the file to create (overwritten if present).
    :param target_namespace: namespace IRI string written into ``_NS``.
    :param elements_strs: iterable of already formatted, newline-terminated
        class-body lines, one per namespace element.
    :param object_id: name of the generated class.
    :param fail: when true, ``_fail = True`` is written into the class body.
    """
    # Same naive-UTC text that utcnow() produced, without the deprecated call.
    generated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # The class docstring is indented by one space, matching the indent used
    # for the _NS / _fail lines below, so the generated module is valid Python.
    header = (
        "from rdflib.term import URIRef\n"
        "from rdflib.namespace import DefinedNamespace, Namespace\n"
        "\n"
        "\n"
        f"class {object_id}(DefinedNamespace):\n"
        ' """\n'
        " DESCRIPTION_EDIT_ME_!\n"
        "\n"
        " Generated from: SOURCE_RDF_FILE_EDIT_ME_!\n"
        f" Date: {generated_at}\n"
        ' """\n'
    )

    # Explicit UTF-8 so element descriptions with non-ASCII text are written
    # the same way regardless of the platform's default encoding.
    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(header)
        f.write("\n")
        f.write(f' _NS = Namespace("{target_namespace}")')
        f.write("\n\n")
        if fail:
            f.write(" _fail = True")
            f.write("\n\n")
        f.writelines(elements_strs)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the ontology, collect
    # the elements of the target namespace and write ``_<object_id>.py`` into
    # the current working directory.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "ontology_file",
        type=str,
        help="Path to the RDF ontology to extract a DefinedNamespace from.",
    )

    parser.add_argument(
        "target_namespace",
        type=str,
        help="The namespace within the ontology that you want to create a "
        "DefinedNamespace for.",
    )

    parser.add_argument(
        "object_id",
        type=str,
        help="The RDFlib object ID of the DefinedNamespace, e.g. GEO for GeoSPARQL.",
    )

    parser.add_argument(
        '-f', "--fail",
        dest='fail',
        action='store_true',
        help="Whether (true) or not (false) to mimic ClosedNamespace and fail on "
        "non-element use"
    )
    parser.add_argument('--no-fail', dest='fail', action='store_false')
    # The shared destination of the two flags above is ``fail``; make its
    # default explicit instead of setting an unrelated attribute.
    parser.set_defaults(fail=False)

    # ``args`` stays a module-level name, as in the original script.
    args = parser.parse_args()

    fmt = guess_format(args.ontology_file)
    if fmt is None:
        print("The format of the file you've supplied is unknown.")
        # sys.exit is always available; the bare exit() helper comes from the
        # site module and is not guaranteed to exist.
        sys.exit(1)
    g = Graph().parse(args.ontology_file, format=fmt)

    validate_namespace(args.target_namespace)

    validate_object_id(args.object_id)

    print(f"Creating DefinedNamespace file {args.object_id} "
          f"for {args.target_namespace}...")
    print(f"Ontology with {len(g)} triples loaded...")

    print("Getting all namespace elements...")
    # Returns (elements, elements_strs); only the formatted lines are written.
    elements = get_target_namespace_elements(g, args.target_namespace)

    output_file_name = Path().cwd() / f"_{args.object_id}.py"
    print(f"Creating DefinedNamespace Python file {output_file_name}")
    make_dn_file(
        output_file_name,
        args.target_namespace,
        elements[1],
        args.object_id,
        args.fail
    )



14 changes: 14 additions & 0 deletions test/defined_namespaces/fake.xxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix ns1: <http://www.w3.org/2004/02/skos/core#> .
@prefix ns2: <http://purl.org/dc/terms/> .
@prefix ns3: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .

schema:3DModel a rdfs:Class ;
rdfs:label "3DModel" ;
rdfs:comment """A 3D model represents some kind of 3D content, which may have [[encoding]]s in one or more [[MediaObject]]s. Many 3D formats are available (e.g. see [Wikipedia](https://en.wikipedia.org/wiki/Category:3D_graphics_file_formats)); specific encoding formats can be represented using the [[encodingFormat]] property applied to the relevant [[MediaObject]]. For the
case of a single file published after Zip compression, the convention of appending '+zip' to the [[encodingFormat]] can be used. Geospatial, AR/VR, artistic/animation, gaming, engineering and scientific content can all be represented using [[3DModel]].""" ;
rdfs:subClassOf schema:MediaObject ;
schema:isPartOf <https://pending.schema.org> ;
schema:source <https://github.com/schemaorg/schemaorg/issues/2140> .