Skip to content

Commit

Permalink
Merge pull request #1463 from joshmoore/python-graph
Browse files Browse the repository at this point in the history
RFC: Add PythonInputSource to create py-based graphs
  • Loading branch information
nicholascar committed Dec 1, 2021
2 parents fb8d007 + 2bd4b71 commit 300fc38
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 6 deletions.
54 changes: 49 additions & 5 deletions rdflib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from urllib.request import Request
from urllib.request import url2pathname
from urllib.parse import urljoin
from urllib.request import urlopen
from urllib.error import HTTPError

Expand All @@ -36,6 +35,7 @@
"StringInputSource",
"URLInputSource",
"FileInputSource",
"PythonInputSource",
]


Expand Down Expand Up @@ -105,6 +105,45 @@ def close(self):
pass


class PythonInputSource(InputSource):
    """
    Constructs an RDFLib Parser InputSource from a Python data structure,
    for example, loaded from JSON with json.load or json.loads:

    >>> import json
    >>> as_string = \"\"\"{
    ...   "@context" : {"ex" : "http://example.com/ns#"},
    ...   "@graph": [{"@type": "ex:item", "@id": "#example"}]
    ... }\"\"\"
    >>> as_python = json.loads(as_string)
    >>> source = create_input_source(data=as_python)
    >>> isinstance(source, PythonInputSource)
    True

    The wrapped structure is stored unchanged on the ``data`` attribute;
    this class does not create a byte or character stream for it.
    """

    def __init__(self, data, system_id=None):
        """
        :param data: the Python structure to expose to a parser.
        :param system_id: optional system identifier of the source.
        """
        # Initialise the base class first so that any state it owns exists
        # before the attributes below are (re)assigned.
        # NOTE(review): assumes InputSource.__init__ accepts the system id
        # as its first argument -- confirm against the base class.
        super().__init__(system_id)
        self.content_type = None
        self.auto_close = False  # see Graph.parse(), true if opened by us
        self.public_id = None
        self.system_id = system_id
        self.data = data

    def getPublicId(self):
        """Return the public identifier of this source (``None`` if unset)."""
        return self.public_id

    def setPublicId(self, public_id):
        """Set the public identifier of this source."""
        self.public_id = public_id

    def getSystemId(self):
        """Return the system identifier of this source (``None`` if unset)."""
        return self.system_id

    def setSystemId(self, system_id):
        """Set the system identifier of this source."""
        self.system_id = system_id

    def close(self):
        """Drop the reference to the wrapped data structure."""
        self.data = None


class StringInputSource(InputSource):
"""
Constructs an RDFLib Parser InputSource from a Python String or Bytes
Expand Down Expand Up @@ -295,10 +334,15 @@ def create_input_source(
input_source = FileInputSource(file)

if data is not None:
if not isinstance(data, (str, bytes, bytearray)):
raise RuntimeError("parse data can only str, or bytes.")
input_source = StringInputSource(data)
auto_close = True
if isinstance(data, dict):
input_source = PythonInputSource(data)
auto_close = True
elif isinstance(data, (str, bytes, bytearray)):
input_source = StringInputSource(data)
auto_close = True
else:
raise RuntimeError(
f"parse data can only str, or bytes. not: {type(data)}")

if input_source is None:
raise Exception("could not create InputSource")
Expand Down
6 changes: 5 additions & 1 deletion rdflib/plugins/shared/jsonld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@

from urllib.parse import urljoin, urlsplit, urlunsplit

from rdflib.parser import create_input_source
from rdflib.parser import create_input_source, PythonInputSource

from io import StringIO


def source_to_json(source):

if isinstance(source, PythonInputSource):
return source.data

# TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
source = create_input_source(source, format="json-ld")

Expand Down
76 changes: 76 additions & 0 deletions test/jsonld/test_pythonparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from rdflib import Graph
from rdflib.compare import isomorphic
import json


def test_wrap():
    """
    Check that Graph.parse accepts an already-decoded JSON-LD structure.

    The same document is loaded, manipulated in memory with walk(), and
    then parsed twice: once re-serialised to a JSON string and once
    passed directly as a Python structure. Both graphs must be isomorphic.
    """

    raw_jsonld = """
{
"@context" : {
"ngff" : "http://example.com/ns#"
},
"@graph": [{
"@type": "ngff:ItemList",
"ngff:collectionType": {"@type": "ngff:Image"},
"ngff:itemListElement": [
{
"@type": "ngff:Image",
"path": "image1",
"name": "Image 1"
},
{
"@type": "ngff:Image",
"path": "something-else",
"name": "bob"
}
]
}]
}
"""

    # Workaround path: round-trip the manipulated structure through a
    # JSON string before parsing (wasteful).
    reserialised = json.dumps(walk(json.loads(raw_jsonld)))
    graph_from_string = Graph()
    graph_from_string.parse(data=reserialised, format="json-ld")

    # Desired path: hand the manipulated structure straight to the parser.
    structure = walk(json.loads(raw_jsonld))
    graph_from_python = Graph()
    graph_from_python.parse(data=structure, format="json-ld")

    assert isomorphic(graph_from_string, graph_from_python)


def walk(data, path=None):
    """
    Some arbitrary operation on a Python data structure.

    Recursively visits ``data`` and wraps every list element in a
    ``ListItemWrapper`` dict recording its position, except for elements
    of a list reached directly under an ``"@graph"`` key, which are only
    recursed into.

    :param data: the structure to transform. Dicts are updated in place;
        lists are replaced by new lists; other values are returned as-is.
    :param path: keys and indexes leading to ``data``; ``None`` at the
        top level.
    :return: the transformed structure (the same object for dicts).
    """

    if path is None:
        path = []

    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict does not change
        # size while it is being iterated.
        for key, value in data.items():
            data[key] = walk(value, path + [key])
        return data

    if isinstance(data, list):
        # Guard against an empty path: a list passed at the top level has
        # no parent key, so it cannot be the "@graph" list.
        if path and path[-1] == "@graph":
            return [walk(item, path) for item in data]
        return [
            {
                "@type": "ListItemWrapper",
                "ngff:position": idx,
                "ngff:item": walk(item, path + [idx]),
            }
            for idx, item in enumerate(data)
        ]

    return data

0 comments on commit 300fc38

Please sign in to comment.