Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Add PythonInputSource to create py-based graphs #1463

Merged
merged 9 commits into from
Dec 1, 2021
54 changes: 49 additions & 5 deletions rdflib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

from urllib.request import Request
from urllib.request import url2pathname
from urllib.parse import urljoin
from urllib.request import urlopen
from urllib.error import HTTPError

Expand All @@ -35,6 +34,7 @@
"StringInputSource",
"URLInputSource",
"FileInputSource",
"PythonInputSource",
]


Expand Down Expand Up @@ -104,6 +104,45 @@ def close(self):
pass


class PythonInputSource(InputSource):
    """
    Constructs an RDFLib Parser InputSource from a Python data structure,
    for example, loaded from JSON with json.load or json.loads:

    >>> import json
    >>> as_string = \"\"\"{
    ...   "@context" : {"ex" : "http://example.com/ns#"},
    ...   "@graph": [{"@type": "ex:item", "@id": "#example"}]
    ... }\"\"\"
    >>> as_python = json.loads(as_string)
    >>> source = create_input_source(data=as_python)
    >>> isinstance(source, PythonInputSource)
    True

    The wrapped object is exposed unchanged as ``self.data``; no stream is
    ever opened for it.
    """

    def __init__(self, data, system_id=None):
        """
        :param data: the already-decoded Python object to hand to a parser.
        :param system_id: optional system identifier for the source.
        """
        # Initialise the base class so that the inherited accessors this
        # class does not override (byte/character stream, encoding) have
        # their state and do not fail with AttributeError.
        super().__init__(system_id=system_id)
        self.content_type = None
        self.auto_close = False  # see Graph.parse(), true if opened by us
        self.public_id = None
        self.system_id = system_id
        self.data = data

    def getPublicId(self):
        """Return the public identifier, or None when unset."""
        return self.public_id

    def setPublicId(self, public_id):
        """Store ``public_id`` as the public identifier."""
        self.public_id = public_id

    def getSystemId(self):
        """Return the system identifier, or None when unset."""
        return self.system_id

    def setSystemId(self, system_id):
        """Store ``system_id`` as the system identifier."""
        self.system_id = system_id

    def close(self):
        """Drop the reference to the wrapped data."""
        self.data = None


class StringInputSource(InputSource):
"""
Constructs an RDFLib Parser InputSource from a Python String or Bytes
Expand Down Expand Up @@ -289,10 +328,15 @@ def create_input_source(
input_source = FileInputSource(file)

if data is not None:
if not isinstance(data, (str, bytes, bytearray)):
raise RuntimeError("parse data can only str, or bytes.")
input_source = StringInputSource(data)
auto_close = True
if isinstance(data, dict):
input_source = PythonInputSource(data)
auto_close = True
elif isinstance(data, (str, bytes, bytearray)):
input_source = StringInputSource(data)
auto_close = True
else:
raise RuntimeError(
f"parse data can only str, or bytes. not: {type(data)}")

if input_source is None:
raise Exception("could not create InputSource")
Expand Down
6 changes: 5 additions & 1 deletion rdflib/plugins/shared/jsonld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@

from urllib.parse import urljoin, urlsplit, urlunsplit

from rdflib.parser import create_input_source
from rdflib.parser import create_input_source, PythonInputSource

from io import StringIO


def source_to_json(source):

if isinstance(source, PythonInputSource):
return source.data

# TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
source = create_input_source(source, format="json-ld")

Expand Down
76 changes: 76 additions & 0 deletions test/jsonld/test_pythonparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from rdflib import Graph
from rdflib.compare import isomorphic
import json


def test_wrap():
    """
    Check that Graph.parse accepts an in-memory JSON-LD structure.

    A JSON-LD document is decoded, rewritten in memory by ``walk`` and then
    parsed twice: once after being dumped back to a string, and once handed
    to Graph.parse directly as a Python object. Both graphs must be
    isomorphic.
    """

    raw_jsonld = """
{
"@context" : {
"ngff" : "http://example.com/ns#"
},
"@graph": [{
"@type": "ngff:ItemList",
"ngff:collectionType": {"@type": "ngff:Image"},
"ngff:itemListElement": [
{
"@type": "ngff:Image",
"path": "image1",
"name": "Image 1"
},
{
"@type": "ngff:Image",
"path": "something-else",
"name": "bob"
}
]
}]
}
"""

    # Current workaround: serialise the rewritten structure back to text
    # (wasteful) before parsing it.
    reserialised = json.dumps(walk(json.loads(raw_jsonld)))
    graph_from_text = Graph()
    graph_from_text.parse(data=reserialised, format="json-ld")

    # Desired behavior: pass the rewritten structure straight to the parser.
    rewritten = walk(json.loads(raw_jsonld))
    graph_from_python = Graph()
    graph_from_python.parse(data=rewritten, format="json-ld")

    assert isomorphic(graph_from_text, graph_from_python)


def walk(data, path=None):
    """
    Some arbitrary operation on a Python data structure.

    Recursively visits ``data``. Dict values are replaced in place by their
    walked form. Every list is rebuilt: items of a list stored directly
    under an ``"@graph"`` key are walked as they are, while items of any
    other list (including a list passed as the root value) are wrapped in a
    ``ListItemWrapper`` dict that records their position.

    :param data: the value to walk; dicts are mutated in place.
    :param path: keys and list indices leading to ``data``; ``None`` for
        the root value.
    :return: the walked value: the same dict object for dicts, a new list
        for lists, and any other value unchanged.
    """

    if path is None:
        path = []

    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict never changes size
        # while it is being iterated.
        for key, value in data.items():
            data[key] = walk(value, path + [key])
        return data

    if isinstance(data, list):
        # A root-level list has an empty path; guard the lookup so it is
        # treated as an ordinary list instead of raising IndexError.
        inside_graph = bool(path) and path[-1] == "@graph"
        if inside_graph:
            return [walk(item, path) for item in data]
        return [
            {
                "@type": "ListItemWrapper",
                "ngff:position": idx,
                "ngff:item": walk(item, path + [idx]),
            }
            for idx, item in enumerate(data)
        ]

    return data