From b0924bf168500070a287cef300047f874deebe0c Mon Sep 17 00:00:00 2001 From: Joachim Van Herwegen Date: Mon, 26 Sep 2022 16:06:49 +0200 Subject: [PATCH] feat: Allow JSON-LD contexts to be stored locally --- package-lock.json | 1 + package.json | 2 +- src/storage/conversion/RdfToQuadConverter.ts | 45 +++++++++++++- test/assets/contexts/test.jsonld | 7 +++ .../conversion/RdfToQuadConverter.test.ts | 59 ++++++++++++++++++- 5 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 test/assets/contexts/test.jsonld diff --git a/package-lock.json b/package-lock.json index 168b896616..a013e844cb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "5.0.0", "license": "MIT", "dependencies": { + "@comunica/context-entries": "^2.2.0", "@comunica/query-sparql": "^2.2.1", "@rdfjs/types": "^1.1.0", "@solid/access-token-verifier": "^2.0.3", diff --git a/package.json b/package.json index f2a9778fcb..4f1f804d95 100644 --- a/package.json +++ b/package.json @@ -99,6 +99,7 @@ "templates" ], "dependencies": { + "@comunica/context-entries": "^2.2.0", "@comunica/query-sparql": "^2.2.1", "@rdfjs/types": "^1.1.0", "@solid/access-token-verifier": "^2.0.3", @@ -193,7 +194,6 @@ "nodemon": "^2.0.19", "set-cookie-parser": "^2.5.1", "simple-git": "^3.12.0", - "commit-and-tag-version": "^10.1.0", "supertest": "^6.2.4", "ts-jest": "^27.1.5", "ts-node": "^10.9.1", diff --git a/src/storage/conversion/RdfToQuadConverter.ts b/src/storage/conversion/RdfToQuadConverter.ts index e68b451623..1a8839c889 100644 --- a/src/storage/conversion/RdfToQuadConverter.ts +++ b/src/storage/conversion/RdfToQuadConverter.ts @@ -1,25 +1,61 @@ import { PassThrough } from 'stream'; +import { KeysRdfParseJsonLd } from '@comunica/context-entries'; import type { NamedNode } from '@rdfjs/types'; +import fetch from 'cross-fetch'; +import { readJsonSync } from 'fs-extra'; +import { FetchDocumentLoader } from 'jsonld-context-parser'; +import type { IJsonLdContext } from 'jsonld-context-parser'; import rdfParser from 'rdf-parse'; import { BasicRepresentation } from '../../http/representation/BasicRepresentation'; import type { Representation } from '../../http/representation/Representation'; import { RepresentationMetadata } from '../../http/representation/RepresentationMetadata'; import { INTERNAL_QUADS } from '../../util/ContentTypes'; import { BadRequestHttpError } from '../../util/errors/BadRequestHttpError'; +import { resolveAssetPath } from '../../util/PathUtil'; import { pipeSafely } from '../../util/StreamUtil'; import { PREFERRED_PREFIX_TERM, SOLID_META } from '../../util/Vocabularies'; import { BaseTypedRepresentationConverter } from './BaseTypedRepresentationConverter'; import type { RepresentationConverterArgs } from './RepresentationConverter'; +/** + * First checks if a context is stored locally before letting the super class do a fetch. + */ +class ContextDocumentLoader extends FetchDocumentLoader { + private readonly contexts: Record; + + public constructor(contexts: Record) { + super(fetch); + this.contexts = {}; + for (const [ key, path ] of Object.entries(contexts)) { + this.contexts[key] = readJsonSync(resolveAssetPath(path)); + } + } + + public async load(url: string): Promise { + if (url in this.contexts) { + return this.contexts[url]; + } + return super.load(url); + } +} + /** * Converts most major RDF serializations to `internal/quads`. + * + * Custom contexts can be defined to be used when parsing JSON-LD. + * The keys of the object should be the URL of the context, + * and the values the file path of the contexts to use when the JSON-LD parser would fetch the given context. + * We use filepaths because embedding them directly into the configurations breaks Components.js. */ export class RdfToQuadConverter extends BaseTypedRepresentationConverter { - public constructor() { + private readonly documentLoader: ContextDocumentLoader; + + public constructor(contexts: Record = {}) { const inputTypes = rdfParser.getContentTypes() // ContentType application/json MAY NOT be converted to Quad. .then((types): string[] => types.filter((type): boolean => type !== 'application/json')); super(inputTypes, INTERNAL_QUADS); + this.documentLoader = new ContextDocumentLoader(contexts); } public async handle({ representation, identifier }: RepresentationConverterArgs): Promise { @@ -27,7 +63,12 @@ export class RdfToQuadConverter extends BaseTypedRepresentationConverter { const rawQuads = rdfParser.parse(representation.data, { contentType: representation.metadata.contentType!, baseIRI: identifier.path, - }) + // All extra keys get passed in the Comunica ActionContext + // and this is the key that is used to define the document loader. + // See https://github.com/rubensworks/rdf-parse.js/blob/master/lib/RdfParser.ts + // and https://github.com/comunica/comunica/blob/master/packages/actor-rdf-parse-jsonld/lib/ActorRdfParseJsonLd.ts + [KeysRdfParseJsonLd.documentLoader.name]: this.documentLoader, + } as any) // This works only for those cases where the data stream has been completely read before accessing the metadata. // Eg. the PATCH operation, which is the main case why we store the prefixes in metadata here if there are any. // See also https://github.com/CommunitySolidServer/CommunitySolidServer/issues/126 diff --git a/test/assets/contexts/test.jsonld b/test/assets/contexts/test.jsonld new file mode 100644 index 0000000000..1e2299865a --- /dev/null +++ b/test/assets/contexts/test.jsonld @@ -0,0 +1,7 @@ +{ + "@context": { + "@version": 1.1, + "test": "http://example.com/context#", + "testPredicate": { "@id": "test:predicate" } + } +} diff --git a/test/unit/storage/conversion/RdfToQuadConverter.test.ts b/test/unit/storage/conversion/RdfToQuadConverter.test.ts index a246740d09..0b1878aa16 100644 --- a/test/unit/storage/conversion/RdfToQuadConverter.test.ts +++ b/test/unit/storage/conversion/RdfToQuadConverter.test.ts @@ -1,6 +1,7 @@ import 'jest-rdf'; import { Readable } from 'stream'; import arrayifyStream from 'arrayify-stream'; +import fetch, { Headers } from 'cross-fetch'; import { DataFactory } from 'n3'; import rdfParser from 'rdf-parse'; import { PREFERRED_PREFIX_TERM, SOLID_META } from '../../../../src'; @@ -14,10 +15,25 @@ import { INTERNAL_QUADS } from '../../../../src/util/ContentTypes'; import { BadRequestHttpError } from '../../../../src/util/errors/BadRequestHttpError'; const { namedNode, triple, literal, quad } = DataFactory; +// All of this is necessary to not break the cross-fetch imports that happen in `rdf-parse` +jest.mock('cross-fetch', (): any => { + const mock = jest.fn(); + // Require the original module to not be mocked... + const originalFetch = jest.requireActual('cross-fetch'); + return { + // eslint-disable-next-line @typescript-eslint/naming-convention + __esModule: true, + ...originalFetch, + fetch: mock, + default: mock, + }; +}); + +// Not mocking `fs` since this breaks the `rdf-parser` library describe('A RdfToQuadConverter', (): void => { + const fetchMock: jest.Mock = fetch as any; const converter = new RdfToQuadConverter(); - const identifier: ResourceIdentifier = { path: 'path' }; - + const identifier: ResourceIdentifier = { path: 'http://example.com/resource' }; it('supports serializing as quads.', async(): Promise => { const types = rdfParser.getContentTypes() .then((inputTypes): string[] => inputTypes.filter((type): boolean => type !== 'application/json')); @@ -123,4 +139,43 @@ describe('A RdfToQuadConverter', (): void => { expect(result.metadata.contentType).toEqual(INTERNAL_QUADS); await expect(arrayifyStream(result.data)).rejects.toThrow(BadRequestHttpError); }); + + it('can use locally stored contexts.', async(): Promise => { + const fetchedContext = { + '@context': { + '@version': 1.1, + test: 'http://example.com/context2#', + testPredicate2: { '@id': 'test:predicate2' }, + }, + }; + // This depends on the fields needed by the `jsonld-context-parser` so could break if library changes + fetchMock.mockResolvedValueOnce({ + json: (): any => fetchedContext, + status: 200, + ok: true, + headers: new Headers({ 'content-type': 'application/ld+json' }), + }); + + const contextConverter = new RdfToQuadConverter( + { 'http://example.com/context.jsonld': '@css:test/assets/contexts/test.jsonld' }, + ); + const jsonld = { + '@context': [ 'http://example.com/context.jsonld', 'http://example.com/context2.jsonld' ], + '@id': 'http://example.com/resource', + testPredicate: 123, + testPredicate2: 456, + }; + const representation = new BasicRepresentation(JSON.stringify(jsonld), 'application/ld+json'); + const preferences: RepresentationPreferences = { type: { [INTERNAL_QUADS]: 1 }}; + const result = await contextConverter.handle({ identifier, representation, preferences }); + await expect(arrayifyStream(result.data)).resolves.toEqualRdfQuadArray([ triple( + namedNode('http://example.com/resource'), + namedNode('http://example.com/context#predicate'), + literal(123), + ), triple( + namedNode('http://example.com/resource'), + namedNode('http://example.com/context2#predicate2'), + literal(456), + ) ]); + }); });