From ee3552964f86edf6bbe35db47ea92304101f58f9 Mon Sep 17 00:00:00 2001 From: Ruben Taelman Date: Thu, 23 Jun 2022 08:56:44 +0200 Subject: [PATCH] Add vocabulary handler --- README.md | 21 ++++ index.ts | 1 + lib/Enhancer.ts | 35 ++++++- lib/handlers/EnhancementHandlerVocabulary.ts | 27 ++++++ lib/handlers/IEnhancementContext.ts | 8 ++ test/Enhancer-test.ts | 20 ++++ .../EnhancementHandlerComments-test.ts | 2 + .../EnhancementHandlerPersonNames-test.ts | 2 + ...nhancementHandlerPersonNamesCities-test.ts | 2 + .../EnhancementHandlerPersonNoise-test.ts | 2 + .../EnhancementHandlerPostAuthors-test.ts | 2 + .../EnhancementHandlerPostContents-test.ts | 2 + test/handlers/EnhancementHandlerPosts-test.ts | 2 + .../EnhancementHandlerVocabulary-test.ts | 96 +++++++++++++++++++ 14 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 lib/handlers/EnhancementHandlerVocabulary.ts create mode 100644 test/handlers/EnhancementHandlerVocabulary-test.ts diff --git a/README.md b/README.md index 2cbef12..2a520c7 100644 --- a/README.md +++ b/README.md @@ -285,6 +285,27 @@ Generated shape: . ``` +#### Vocabulary Handler + +Generates vocabulary information. + +```json +{ + "handlers": [ + { + "@type": "EnhancementHandlerVocabulary" + } + ] +} +``` + +Generated shape: +```turtle + a rdf:Property. + a rdf:Property. + a rdfs:Class. +``` + ### Parameter Emitters Certain handlers allow their internal parameters to be emitted. 
diff --git a/index.ts b/index.ts index dec809f..1c6e9fb 100644 --- a/index.ts +++ b/index.ts @@ -5,6 +5,7 @@ export * from './lib/handlers/EnhancementHandlerPersonNoise'; export * from './lib/handlers/EnhancementHandlerPostAuthors'; export * from './lib/handlers/EnhancementHandlerPostContents'; export * from './lib/handlers/EnhancementHandlerPosts'; +export * from './lib/handlers/EnhancementHandlerVocabulary'; export * from './lib/handlers/IEnhancementContext'; export * from './lib/handlers/IEnhancementHandler'; export * from './lib/logging/ILogger'; diff --git a/lib/Enhancer.ts b/lib/Enhancer.ts index 9932fdb..3a80b88 100644 --- a/lib/Enhancer.ts +++ b/lib/Enhancer.ts @@ -2,6 +2,7 @@ import * as fs from 'fs'; import type { Writable } from 'stream'; import { PassThrough } from 'stream'; import type * as RDF from '@rdfjs/types'; +import { DataFactory } from 'rdf-data-factory'; import { RdfObjectLoader } from 'rdf-object'; import rdfParser from 'rdf-parse'; import rdfSerializer from 'rdf-serialize'; @@ -11,6 +12,8 @@ import type { ILogger } from './logging/ILogger'; import type { IParameterEmitter } from './parameters/IParameterEmitter'; import type { IDataSelector } from './selector/IDataSelector'; +const DF = new DataFactory(); + /** * Enhances a given dataset. 
*/ @@ -62,7 +65,14 @@ export class Enhancer { // Prepare context this.logger?.log('Reading background data: people'); - const { people, peopleLocatedInCities, peopleKnows, peopleKnownBy } = await this.extractPeople(); + const { + people, + peopleLocatedInCities, + peopleKnows, + peopleKnownBy, + predicates, + classes, + } = await this.extractPeople(); this.logger?.log('Reading background data: activities'); const { posts, comments } = await this.extractActivities(); this.logger?.log('Reading background data: cities'); @@ -77,6 +87,8 @@ export class Enhancer { posts, comments, cities, + predicates, + classes, }; // Generate data @@ -95,6 +107,8 @@ export class Enhancer { peopleLocatedInCities: Record; peopleKnows: Record; peopleKnownBy: Record; + predicates: RDF.NamedNode[]; + classes: RDF.NamedNode[]; }> { return new Promise((resolve, reject) => { // Prepare RDF terms to compare with @@ -108,6 +122,8 @@ export class Enhancer { const peopleLocatedInCities: Record = {}; const peopleKnows: Record = {}; const peopleKnownBy: Record = {}; + const predicates: Set = new Set(); + const classes: Set = new Set(); const stream = rdfParser.parse(fs.createReadStream(this.personsPath), { path: this.personsPath }); // Temporary variables to determine knows relationships @@ -155,9 +171,24 @@ export class Enhancer { currentKnowsPerson = undefined; currentKnowsNode = undefined; } + + // Determine predicates + predicates.add(quad.predicate.value); + + // Determine classes + if (quad.predicate.equals(termType)) { + classes.add(quad.object.value); + } }); stream.on('end', () => { - resolve({ people, peopleLocatedInCities, peopleKnows, peopleKnownBy }); + resolve({ + people, + peopleLocatedInCities, + peopleKnows, + peopleKnownBy, + predicates: [ ...predicates ].map(value => DF.namedNode(value)), + classes: [ ...classes ].map(value => DF.namedNode(value)), + }); }); }); } diff --git a/lib/handlers/EnhancementHandlerVocabulary.ts b/lib/handlers/EnhancementHandlerVocabulary.ts new file 
mode 100644 index 0000000..52b8235 --- /dev/null +++ b/lib/handlers/EnhancementHandlerVocabulary.ts @@ -0,0 +1,27 @@ +import type { Writable } from 'stream'; +import type * as RDF from '@rdfjs/types'; +import { DataFactory } from 'rdf-data-factory'; +import type { IEnhancementContext } from './IEnhancementContext'; +import type { IEnhancementHandler } from './IEnhancementHandler'; + +const DF = new DataFactory(); + +/** + * Generates vocabulary information. + */ +export class EnhancementHandlerVocabulary implements IEnhancementHandler { + public async generate(writeStream: RDF.Stream & Writable, context: IEnhancementContext): Promise { + const rdf_type = DF.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'); + const rdfs_property = DF.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#Property'); + const rdfs_class = DF.namedNode('http://www.w3.org/2000/01/rdf-schema#Class'); + + // Write predicates + for (const predicate of context.predicates) { + writeStream.write(DF.quad(predicate, rdf_type, rdfs_property)); + } + // Write classes + for (const clazz of context.classes) { + writeStream.write(DF.quad(clazz, rdf_type, rdfs_class)); + } + } +} diff --git a/lib/handlers/IEnhancementContext.ts b/lib/handlers/IEnhancementContext.ts index 836eb80..64552be 100644 --- a/lib/handlers/IEnhancementContext.ts +++ b/lib/handlers/IEnhancementContext.ts @@ -43,4 +43,12 @@ export interface IEnhancementContext { * An array of IRIs of all cities in the dataset. */ cities: RDF.NamedNode[]; + /** + * An array of all predicates. + */ + predicates: RDF.NamedNode[]; + /** + * An array of all classes. 
+ */ + classes: RDF.NamedNode[]; } diff --git a/test/Enhancer-test.ts b/test/Enhancer-test.ts index 423b809..83d57f8 100644 --- a/test/Enhancer-test.ts +++ b/test/Enhancer-test.ts @@ -111,6 +111,13 @@ sn:post00000000000000000003 rdf:type snvoc:Post .`; expect.anything(), expect.anything(), ], + predicates: [ + DF.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + DF.namedNode('http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/isLocatedIn'), + ], + classes: [ + DF.namedNode('http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/Person'), + ], }; expect(handlers[0].generate).toHaveBeenCalledWith(expect.any(PassThrough), context); expect(handlers[1].generate).toHaveBeenCalledWith(expect.any(PassThrough), context); @@ -153,6 +160,13 @@ sn:post00000000000000000003 rdf:type snvoc:Post .`; expect.anything(), expect.anything(), ], + predicates: [ + DF.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + DF.namedNode('http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/isLocatedIn'), + ], + classes: [ + DF.namedNode('http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/Person'), + ], }; expect(handlers[0].generate).toHaveBeenCalledWith(expect.any(PassThrough), context); expect(handlers[1].generate).toHaveBeenCalledWith(expect.any(PassThrough), context); @@ -172,6 +186,10 @@ sn:post00000000000000000003 rdf:type snvoc:Post .`; peopleLocatedInCities: {}, peopleKnownBy: {}, peopleKnows: {}, + predicates: [ + DF.namedNode('ex:p'), + ], + classes: [], }); }); @@ -182,6 +200,8 @@ sn:post00000000000000000003 rdf:type snvoc:Post .`; peopleLocatedInCities: {}, peopleKnownBy: {}, peopleKnows: {}, + predicates: [], + classes: [], }); }); diff --git a/test/handlers/EnhancementHandlerComments-test.ts b/test/handlers/EnhancementHandlerComments-test.ts index a4b84c3..2a8164f 100644 --- a/test/handlers/EnhancementHandlerComments-test.ts +++ b/test/handlers/EnhancementHandlerComments-test.ts @@ -44,6 +44,8 @@ describe('EnhancementHandlerComments', () => { ], comments: [], cities: 
[], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerPersonNames-test.ts b/test/handlers/EnhancementHandlerPersonNames-test.ts index 19ad831..7b4fae6 100644 --- a/test/handlers/EnhancementHandlerPersonNames-test.ts +++ b/test/handlers/EnhancementHandlerPersonNames-test.ts @@ -60,6 +60,8 @@ describe('EnhancementHandlerPersonNames', () => { DF.namedNode('ex:cit3'), DF.namedNode('ex:cit4'), ], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerPersonNamesCities-test.ts b/test/handlers/EnhancementHandlerPersonNamesCities-test.ts index e50b8b2..cb6761a 100644 --- a/test/handlers/EnhancementHandlerPersonNamesCities-test.ts +++ b/test/handlers/EnhancementHandlerPersonNamesCities-test.ts @@ -60,6 +60,8 @@ describe('EnhancementHandlerPersonNamesCities', () => { DF.namedNode('ex:cit3'), DF.namedNode('ex:cit4'), ], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerPersonNoise-test.ts b/test/handlers/EnhancementHandlerPersonNoise-test.ts index 34992d7..913b21d 100644 --- a/test/handlers/EnhancementHandlerPersonNoise-test.ts +++ b/test/handlers/EnhancementHandlerPersonNoise-test.ts @@ -35,6 +35,8 @@ describe('EnhancementHandlerPersonNoise', () => { posts: [], comments: [], cities: [], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerPostAuthors-test.ts b/test/handlers/EnhancementHandlerPostAuthors-test.ts index 33296a2..a3f9ddc 100644 --- a/test/handlers/EnhancementHandlerPostAuthors-test.ts +++ b/test/handlers/EnhancementHandlerPostAuthors-test.ts @@ -44,6 +44,8 @@ describe('EnhancementHandlerPostAuthors', () => { ], comments: [], cities: [], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerPostContents-test.ts 
b/test/handlers/EnhancementHandlerPostContents-test.ts index 1a3c495..453a6f2 100644 --- a/test/handlers/EnhancementHandlerPostContents-test.ts +++ b/test/handlers/EnhancementHandlerPostContents-test.ts @@ -40,6 +40,8 @@ describe('EnhancementHandlerPostContents', () => { ], comments: [], cities: [], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerPosts-test.ts b/test/handlers/EnhancementHandlerPosts-test.ts index f3df4ee..efdc82d 100644 --- a/test/handlers/EnhancementHandlerPosts-test.ts +++ b/test/handlers/EnhancementHandlerPosts-test.ts @@ -35,6 +35,8 @@ describe('EnhancementHandlerPosts', () => { posts: [], comments: [], cities: [], + predicates: [], + classes: [], }; await context.rdfObjectLoader.context; }); diff --git a/test/handlers/EnhancementHandlerVocabulary-test.ts b/test/handlers/EnhancementHandlerVocabulary-test.ts new file mode 100644 index 0000000..0759107 --- /dev/null +++ b/test/handlers/EnhancementHandlerVocabulary-test.ts @@ -0,0 +1,96 @@ +import { PassThrough } from 'stream'; +import { DataFactory } from 'rdf-data-factory'; +import { RdfObjectLoader } from 'rdf-object'; +import { Enhancer } from '../../lib/Enhancer'; +import { EnhancementHandlerVocabulary } from '../../lib/handlers/EnhancementHandlerVocabulary'; +import type { IEnhancementContext } from '../../lib/handlers/IEnhancementContext'; +import { DataSelectorSequential } from '../selector/DataSelectorSequential'; +import 'jest-rdf'; + +const arrayifyStream = require('arrayify-stream'); +const DF = new DataFactory(); + +describe('EnhancementHandlerVocabulary', () => { + let handler: EnhancementHandlerVocabulary; + let stream: PassThrough; + let rdfObjectLoader: RdfObjectLoader; + let context: IEnhancementContext; + + beforeEach(async() => { + handler = new EnhancementHandlerVocabulary(); + stream = new PassThrough({ objectMode: true }); + rdfObjectLoader = new RdfObjectLoader({ context: Enhancer.CONTEXT_LDBC_SNB 
}); + context = { + rdfObjectLoader, + dataSelector: new DataSelectorSequential(), + people: [], + peopleLocatedInCities: {}, + peopleKnownBy: {}, + peopleKnows: {}, + posts: [], + comments: [], + cities: [], + predicates: [ + DF.namedNode('ex:p1'), + DF.namedNode('ex:p2'), + DF.namedNode('ex:p3'), + DF.namedNode('ex:p4'), + ], + classes: [ + DF.namedNode('ex:C1'), + DF.namedNode('ex:C2'), + DF.namedNode('ex:C3'), + DF.namedNode('ex:C4'), + ], + }; + await context.rdfObjectLoader.context; + }); + + describe('generate', () => { + it('should handle for no predicates and classes', async() => { + context = { ...context, predicates: [], classes: []}; + await handler.generate(stream, context); + stream.end(); + expect(await arrayifyStream(stream)).toBeRdfIsomorphic(rdfObjectLoader.createCompactedResource({}).toQuads()); + }); + + it('should handle', async() => { + await handler.generate(stream, context); + stream.end(); + expect(await arrayifyStream(stream)).toBeRdfIsomorphic(rdfObjectLoader.createCompactedResources([ + { + '@id': `ex:p1`, + type: 'rdf:Property', + }, + { + '@id': `ex:p2`, + type: 'rdf:Property', + }, + { + '@id': `ex:p3`, + type: 'rdf:Property', + }, + { + '@id': `ex:p4`, + type: 'rdf:Property', + }, + { + '@id': `ex:C1`, + type: 'rdfs:Class', + }, + { + '@id': `ex:C2`, + type: 'rdfs:Class', + }, + { + '@id': `ex:C3`, + type: 'rdfs:Class', + }, + { + '@id': `ex:C4`, + type: 'rdfs:Class', + }, + ]).flatMap(resource => resource.toQuads())); + }); + }); +});