Skip to content

Commit

Permalink
Added N-Quads parser.
Browse files Browse the repository at this point in the history
Hey, that was easy!

Don't allow literal graphs/contexts despite that being in the spec
at http://sw.deri.org/2008/07/n-quads/ because it's a stupid idea.

Added test from the spec
  • Loading branch information
dajobe committed Oct 22, 2010
1 parent a26c518 commit f01ff7a
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 13 deletions.
5 changes: 4 additions & 1 deletion configure.ac
Expand Up @@ -805,8 +805,9 @@ grddl_parser=no
guess_parser=yes
rdfa_parser=yes
json_parser=no
nquads_parser=no

rdf_parsers_available="rdfxml ntriples turtle trig guess rss-tag-soup rdfa"
rdf_parsers_available="rdfxml ntriples turtle trig guess rss-tag-soup rdfa nquads"
rdf_parsers_enabled=


Expand Down Expand Up @@ -844,6 +845,7 @@ if test "x" = "y"; then
AC_DEFINE(RAPTOR_PARSER_GUESS, 1, [Building guess parser])
AC_DEFINE(RAPTOR_PARSER_RDFA, 1, [Building RDFA parser])
AC_DEFINE(RAPTOR_PARSER_JSON, 1, [Building JSON parser])
AC_DEFINE(RAPTOR_PARSER_NQUADS, 1, [Building N-Quads parser])
fi

AC_MSG_CHECKING(RDF parsers required)
Expand Down Expand Up @@ -914,6 +916,7 @@ AM_CONDITIONAL(RAPTOR_PARSER_GRDDL, test $grddl_parser = yes)
AM_CONDITIONAL(RAPTOR_PARSER_GUESS, test $guess_parser = yes)
AM_CONDITIONAL(RAPTOR_PARSER_RDFA, test $rdfa_parser = yes)
AM_CONDITIONAL(RAPTOR_PARSER_JSON, test $json_parser = yes)
dnl N-Quads is built from ntriples_parse.c but has its own enable flag
dnl (nquads_parser); gate the conditional on that flag, not on N-Triples.
AM_CONDITIONAL(RAPTOR_PARSER_NQUADS, test $nquads_parser = yes)

AM_CONDITIONAL(LIBRDFA, test $need_librdfa = yes)

Expand Down
4 changes: 4 additions & 0 deletions src/Makefile.am
Expand Up @@ -80,6 +80,10 @@ endif
endif
if RAPTOR_PARSER_NTRIPLES
libraptor2_la_SOURCES += ntriples_parse.c
else
if RAPTOR_PARSER_NQUADS
libraptor2_la_SOURCES += ntriples_parse.c
endif
endif
if RAPTOR_RSS_COMMON
libraptor2_la_SOURCES += raptor_rss_common.c raptor_rss.h
Expand Down
147 changes: 138 additions & 9 deletions src/ntriples_parse.c
Expand Up @@ -57,7 +57,7 @@


/* Prototypes for local functions */
static void raptor_ntriples_generate_statement(raptor_parser* parser, const unsigned char *subject, const raptor_term_type subject_type, const unsigned char *predicate, const raptor_term_type predicate_type, const void *object, const raptor_term_type object_type, const unsigned char *object_literal_language, const unsigned char *object_literal_datatype);
static void raptor_ntriples_generate_statement(raptor_parser* parser, const unsigned char *subject, const raptor_term_type subject_type, const unsigned char *predicate, const raptor_term_type predicate_type, const void *object, const raptor_term_type object_type, const unsigned char *object_literal_language, const unsigned char *object_literal_datatype, const void *graph, const raptor_term_type graph_type);

/*
* NTriples parser object
Expand All @@ -74,6 +74,11 @@ struct raptor_ntriples_parser_context_s {

/* static statement for use in passing to user code */
raptor_statement statement;

/* Non-0 if N-Quads */
int is_nquads;

int literal_graph_warning;
};


Expand All @@ -96,6 +101,9 @@ raptor_ntriples_parse_init(raptor_parser* rdf_parser, const char *name)
ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context;

raptor_statement_init(&ntriples_parser->statement, rdf_parser->world);

if(!strcmp(name, "nquads"))
ntriples_parser->is_nquads = 1;

return 0;
}
Expand Down Expand Up @@ -128,7 +136,9 @@ raptor_ntriples_generate_statement(raptor_parser* parser,
const void *object,
const raptor_term_type object_type,
const unsigned char *object_literal_language,
const unsigned char *object_literal_datatype)
const unsigned char *object_literal_datatype,
const void *graph,
const raptor_term_type graph_type)
{
/* raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)parser->context; */
raptor_statement *statement = &parser->statement;
Expand Down Expand Up @@ -207,6 +217,32 @@ raptor_ntriples_generate_statement(raptor_parser* parser,
object_literal_language);
}


if(graph) {
/* Three choices for graph/context from N-Quads according to
* http://sw.deri.org/2008/07/n-quads/ but I am IGNORING Literal
*/
if(graph_type == RAPTOR_TERM_TYPE_URI) {
raptor_uri *graph_uri;

graph_uri = raptor_new_uri(parser->world, (const unsigned char*)graph);
if(!graph_uri) {
raptor_parser_error(parser,
"Could not create object uri '%s', skipping",
(const char *)graph);
goto cleanup;
}
statement->graph = raptor_new_term_from_uri(parser->world, graph_uri);
raptor_free_uri(graph_uri);
graph_uri = NULL;
} else if(graph_type == RAPTOR_TERM_TYPE_BLANK) {
statement->graph = raptor_new_term_from_blank(parser->world, graph);
} else {
/* Warning about literal graphs is handled below */
statement->graph = NULL;
}
}

/* Generate the statement */
(*parser->statement_handler)(parser->user_data, statement);

Expand Down Expand Up @@ -490,14 +526,16 @@ raptor_ntriples_term(raptor_parser* rdf_parser,

static int
raptor_ntriples_parse_line(raptor_parser* rdf_parser,
unsigned char *buffer, size_t len)
unsigned char *buffer, size_t len,
int max_terms)
{
raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context;
int i;
unsigned char *p;
unsigned char *dest;
unsigned char *terms[3];
size_t term_lengths[3];
raptor_term_type term_types[3];
unsigned char *terms[4];
size_t term_lengths[4];
raptor_term_type term_types[4];
size_t term_length = 0;
unsigned char *object_literal_language = NULL;
unsigned char *object_literal_datatype = NULL;
Expand Down Expand Up @@ -556,7 +594,7 @@ raptor_ntriples_parse_line(raptor_parser* rdf_parser,

/* Must be triple */

for(i = 0; i < 3; i++) {
for(i = 0; i < max_terms; i++) {
if(!len) {
raptor_parser_error(rdf_parser, "Unexpected end of line");
goto cleanup;
Expand Down Expand Up @@ -769,12 +807,21 @@ raptor_ntriples_parse_line(raptor_parser* rdf_parser,
}
}


if(terms[3] && term_types[3] == RAPTOR_TERM_TYPE_LITERAL) {
if(!ntriples_parser->literal_graph_warning++)
raptor_parser_warning(rdf_parser, "Ignoring N-Quad literal contexts");

terms[3] = NULL;
}

raptor_ntriples_generate_statement(rdf_parser,
terms[0], term_types[0],
terms[1], term_types[1],
terms[2], term_types[2],
object_literal_language,
object_literal_datatype);
object_literal_datatype,
terms[3], term_types[3]);

rdf_parser->locator.byte += len;

Expand All @@ -793,6 +840,7 @@ raptor_ntriples_parse_chunk(raptor_parser* rdf_parser,
unsigned char *ptr;
unsigned char *start;
raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context;
int max_terms = ntriples_parser->is_nquads ? 4 : 3;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
RAPTOR_DEBUG2("adding %d bytes to buffer\n", (unsigned int)len);
Expand Down Expand Up @@ -865,7 +913,7 @@ raptor_ntriples_parse_chunk(raptor_parser* rdf_parser,
rdf_parser->locator.column = 0;

*ptr = '\0';
if(raptor_ntriples_parse_line(rdf_parser,line_start,len))
if(raptor_ntriples_parse_line(rdf_parser, line_start, len, max_terms))
return 1;

rdf_parser->locator.line++;
Expand Down Expand Up @@ -950,6 +998,7 @@ raptor_ntriples_parse_start(raptor_parser* rdf_parser)
}


#ifdef RAPTOR_PARSER_NTRIPLES
static int
raptor_ntriples_parse_recognise_syntax(raptor_parser_factory* factory,
const unsigned char *buffer, size_t len,
Expand Down Expand Up @@ -1068,3 +1117,83 @@ raptor_init_parser_ntriples(raptor_world* world)
return !raptor_world_register_parser_factory(world,
&raptor_ntriples_parser_register_factory);
}

#endif


#ifdef RAPTOR_PARSER_NQUADS
/*
 * Score how likely the input is N-Quads using only the file name suffix
 * and the MIME type.
 *
 * Returns 0 immediately for the suffixes "nt", "ttl" and "n3".  Otherwise
 * the score is 2 for the suffix "nq", plus 2 when the MIME type contains
 * the substring "nquads".  The factory, buffer, len and identifier
 * arguments are not examined.
 */
static int
raptor_nquads_parse_recognise_syntax(raptor_parser_factory* factory,
                                     const unsigned char *buffer, size_t len,
                                     const unsigned char *identifier,
                                     const unsigned char *suffix,
                                     const char *mime_type)
{
  const char *ext = (const char*)suffix;
  int score = 0;

  if(ext) {
    /* Explicitly refuse to do anything with N-Triples, Turtle or N3
     * named content
     */
    if(strcmp(ext, "nt") == 0 ||
       strcmp(ext, "ttl") == 0 ||
       strcmp(ext, "n3") == 0)
      return 0;

    if(strcmp(ext, "nq") == 0)
      score = 2;
  }

  if(mime_type && strstr(mime_type, "nquads") != NULL)
    score += 2;

  /* Do not guess on content since it looks so similar to N-Triples */

  return score;
}


/* Names the N-Quads parser is registered under; the list ends with NULL. */
static const char* const nquads_names[2] = { "nquads", NULL };

/* No MIME types are registered for N-Quads: the table holds only the
 * all-NULL/zero entry (presumably the list terminator - confirm against
 * raptor_type_q users).
 */
#define NQUADS_TYPES_COUNT 0
static const raptor_type_q nquads_types[NQUADS_TYPES_COUNT + 1] = {
{ NULL, 0, 0}
};

/*
 * Fill in the parser factory for the "nquads" syntax.
 *
 * The factory reuses the N-Triples parser context and its init,
 * terminate, start and chunk handlers; only the syntax description and
 * the recognise_syntax handler are specific to N-Quads.
 *
 * Always returns 0.
 */
static int
raptor_nquads_parser_register_factory(raptor_parser_factory *factory)
{
  /* Syntax description */
  factory->desc.names = nquads_names;
  factory->desc.label = "N-Quads";
  factory->desc.uri_string = "http://sw.deri.org/2008/07/n-quads/";
  factory->desc.mime_types = nquads_types;
  factory->desc.mime_types_count = NQUADS_TYPES_COUNT;
  factory->desc.flags = 0;

  /* Per-parser state is the N-Triples context structure */
  factory->context_length = sizeof(raptor_ntriples_parser_context);

  /* Handlers shared with the N-Triples parser */
  factory->init = raptor_ntriples_parse_init;
  factory->terminate = raptor_ntriples_parse_terminate;
  factory->start = raptor_ntriples_parse_start;
  factory->chunk = raptor_ntriples_parse_chunk;

  /* N-Quads specific syntax scoring */
  factory->recognise_syntax = raptor_nquads_parse_recognise_syntax;

  return 0;
}


/*
 * Register the N-Quads parser factory with the given world.
 *
 * Returns 0 when raptor_world_register_parser_factory() yields a
 * non-zero (non-NULL) result, and 1 otherwise.
 */
int
raptor_init_parser_nquads(raptor_world* world)
{
  if(raptor_world_register_parser_factory(world,
                                          &raptor_nquads_parser_register_factory))
    return 0;

  return 1;
}
#endif
1 change: 1 addition & 0 deletions src/raptor_internal.h
Expand Up @@ -805,6 +805,7 @@ int raptor_init_parser_guess(raptor_world* world);
int raptor_init_parser_rss(raptor_world* world);
int raptor_init_parser_rdfa(raptor_world* world);
int raptor_init_parser_json(raptor_world* world);
int raptor_init_parser_nquads(raptor_world* world);

void raptor_terminate_parser_grddl_common(raptor_world *world);

Expand Down
4 changes: 4 additions & 0 deletions src/raptor_parse.c
Expand Up @@ -129,6 +129,10 @@ raptor_parsers_init(raptor_world *world)
rc+= raptor_init_parser_json(world) != 0;
#endif

#ifdef RAPTOR_PARSER_NQUADS
rc+= raptor_init_parser_nquads(world) != 0;
#endif

return rc;
}

Expand Down
29 changes: 26 additions & 3 deletions tests/ntriples/Makefile.am
Expand Up @@ -28,20 +28,25 @@ NT_OUT_FILES=test.out
NT_BAD_TEST_FILES=bad-00.nt bad-01.nt bad-02.nt bad-03.nt bad-04.nt \
bad-05.nt bad-06.nt bad-07.nt

NQ_TEST_FILES=testnq-1.nq

NQ_OUT_FILES=testnq-1.out

# Used to make N-triples output consistent
BASE_URI=http://librdf.org/raptor/tests/

EXTRA_DIST = \
$(NT_TEST_FILES) \
$(NT_OUT_FILES) \
$(NT_BAD_TEST_FILES)
$(NT_BAD_TEST_FILES) \
$(NQ_TEST_FILES)


build-rapper:
@(cd $(top_builddir)/utils ; $(MAKE) rapper$(EXEEXT))

check-local: build-rapper \
check-nt check-bad-nt
check-nt check-bad-nt check-nq

check-nt: build-rapper $(NT_TEST_FILES)
@set +e; result=0; \
Expand Down Expand Up @@ -85,5 +90,23 @@ check-bad-nt: build-rapper $(NT_BAD_TEST_FILES)
done; \
set -e; exit $$result

# Run rapper over each N-Quads test file (parsing and serializing as
# nquads) and compare the result with the matching .out file in $(srcdir).
# Exits non-zero if any comparison fails.
check-nq: build-rapper $(NQ_TEST_FILES)
	@set +e; result=0; \
	$(ECHO) "Testing N-Quads"; \
	for test in $(NQ_TEST_FILES); do \
	  name=`basename $$test .nq` ; \
	  $(ECHO) $(ECHO_N) "Checking $$test $(ECHO_C)"; \
	  $(top_builddir)/utils/rapper -q -i nquads -o nquads file:$(srcdir)/$$test $(BASE_URI)$$test > $$name.res 2>/dev/null; \
	  if cmp $(srcdir)/$$name.out $$name.res >/dev/null 2>&1; then \
	    $(ECHO) "ok"; \
	  else \
	    $(ECHO) "FAILED"; \
	    diff $(srcdir)/$$name.out $$name.res; result=1; \
	  fi; \
	  rm -f $$name.res ; \
	done; \
	set -e; exit $$result

print-nt-test-files:
@echo $(NT_TEST_FILES) | tr ' ' '\012'
@echo $(NT_TEST_FILES) | tr ' ' '\012'; echo $(NQ_TEST_FILES) | tr ' ' '\012'

11 changes: 11 additions & 0 deletions tests/ntriples/testnq-1.nq
@@ -0,0 +1,11 @@
<http://example.org/alice/foaf.rdf#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/alice/foaf.rdf> .
<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/name> "Alice" <http://example.org/alice/foaf.rdf> .
<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/knows> _:bnode1 <http://example.org/alice/foaf.rdf> .
_:bnode1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/alice/foaf.rdf> .
_:bnode1 <http://xmlns.com/foaf/0.1/name> "Bob" <http://example.org/alice/foaf.rdf> .
_:bnode1 <http://xmlns.com/foaf/0.1/homepage> <http://example.org/bob/> <http://example.org/alice/foaf.rdf> .
_:bnode1 <http://www.w3. org/2000/01/rdf-schema#seeAlso> <http://example.org/bob/foaf.rdf> <http://example.org/alice/foaf.rdf> .

<http://example.org/bob/foaf.rdf#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://example.org/bob/foaf.rdf> .
<http://example.org/bob/foaf.rdf#me> <http://xmlns.com/foaf/0.1/name> "Bob" <http://example.org/bob/foaf.rdf> .
<http://example.org/bob/foaf.rdf#me> <http://xmlns.com/foaf/0.1/homepage> <http://example.org/bob/> <http://example.org/bob/foaf.rdf> .
10 changes: 10 additions & 0 deletions tests/ntriples/testnq-1.out
@@ -0,0 +1,10 @@
<http://example.org/alice/foaf.rdf#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://xmlns.com/foaf/0.1/Person> .
<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/name> "Alice" "Alice" .
<http://example.org/alice/foaf.rdf#me> <http://xmlns.com/foaf/0.1/knows> _:bnode1 _:bnode1 .
_:bnode1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://xmlns.com/foaf/0.1/Person> .
_:bnode1 <http://xmlns.com/foaf/0.1/name> "Bob" "Bob" .
_:bnode1 <http://xmlns.com/foaf/0.1/homepage> <http://example.org/bob/> <http://example.org/bob/> .
_:bnode1 <http://www.w3. org/2000/01/rdf-schema#seeAlso> <http://example.org/bob/foaf.rdf> <http://example.org/bob/foaf.rdf> .
<http://example.org/bob/foaf.rdf#me> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> <http://xmlns.com/foaf/0.1/Person> .
<http://example.org/bob/foaf.rdf#me> <http://xmlns.com/foaf/0.1/name> "Bob" "Bob" .
<http://example.org/bob/foaf.rdf#me> <http://xmlns.com/foaf/0.1/homepage> <http://example.org/bob/> <http://example.org/bob/> .

1 comment on commit f01ff7a

@artob
Copy link

@artob artob commented on f01ff7a Oct 22, 2010

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay! Now I really must make some time to play with the latest Raptor...

Please sign in to comment.