From 85469ff265f32cfc1032b529e16135d12f507c61 Mon Sep 17 00:00:00 2001 From: Patrick Sinclair Date: Fri, 25 Jul 2008 09:20:55 +0100 Subject: [PATCH] Initial import of Treetop-based N3 parser --- lib/rena/n3_grammer.treetop | 125 ++++++++++++++++++++++++ lib/rena/n3parser.rb | 163 ++++++++++++++++++++++++++++++++ test/n3_tests/misc/on_now-01.n3 | 28 ++++++ test/n3_tests/misc/on_now-01.nt | 15 +++ test/n3_tests/n3p/simple-01.n3 | 1 + test/n3_tests/n3p/simple-01.nt | 0 test/n3_tests/n3p/simple-02.n3 | 4 + test/n3_tests/n3p/simple-02.nt | 0 test/n3_tests/n3p/simple-03.n3 | 5 + test/n3_tests/n3p/simple-03.nt | 1 + test/n3_tests/n3p/simple-04.n3 | 6 ++ test/n3_tests/n3p/simple-04.nt | 3 + test/n3_tests/n3p/simple-05.n3 | 7 ++ test/n3_tests/n3p/simple-05.nt | 2 + test/n3_tests/n3p/simple-06.n3 | 6 ++ test/n3_tests/n3p/simple-06.nt | 4 + test/n3_tests/n3p/simple-07.n3 | 7 ++ test/n3_tests/n3p/simple-07.nt | 6 ++ test/spec/n3parser.spec.rb | 41 ++++++++ 19 files changed, 424 insertions(+) create mode 100644 lib/rena/n3_grammer.treetop create mode 100644 lib/rena/n3parser.rb create mode 100644 test/n3_tests/misc/on_now-01.n3 create mode 100644 test/n3_tests/misc/on_now-01.nt create mode 100644 test/n3_tests/n3p/simple-01.n3 create mode 100644 test/n3_tests/n3p/simple-01.nt create mode 100644 test/n3_tests/n3p/simple-02.n3 create mode 100644 test/n3_tests/n3p/simple-02.nt create mode 100644 test/n3_tests/n3p/simple-03.n3 create mode 100644 test/n3_tests/n3p/simple-03.nt create mode 100644 test/n3_tests/n3p/simple-04.n3 create mode 100644 test/n3_tests/n3p/simple-04.nt create mode 100644 test/n3_tests/n3p/simple-05.n3 create mode 100644 test/n3_tests/n3p/simple-05.nt create mode 100644 test/n3_tests/n3p/simple-06.n3 create mode 100644 test/n3_tests/n3p/simple-06.nt create mode 100644 test/n3_tests/n3p/simple-07.n3 create mode 100644 test/n3_tests/n3p/simple-07.nt create mode 100644 test/spec/n3parser.spec.rb diff --git a/lib/rena/n3_grammer.treetop b/lib/rena/n3_grammer.treetop new file mode 100644 index 0000000..f5e3d31 --- /dev/null +++ b/lib/rena/n3_grammer.treetop @@ -0,0 +1,125 @@ +grammar N3Grammer + rule document + statements + end + + rule statements + (space / (statement / directive) space* ('.' space*)? )* + end + + rule statement + subject space+ property_list + end + + rule subject + node + end + + rule verb + ">-" prop "->" # has xxx of + / "<-" prop "<-" # is xxx of + # / # / operator # has operator:xxx of??? NOT IMPLMENTED + / prop # has xxx of -- shorthand + # / "has" prop # has xxx of + # / "is" prop "of" # is xxx of + / "a" # has rdf:type of + # / "=" # has daml:equivaent of + end + + rule prop + node + end + + rule node + uri_ref2 / anonnode / 'this' + end + + rule anonnode + "[" space* property_list space* "]" # something which ... + / "{" statementlist "}" # the statementlist itself as a resource + / "(" nodelist ")" { + def anonnode; true; end + } + end + + rule property_list + verb space+ object_list space* ";" space+ property_list + / verb space+ object_list + / ":-" anonnode #to allow two anonymous forms to be given eg [ a :Truth; :- { :sky :color :blue } ] ) + / ":-" anonnode ";" property_list + / '.' + end + + rule object_list + object "," space* object_list / object + end + + rule directive + '@prefix' space+ nprefix:nprefix? ':' space+ uri_ref2:uri_ref2 { + def directive; true; end + } + end + + rule uri_ref2 + qname / "<" uri:URI_Reference ">" + end + + rule qname + nprefix ":" localname / ':' localname + end + + rule object + subject + / string1 # " constant-value-with-escaping " + / string2 # """ constant value with escaping including single or double occurences of quotes and/or newlines """ + end + + rule localname + fragid + end + + rule URI_Reference + [^{}<>]* + end + + rule nprefix + ((alpha / "_") alphanumeric*) + end + + rule fragid + alpha alphanumeric* + end + + rule alpha + [a-zA-Z] + end + + rule alphanumeric + alpha / [0-9] / "_" + end + + rule space + [ \t\n\r]+ / comment + end + + rule comment + '#' (![\n\r] .)* + end + + rule string1 + '"' string1_char+ '"' + end + + rule string1_char + !["] . + end + + rule string2 + '"""' string2_char* '"""' + end + + rule string2_char + !'"""' . # something like this; need to think about it some more + end + +end diff --git a/lib/rena/n3parser.rb b/lib/rena/n3parser.rb new file mode 100644 index 0000000..bda99ee --- /dev/null +++ b/lib/rena/n3parser.rb @@ -0,0 +1,163 @@ +require 'rena/graph' +require 'treetop' + +Treetop.load(File.join(File.dirname(__FILE__), "n3_grammer")) + +class N3Parser + attr_accessor :graph + + def initialize(n3_str, uri=nil) + @uri = Addressable::URI.parse(uri) unless uri.nil? + parser = N3GrammerParser.new + document = parser.parse(n3_str) + if document + @graph = Graph.new + process_directives(document) + process_statements(document) + else + parser.terminal_failures.each do |tf| + puts "Expected #{tf.expected_string.inspect} (#{tf.index})- '#{string[tf.index,10].inspect}'" + end + end + end + + protected + + def process_directives(document) + directives = document.elements.find_all { |e| e.elements.first.respond_to? :directive } + directives.map! { |d| d.elements.first } + directives.each { |d| namespace(d.uri_ref2.uri.text_value, d.nprefix.text_value) } + end + + def namespace(uri, short) + short = '__local__' if short == '' + @graph.namespace(uri, short) + end + + def process_statements(document) + subjects = document.elements.find_all { |e| e.elements.first.respond_to? :subject } + subjects.map! { |s| s.elements.first } + subjects.each do |s| + subject = process_node(s.subject) + properties = process_properties(s.property_list) + properties.each do |p| + predicate = process_verb(p.verb) + objects = process_objects(p.object_list) + objects.each { |object| triple(subject, predicate, object) } + end + end + end + + def triple(subject, predicate, object) + @graph.add_triple(subject, predicate, object) + end + + def process_anonnode(anonnode) + bnode = BNode.new + properties = process_properties(anonnode.property_list) + properties.each do |p| + predicate = process_node(p.verb) + objects = process_objects(p.object_list) + objects.each { |object| triple(bnode, predicate, object) } + end + bnode + end + + def process_verb(verb) + return URIRef.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') if (verb.text_value=='a') + return process_node(verb) + end + + def process_node(node) + if (node.respond_to? :uri) + URIRef.new(node.uri.text_value) + else + prefix = (node.respond_to? :nprefix) ? node.nprefix.text_value : nil + localname = node.localname.text_value + build_uri(prefix, localname) + end + end + + def process_properties(properties) + result = [] + result << properties if (properties.respond_to? :verb) + result << process_properties(properties.property_list) if (properties.respond_to? :property_list) + result.flatten + end + + def process_objects(objects) + result = [] + if (objects.respond_to? :object) + result << process_object(objects.object) + else + result << process_object(objects) + end + result << process_objects(objects.object_list) if (objects.respond_to? :object_list) + result.flatten + end + + def process_object(object) + if (object.respond_to? :localname or object.respond_to? :uri) + process_node(object) + elsif (object.respond_to? :property_list) + process_anonnode(object) + else + Literal.new(object.elements[1].text_value) + end + end + + # pp objects.object + # if (objects.respond_to? :object and objects.object.respond_to? :anonnode) + # pp 'foo' + # elsif (objects.respond_to? :object and objects.respond_to? :object_list) + # result << process_node(objects.object) + # result << process_objects(objects.object_list) + # elsif (objects.respond_to? :localname) + # result << process_node(objects) + # elsif (objects.respond_to? :uri) + # result << URIRef.new(objects.uri.text_value) + # else + # result << Literal.new(objects.elements[1].text_value) + # end + # result.flatten + # end + + def build_uri(prefix, localname) + prefix = '__local__' if prefix.nil? + if (prefix=='_') + BNode.new(localname) + else + @graph.nsbinding[prefix].send(localname) + end + end + + +end + +# # string = %[ +# # +# # +# # +# # +# # +# # +# # ] +# # parser = RdfXmlParser.new(string) +# # puts parser.graph.to_ntriples +# +# [ 'simple-01.n3', +# 'simple-02.n3', +# 'simple-03.n3', +# 'simple-04.n3', +# 'simple-05.n3', +# 'simple-06.n3', +# 'simple-07.n3', +# 'on_now-01.n3', +# ].each do |f| +# string = File.read("test/n3p_tests/#{f}") +# parser = N3Parser.new(string) +# puts f +# puts parser.graph.to_ntriples +# end diff --git a/test/n3_tests/misc/on_now-01.n3 b/test/n3_tests/misc/on_now-01.n3 new file mode 100644 index 0000000..754f3b2 --- /dev/null +++ b/test/n3_tests/misc/on_now-01.n3 @@ -0,0 +1,28 @@ +@prefix dc: . +@prefix po: . +@prefix rdf: . +_:broadcast + a po:Broadcast; + po:schedule_date """2008-06-24T12:00:00Z"""; + po:broadcast_of _:version; + po:broadcast_on ; +. +_:version + a po:Version; +. + + dc:title """Nemone"""; + a po:Brand; +. + + a po:Episode; + po:episode ; + po:version _:version; + po:long_synopsis """Actor and comedian Rhys Darby chats to Nemone."""; + dc:title """Nemone"""; + po:synopsis """Actor and comedian Rhys Darby chats to Nemone."""; +. + + a po:Service; + dc:title """BBC 6 Music"""; +. \ No newline at end of file diff --git a/test/n3_tests/misc/on_now-01.nt b/test/n3_tests/misc/on_now-01.nt new file mode 100644 index 0000000..cba243e --- /dev/null +++ b/test/n3_tests/misc/on_now-01.nt @@ -0,0 +1,15 @@ +_:broadcast . +_:broadcast "2008-06-24T12:00:00Z" . +_:broadcast _:version . +_:broadcast . +_:version . + "Nemone" . + . + . + . + _:version . + "Actor and comedian Rhys Darby chats to Nemone." . + "Nemone" . + "Actor and comedian Rhys Darby chats to Nemone." . + . + "BBC 6 Music" . diff --git a/test/n3_tests/n3p/simple-01.n3 b/test/n3_tests/n3p/simple-01.n3 new file mode 100644 index 0000000..abab6df --- /dev/null +++ b/test/n3_tests/n3p/simple-01.n3 @@ -0,0 +1 @@ +# simple-01.n3 - Empty file diff --git a/test/n3_tests/n3p/simple-01.nt b/test/n3_tests/n3p/simple-01.nt new file mode 100644 index 0000000..e69de29 diff --git a/test/n3_tests/n3p/simple-02.n3 b/test/n3_tests/n3p/simple-02.n3 new file mode 100644 index 0000000..50e3781 --- /dev/null +++ b/test/n3_tests/n3p/simple-02.n3 @@ -0,0 +1,4 @@ +# simple-02.n3 - Prefix and Keywords + +@prefix : <#> . +@prefix foaf: . diff --git a/test/n3_tests/n3p/simple-02.nt b/test/n3_tests/n3p/simple-02.nt new file mode 100644 index 0000000..e69de29 diff --git a/test/n3_tests/n3p/simple-03.n3 b/test/n3_tests/n3p/simple-03.n3 new file mode 100644 index 0000000..e57b2a1 --- /dev/null +++ b/test/n3_tests/n3p/simple-03.n3 @@ -0,0 +1,5 @@ +# simple-03.n3 - Simple triple + +@prefix foaf: . + +_:Sean foaf:name "Sean B. Palmer" . diff --git a/test/n3_tests/n3p/simple-03.nt b/test/n3_tests/n3p/simple-03.nt new file mode 100644 index 0000000..a467a53 --- /dev/null +++ b/test/n3_tests/n3p/simple-03.nt @@ -0,0 +1 @@ +_:Sean "Sean B. Palmer" . diff --git a/test/n3_tests/n3p/simple-04.n3 b/test/n3_tests/n3p/simple-04.n3 new file mode 100644 index 0000000..629636f --- /dev/null +++ b/test/n3_tests/n3p/simple-04.n3 @@ -0,0 +1,6 @@ +# simple-04.n3 - Multiple objects + +@prefix : . + +:Subject :predicate :ObjectP, :ObjectQ, + :ObjectR . diff --git a/test/n3_tests/n3p/simple-04.nt b/test/n3_tests/n3p/simple-04.nt new file mode 100644 index 0000000..5229a05 --- /dev/null +++ b/test/n3_tests/n3p/simple-04.nt @@ -0,0 +1,3 @@ + . + . + . diff --git a/test/n3_tests/n3p/simple-05.n3 b/test/n3_tests/n3p/simple-05.n3 new file mode 100644 index 0000000..b4174db --- /dev/null +++ b/test/n3_tests/n3p/simple-05.n3 @@ -0,0 +1,7 @@ +# simple-05.n3 - Popairs + +@prefix : <#> . +@prefix foaf: . + +_:Sean foaf:name "Sean B. Palmer"; + foaf:homepage . diff --git a/test/n3_tests/n3p/simple-05.nt b/test/n3_tests/n3p/simple-05.nt new file mode 100644 index 0000000..baa3981 --- /dev/null +++ b/test/n3_tests/n3p/simple-05.nt @@ -0,0 +1,2 @@ +_:Sean "Sean B. Palmer" . +_:Sean . diff --git a/test/n3_tests/n3p/simple-06.n3 b/test/n3_tests/n3p/simple-06.n3 new file mode 100644 index 0000000..33c0519 --- /dev/null +++ b/test/n3_tests/n3p/simple-06.n3 @@ -0,0 +1,6 @@ +# simple-06.n3 - Popairs and multiple objects + +@prefix : . + +:Subject :predicateOne :ObjectA, :ObjectB; + :predicateTwo :ObjectC, :ObjectD . diff --git a/test/n3_tests/n3p/simple-06.nt b/test/n3_tests/n3p/simple-06.nt new file mode 100644 index 0000000..6ea34b4 --- /dev/null +++ b/test/n3_tests/n3p/simple-06.nt @@ -0,0 +1,4 @@ + . + . + . + . diff --git a/test/n3_tests/n3p/simple-07.n3 b/test/n3_tests/n3p/simple-07.n3 new file mode 100644 index 0000000..231daf5 --- /dev/null +++ b/test/n3_tests/n3p/simple-07.n3 @@ -0,0 +1,7 @@ +# simple-07.n3 - Popairs and multiple objects, mixed types + +@prefix : . +@prefix foaf: . + +:Subject :predicateOne :ObjectP, ; + :predicateTwo [ foaf:nick "spiggot" ], "ObjectR", _:ObjectS . diff --git a/test/n3_tests/n3p/simple-07.nt b/test/n3_tests/n3p/simple-07.nt new file mode 100644 index 0000000..f68c516 --- /dev/null +++ b/test/n3_tests/n3p/simple-07.nt @@ -0,0 +1,6 @@ + . + . + _:node1 . +_:node1 "spiggot" . + "ObjectR" . + _:ObjectS . diff --git a/test/spec/n3parser.spec.rb b/test/spec/n3parser.spec.rb new file mode 100644 index 0000000..084e02e --- /dev/null +++ b/test/spec/n3parser.spec.rb @@ -0,0 +1,41 @@ +require 'lib/rena' + +describe "N3 parser" do + + # n3p tests taken from http://inamidst.com/n3p/test/ + describe "parsing n3p test" do + dir_name = File.join(File.dirname(__FILE__), '..', 'n3_tests', 'n3p', '*.n3') + Dir.glob(dir_name).each do |n3| + it n3 do + test_file(n3) + end + end + end + + describe "parsing misc tests" do + dir_name = File.join(File.dirname(__FILE__), '..', 'n3_tests', 'misc', '*.n3') + Dir.glob(dir_name).each do |n3| + it n3 do + test_file(n3) + end + end + end + + def test_file(filepath) + n3_string = File.read(filepath) + parser = N3Parser.new(n3_string) + ntriples = parser.graph.to_ntriples + ntriples.gsub!(/_:bn\d+/, '_:node1') + ntriples = sort_ntriples(ntriples) + + nt_string = File.read(filepath.sub('.n3', '.nt')) + nt_string = sort_ntriples(nt_string) + + ntriples.should == nt_string + end + + def sort_ntriples(string) + string.split("\n").sort.join("\n") + end + +end \ No newline at end of file