diff --git a/.gitignore b/.gitignore index 8fbce390..2ee3c973 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,14 @@ ext/redcloth_scan/Makefile ext/redcloth_scan/*.[co] ext/redcloth_scan/*.bundle ext/redcloth_scan/*.so +ext/redcloth_scan/*.jar +ext/redcloth_scan/*.class +ext/redcloth_scan/*.java lib/*.bundle lib/*.so +lib/*.jar doc/rdoc/* tmp/* pkg/* -.DS_Store \ No newline at end of file +.DS_Store +*.log diff --git a/Rakefile b/Rakefile index 3fb84820..84d711b6 100644 --- a/Rakefile +++ b/Rakefile @@ -251,11 +251,11 @@ end # Here are the jruby tasks, stolen from Hpricot. If this jruby version catches on, I'd like these tasks to be unified with the C tasks and use echoe's platform detection, like Mongrel. namespace "jruby" do - + def ant(*args) system "ant #{args.join(' ')}" end - + desc "Installs jruby in a subdirectory of ./test/" task :install do sh %{svn export http://svn.codehaus.org/jruby/trunk/jruby test/jruby} @@ -265,26 +265,28 @@ namespace "jruby" do sh %{jruby -S gem install rake} Rake::Task['add_path'].invoke end - + desc "Adds jruby to your PATH" task :add_path do ENV['PATH'] = ENV['PATH'] + ":" + File.join(File.dirname(__FILE__), "test/jruby/bin") end - # Java only supports the table-driven code - # generation style at this point. + # Java only supports the table-driven code + # generation style at this point. desc "Generates the Java scanner code using the Ragel table-driven code generation style." task :ragel_java => [:ragel_version] do - ensure_ragel_version("RedClothScanService.java") do + ensure_ragel_version("RedclothScanService.java") do puts "compiling with ragel version #{@ragel_v}" - sh %{ragel -J -o ext/redcloth_scan/RedClothScanService.java ext/redcloth_scan/redcloth_scan.java.rl} + sh %{ragel -J -o ext/redcloth_scan/RedclothScanService.java ext/redcloth_scan/redcloth_scan.java.rl} + sh %{ragel -J -o ext/redcloth_scan/RedclothAttributes.java ext/redcloth_scan/redcloth_attributes.java.rl} + sh %{ragel -J -o ext/redcloth_scan/RedclothInline.java ext/redcloth_scan/redcloth_inline.java.rl} end end - def java_classpath_arg + def java_classpath_arg # A myriad of ways to discover the JRuby classpath classpath = begin - require 'java' + require 'java' # Already running in a JRuby JVM Java::java.lang.System.getProperty('java.class.path') rescue LoadError @@ -292,17 +294,6 @@ namespace "jruby" do end classpath ? "-cp #{classpath}" : "" end - - def compile_java(filename, jarname) - sh %{javac -source 1.4 -target 1.4 #{java_classpath_arg} #{filename}} - sh %{jar cf #{jarname} *.class} - end - - task :redcloth_scan_java => [:ragel_java] do - Dir.chdir "ext/redcloth_scan" do - compile_java("RedClothScanService.java", "redcloth_scan.jar") - end - end JRubySpec = spec.dup JRubySpec.platform = 'jruby' @@ -316,6 +307,25 @@ namespace "jruby" do sh "tar zxf pkg/#{PKG}.tgz" mv PKG, JRUBY_PKG_DIR end + + def compile_java(filenames, jarname) + sh %{javac -source 1.5 -target 1.5 #{java_classpath_arg} #{filenames.join(" ")}} + sh %{jar cf #{jarname} *.class} + end + + task :redcloth_scan_java => [:ragel_java] do + Dir.chdir "ext/redcloth_scan" do + compile_java(["RedclothAttributes.java", "RedclothInline.java", "RedclothScanService.java"], "redcloth_scan.jar") + end + cp "ext/redcloth_scan/redcloth_scan.jar", "lib" + end + + desc "Run all the tests using JRuby" + Rake::TestTask.new(:test => [:redcloth_scan_java]) do |t| + t.libs << "test" + t.test_files = FileList['test/test_*.rb'] + t.verbose = true + end desc "Build the RubyGems package for JRuby" task :package_jruby => JRUBY_PKG_DIR do @@ -340,4 +350,4 @@ def ensure_ragel_version(name) STDERR.puts "Ragel 6.3 or greater is required to generate #{name}." exit(1) end -end \ No newline at end of file +end diff --git a/ext/redcloth_scan/redcloth_attributes.java.rl b/ext/redcloth_scan/redcloth_attributes.java.rl new file mode 100644 index 00000000..34cba471 --- /dev/null +++ b/ext/redcloth_scan/redcloth_attributes.java.rl @@ -0,0 +1,118 @@ +/* + * redcloth_attributes.rl + * + * Copyright (C) 2008 Jason Garber + */ +import java.io.IOException; + +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyHash; +import org.jruby.RubyModule; +import org.jruby.RubyNumeric; +import org.jruby.RubyObject; +import org.jruby.RubyString; +import org.jruby.RubySymbol; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Block; +import org.jruby.runtime.CallbackFactory; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.load.BasicLibraryService; + +import org.jruby.util.ByteList; + +public class RedclothAttributes extends RedclothScanService.Base { + +%%{ + + machine redcloth_attributes; + include redcloth_common "redcloth_common.java.rl"; + + C2_CLAS = ( "(" ( [^)#]+ >A %{ STORE("class_buf"); } )? ("#" [^)]+ >A %{STORE("id_buf");} )? ")" ) ; + C2_LNGE = ( "[" [^\]]+ >A %{ STORE("lang_buf"); } "]" ) ; + C2_STYL = ( "{" [^}]+ >A %{ STORE("style_buf"); } "}" ) ; + C2 = ( C2_CLAS | C2_STYL | C2_LNGE )+ ; + + mtext_with_attributes = ( C2 mtext >A %T ) >X ; + + inline := |* + + mtext_with_attributes { SET_ATTRIBUTES(); } ; + + *|; + + link_text_with_attributes = C2 "."* " "* ( mtext+ ) >A %{ STORE("name"); } ; + link_text_without_attributes = ( mtext+ ) >B %{ STORE_B("name_without_attributes"); } ; + + link_says := |* + + link_text_with_attributes { SET_ATTRIBUTES(); } ; + link_text_without_attributes { SET_ATTRIBUTE("name_without_attributes", "name"); } ; + + *|; + +}%% + +%% write data nofinal; + + public void SET_ATTRIBUTES() { + SET_ATTRIBUTE("class_buf", "class"); + SET_ATTRIBUTE("id_buf", "id"); + SET_ATTRIBUTE("lang_buf", "lang"); + SET_ATTRIBUTE("style_buf", "style"); + } + + public void SET_ATTRIBUTE(String B, String A) { + buf = ((RubyHash)regs).aref(runtime.newSymbol(B)); + if(!buf.isNil()) { + ((RubyHash)regs).aset(runtime.newSymbol(A), buf); + } + } + + private int machine; + private IRubyObject buf; + + public RedclothAttributes(int machine, IRubyObject self, byte[] data, int p, int pe) { + this.runtime = self.getRuntime(); + this.self = self; + + // This is GROSS but necessary for EOF matching + this.data = new byte[pe+1]; + System.arraycopy(data, p, this.data, 0, pe); + this.data[pe] = 0; + + this.p = 0; + this.pe = pe+1; + this.eof = this.pe; + this.orig_p = 0; + this.orig_pe = this.pe; + + this.regs = RubyHash.newHash(runtime); + this.buf = runtime.getNil(); + this.machine = machine; + } + + public IRubyObject parse() { + %% write init; + + cs = machine; + + %% write exec; + + return regs; + } + + public static IRubyObject attributes(IRubyObject self, IRubyObject str) { + ByteList bl = str.convertToString().getByteList(); + int cs = redcloth_attributes_en_inline; + return new RedclothAttributes(cs, self, bl.bytes, bl.begin, bl.realSize).parse(); + } + + public static IRubyObject link_attributes(IRubyObject self, IRubyObject str) { + ByteList bl = str.convertToString().getByteList(); + int cs = redcloth_attributes_en_link_says; + return new RedclothAttributes(cs, self, bl.bytes, bl.begin, bl.realSize).parse(); + } +} diff --git a/ext/redcloth_scan/redcloth_common.java.rl b/ext/redcloth_scan/redcloth_common.java.rl new file mode 100644 index 00000000..a1c21116 --- /dev/null +++ b/ext/redcloth_scan/redcloth_common.java.rl @@ -0,0 +1,121 @@ +%%{ + + machine redcloth_common; + + action A { reg = p; } + action B { bck = p; } + action T { STORE("text"); } + action X { CLEAR_REGS(); reg = -1; } + action cat { CAT(block); } + action esc { strCatEscaped(self, block, data, ts, te); } + action esc_pre { strCatEscapedForPreformatted(self, block, data, ts, te); } + action ignore { ((RubyString)block).append(self.callMethod(runtime.getCurrentContext(), "ignore", regs)); } + + # simple + LF = ( '\n' ) ; + default = ^0 ; + EOF = 0 ; + + # textile modifiers + A_LEFT = "<" %{ ASET("align", "left"); } ; + A_RIGHT = ">" %{ ASET("align", "right"); } ; + A_JUSTIFIED = "<>" %{ ASET("align", "justify"); } ; + A_CENTER = "=" %{ ASET("align", "center"); } ; + A_PADLEFT = "(" >A %{ AINC("padding-left"); } ; + A_PADRIGHT = ")" >A %{ AINC("padding-right"); } ; + A_HLGN = ( A_LEFT | A_RIGHT | A_JUSTIFIED | A_CENTER | A_PADLEFT | A_PADRIGHT ) ; + A_LIMIT = ( A_LEFT | A_CENTER | A_RIGHT ) ; + A_VLGN = ( "-" %{ ASET("vertical-align", "middle"); } | "^" %{ ASET("vertical-align", "top"); } | "~" %{ ASET("vertical-align", "bottom"); } ) ; + C_CLAS = ( "(" ( [^)#]+ >A %{ STORE("class"); } )? ("#" [^)]+ >A %{STORE("id");} )? ")" ) ; + C_LNGE = ( "[" [^\]]+ >A %{ STORE("lang"); } "]" ) ; + C_STYL = ( "{" [^}]+ >A %{ STORE("style"); } "}" ) ; + S_CSPN = ( "\\" [0-9]+ >A %{ STORE("colspan"); } ) ; + S_RSPN = ( "/" [0-9]+ >A %{ STORE("rowspan"); } ) ; + D_HEADER = "_" %{ ASET("th", "true"); } ; + A = ( ( A_HLGN | A_VLGN )* ) ; + A2 = ( A_LIMIT? ) ; + S = ( S_CSPN | S_RSPN )* ; + C = ( C_CLAS | C_STYL | C_LNGE )* ; + D = ( D_HEADER ) ; + N_CONT = "_" %{ list_continue = 1; }; + N_NUM = digit+ >A %{ STORE("start"); }; + N = ( N_CONT | N_NUM )? ; + PUNCT = ( "!" | '"' | "#" | "$" | "%" | "&" | "'" | "," | "-" | "." | "/" | ":" | ";" | "=" | "?" | "\\" | "^" | "`" | "|" | "~" | "[" | "]" | "(" | ")" | "<" ) ; + dotspace = ("." " "*) ; + indent = [ \t]* ; + + # very un-DRY; Adrian says an action-stripping macro will come in a future Ragel version + A_LEFT_noactions = "<" ; + A_RIGHT_noactions = ">" ; + A_JUSTIFIED_noactions = "<>" ; + A_CENTER_noactions = "=" ; + A_PADLEFT_noactions = "(" ; + A_PADRIGHT_noactions = ")" ; + A_HLGN_noactions = ( A_LEFT_noactions | A_RIGHT_noactions | A_JUSTIFIED_noactions | A_CENTER_noactions | A_PADLEFT_noactions | A_PADRIGHT_noactions ) ; + A_VLGN_noactions = ( "-" | "^" | "~" ) ; + C_CLAS_noactions = ( "(" ( [^)#]+ )? ("#" [^)]+ )? ")" ) ; + C_LNGE_noactions = ( "[" [^\]]+ "]" ) ; + C_STYL_noactions = ( "{" [^}]+ "}" ) ; + A_noactions = ( ( A_HLGN_noactions | A_VLGN_noactions )* ) ; + C_noactions = ( C_CLAS_noactions | C_STYL_noactions | C_LNGE_noactions )* ; + C_noquotes_noactions = C_noactions -- '"' ; + + # text blocks + trailing = PUNCT - ("'" | '"') ; + chars = (default - space)+ ; + phrase = chars -- trailing ; + + # html tags (from Hpricot) + NameChar = [\-A-Za-z0-9._:?] ; + Name = [A-Za-z_:] NameChar* ; + NameAttr = NameChar+ ; + Q1Attr = [^']* ; + Q2Attr = [^"]* ; + UnqAttr = ( space | [^ \t\r\n<>"'] [^ \t\r\n<>]* ) ; + Nmtoken = NameChar+ ; + Attr = NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ; + AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken ) ; + AttrSet = ( Attr | Nmtoken space+ ) ; + + script_tag_start = ( "]* ">" ) >X >A %T ; + script_tag_end = ( "" >A %T LF? ) >X ; + + + # URI tokens (lifted from Mongrel) + CTL = (cntrl | 127); + safe = ("$" | "-" | "_" | "."); + extra = ("!" | "*" | "'" | "(" | ")" | "," | "#"); + reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+"); + unsafe = (CTL | " " | "\"" | "%" | "<" | ">"); + national = any -- (alpha | digit | reserved | extra | safe | unsafe); + unreserved = (alpha | digit | safe | extra | national); + escape = ("%" xdigit xdigit); + uchar = (unreserved | escape); + pchar = (uchar | ":" | "@" | "&" | "=" | "+"); + scheme = ( alpha | digit | "+" | "-" | "." )+ ; + absolute_uri = (scheme ":" (uchar | reserved )*); + safepath = (pchar* (alpha | digit | safe) pchar*) ; + path = (safepath ( "/" pchar* )*) ; + query = ( uchar | reserved )* ; + param = ( pchar | "/" )* ; + params = (param ( ";" param )*) ; + rel_path = (path (";" params)?) ("?" query)?; + absolute_path = ("/"+ rel_path?); + target = ("#" pchar*) ; + uri = (target | absolute_uri | absolute_path | rel_path) ; + + # common + title = ( '(' default+ >A %{ STORE("title"); } :> ')' ) ; + word = ( alnum | safe | " " ) ; + mspace = ( ( " " | "\t" | LF )+ ) -- LF{2} ; + mtext = ( chars (mspace chars)* ) ; + + # conditionals + action starts_line { + p == orig_p || data[(p-1)] == '\r' || data[(p-1)] == '\n' || data[(p-1)] == '\f' + } + action starts_phrase { + p == orig_p || data[(p-1)] == '\r' || data[(p-1)] == '\n' || data[(p-1)] == '\f' || data[(p-1)] == ' ' + } + +}%%; diff --git a/ext/redcloth_scan/redcloth_inline.java.rl b/ext/redcloth_scan/redcloth_inline.java.rl new file mode 100644 index 00000000..77439c7c --- /dev/null +++ b/ext/redcloth_scan/redcloth_inline.java.rl @@ -0,0 +1,254 @@ +/* + * redcloth_inline.rl + * + * Copyright (C) 2008 Jason Garber + */ +import java.io.IOException; + +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyHash; +import org.jruby.RubyModule; +import org.jruby.RubyNumeric; +import org.jruby.RubyObject; +import org.jruby.RubyString; +import org.jruby.RubySymbol; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Block; +import org.jruby.runtime.CallbackFactory; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.load.BasicLibraryService; + +import org.jruby.util.ByteList; + +public class RedclothInline extends RedclothScanService.Base { + +%%{ + + machine redcloth_inline; + include redcloth_common "redcloth_common.java.rl"; + + # links + mtext_noquotes = mtext -- '"' ; + quoted_mtext = '"' mtext_noquotes '"' ; + mtext_including_quotes = (mtext_noquotes ' "' mtext_noquotes '" ' mtext_noquotes?)+ ; + link_says = ( C_noactions "."* " "* ((quoted_mtext | mtext_including_quotes | mtext_noquotes) -- '":') ) >A %{ STORE("link_text"); } ; + link_says_noquotes_noactions = ( C_noquotes_noactions "."* " "* ((mtext_noquotes) -- '":') ) ; + link = ( '"' link_says '":' %A uri %{ STORE_URL("href"); } ) >X ; + link_noquotes_noactions = ( '"' link_says_noquotes_noactions '":' uri ) ; + bracketed_link = ( '["' link_says '":' %A uri %{ STORE("href"); } :> "]" ) >X ; + + # images + image_src = ( uri ) >A %{ STORE("src"); } ; + image_is = ( A2 C ". "? image_src :> title? ) ; + image_link = ( ":" uri >A %{ STORE_URL("href"); } ) ; + image = ( "["? "!" image_is "!" %A image_link? "]"? ) >X ; + + # footnotes + footno = "[" >X %A digit+ %T "]" ; + + # markup + end_markup_phrase = (" " | PUNCT | EOF | LF) @{ fhold; }; + code = "["? "@" >X mtext >A %T :> "@" "]"? ; + code_tag_start = "]* ">" ; + code_tag_end = "" ; + script_tag = ( "]* ">" (default+ -- "") "" LF? ) >X >A %T ; + notextile = "" >X (default+ -- "") >A %T ""; + strong = "["? "*" >X mtext >A %T :> "*" "]"? ; + b = "["? "**" >X mtext >A %T :> "**" "]"? ; + em = "["? "_" >X mtext >A %T :> "_" "]"? ; + i = "["? "__" >X mtext >A %T :> "__" "]"? ; + del = "[-" >X C ( mtext ) >A %T :>> "-]" ; + emdash_parenthetical_phrase_with_spaces = " -- " mtext " -- " ; + del_phrase = (( " " >A %{ STORE("beginning_space"); } "-") >X C ( mtext ) >A %T :>> ( "-" end_markup_phrase )) - emdash_parenthetical_phrase_with_spaces ; + ins = "["? "+" >X mtext >A %T :> "+" "]"? ; + sup = "[^" >X mtext >A %T :> "^]" ; + sup_phrase = ( "^" when starts_phrase) >X ( mtext ) >A %T :>> ( "^" end_markup_phrase ) ; + sub = "[~" >X mtext >A %T :> "~]" ; + sub_phrase = ( "~" when starts_phrase) >X ( mtext ) >A %T :>> ( "~" end_markup_phrase ) ; + span = "[%" >X mtext >A %T :> "%]" ; + span_phrase = (("%" when starts_phrase) >X ( mtext ) >A %T :>> ( "%" end_markup_phrase )) ; + cite = "["? "??" >X mtext >A %T :> "??" "]"? ; + ignore = "["? "==" >X %A mtext %T :> "==" "]"? ; + snip = "["? "```" >X %A mtext %T :> "```" "]"? ; + + # quotes + quote1 = "'" >X %A mtext %T :> "'" ; + non_quote_chars_or_link = (chars -- '"') | link_noquotes_noactions ; + mtext_inside_quotes = ( non_quote_chars_or_link (mspace non_quote_chars_or_link)* ) ; + html_tag_up_to_attribute_quote = "<" Name space+ NameAttr space* "=" space* ; + quote2 = ('"' >X %A ( mtext_inside_quotes - (mtext_inside_quotes html_tag_up_to_attribute_quote ) ) %T :> '"' ) ; + multi_paragraph_quote = (('"' when starts_line) >X %A ( chars -- '"' ) %T ); + + # html + start_tag = ( "<" Name space+ AttrSet* (AttrEnd)? ">" | "<" Name ">" ) >X >A %T ; + empty_tag = ( "<" Name space+ AttrSet* (AttrEnd)? "/>" | "<" Name "/>" ) >X >A %T ; + end_tag = ( "" ) >X >A %T ; + html_comment = ("") >X >A %T; + + # glyphs + ellipsis = ( " "? >A %T "..." ) >X ; + emdash = "--" ; + arrow = "->" ; + endash = " - " ; + acronym = ( [A-Z] >A [A-Z0-9]{2,} %T "(" default+ >A %{ STORE("title"); } :> ")" ) >X ; + caps_noactions = upper{3,} ; + caps = ( caps_noactions >A %*T ) >X ; + dim_digit = [0-9.]+ ; + prime = ("'" | '"')?; + dim_noactions = dim_digit prime (("x" | " x ") dim_digit prime) %T (("x" | " x ") dim_digit prime)? ; + dim = dim_noactions >X >A %T ; + tm = [Tt] [Mm] ; + trademark = " "? ( "[" tm "]" | "(" tm ")" ) ; + reg = [Rr] ; + registered = " "? ( "[" reg "]" | "(" reg ")" ) ; + cee = [Cc] ; + copyright = ( "[" cee "]" | "(" cee ")" ) ; + entity = ( "&" %A ( '#' digit+ | ( alpha ( alpha | digit )+ ) ) %T ';' ) >X ; + + # info + redcloth_version = "[RedCloth::VERSION]" ; + + other_phrase = phrase -- dim_noactions; + + code_tag := |* + code_tag_end { CAT(block); fgoto main; }; + default => esc_pre; + *|; + + main := |* + + image { INLINE(block, "image"); }; + + link { PARSE_LINK_ATTR("link_text"); PASS(block, "name", "link"); }; + bracketed_link { PARSE_LINK_ATTR("link_text"); PASS(block, "name", "link"); }; + + code { PARSE_ATTR("text"); PASS_CODE(block, "text", "code", opts); }; + code_tag_start { CAT(block); fgoto code_tag; }; + notextile { INLINE(block, "notextile"); }; + strong { PARSE_ATTR("text"); PASS(block, "text", "strong"); }; + b { PARSE_ATTR("text"); PASS(block, "text", "b"); }; + em { PARSE_ATTR("text"); PASS(block, "text", "em"); }; + i { PARSE_ATTR("text"); PASS(block, "text", "i"); }; + del { PASS(block, "text", "del"); }; + del_phrase { PASS(block, "text", "del_phrase"); }; + ins { PARSE_ATTR("text"); PASS(block, "text", "ins"); }; + sup { PARSE_ATTR("text"); PASS(block, "text", "sup"); }; + sup_phrase { PARSE_ATTR("text"); PASS(block, "text", "sup_phrase"); }; + sub { PARSE_ATTR("text"); PASS(block, "text", "sub"); }; + sub_phrase { PARSE_ATTR("text"); PASS(block, "text", "sub_phrase"); }; + span { PARSE_ATTR("text"); PASS(block, "text", "span"); }; + span_phrase { PARSE_ATTR("text"); PASS(block, "text", "span_phrase"); }; + cite { PARSE_ATTR("text"); PASS(block, "text", "cite"); }; + ignore => ignore; + snip { PASS(block, "text", "snip"); }; + quote1 { PASS(block, "text", "quote1"); }; + quote2 { PASS(block, "text", "quote2"); }; + multi_paragraph_quote { PASS(block, "text", "multi_paragraph_quote"); }; + + ellipsis { INLINE(block, "ellipsis"); }; + emdash { INLINE(block, "emdash"); }; + endash { INLINE(block, "endash"); }; + arrow { INLINE(block, "arrow"); }; + caps { INLINE(block, "caps"); }; + acronym { INLINE(block, "acronym"); }; + dim { INLINE(block, "dim"); }; + trademark { INLINE(block, "trademark"); }; + registered { INLINE(block, "registered"); }; + copyright { INLINE(block, "copyright"); }; + footno { PASS(block, "text", "footno"); }; + entity { INLINE(block, "entity"); }; + + script_tag { INLINE(block, "inline_html"); }; + start_tag { INLINE(block, "inline_html"); }; + end_tag { INLINE(block, "inline_html"); }; + empty_tag { INLINE(block, "inline_html"); }; + html_comment { INLINE(block, "inline_html"); }; + + redcloth_version { INLINE(block, "inline_redcloth_version"); }; + + other_phrase => esc; + PUNCT => esc; + space => esc; + + EOF; + + *|; + +}%% + +%% write data nofinal; + + public IRubyObject red_pass_code(IRubyObject self, IRubyObject regs, IRubyObject ref, String meth) { + IRubyObject txt = ((RubyHash)regs).aref(ref); + if(!txt.isNil()) { + IRubyObject txt2 = RubyString.newEmptyString(runtime); + strCatEscapedForPreformatted(self, txt2, ((RubyString)txt).getByteList().bytes, ((RubyString)txt).getByteList().begin, ((RubyString)txt).getByteList().begin + ((RubyString)txt).getByteList().realSize); + ((RubyHash)regs).aset(ref, txt2); + } + return self.callMethod(runtime.getCurrentContext(), meth, regs); + } + + public IRubyObject red_parse_attr(IRubyObject self, IRubyObject regs, IRubyObject ref) { + IRubyObject txt = ((RubyHash)regs).aref(ref); + IRubyObject new_regs = RedclothAttributes.attributes(self, txt); + return regs.callMethod(runtime.getCurrentContext(), "update", new_regs); + } + + public IRubyObject red_parse_link_attr(IRubyObject self, IRubyObject regs, IRubyObject ref) { + IRubyObject txt = ((RubyHash)regs).aref(ref); + IRubyObject new_regs = RedclothAttributes.link_attributes(self, txt); + return regs.callMethod(runtime.getCurrentContext(), "update", new_regs); + } + + public void PASS_CODE(IRubyObject H, String A, String T, int O) { + ((RubyString)H).append(red_pass_code(self, regs, runtime.newSymbol(A), T)); + } + + public void PARSE_ATTR(String A) { + red_parse_attr(self, regs, runtime.newSymbol(A)); + } + + public void PARSE_LINK_ATTR(String A) { + red_parse_link_attr(self, regs, runtime.newSymbol(A)); + } + + private int opts; + private IRubyObject buf; + + public RedclothInline(IRubyObject self, byte[] data, int p, int pe, IRubyObject refs) { + this.runtime = self.getRuntime(); + this.self = self; + + // This is GROSS but necessary for EOF matching + this.data = new byte[pe+1]; + System.arraycopy(data, p, this.data, 0, pe); + this.data[pe] = 0; + + this.p = 0; + this.pe = pe+1; + this.eof = this.pe; + this.orig_p = 0; + this.orig_pe = this.pe; + this.refs = refs; + this.block = RubyString.newEmptyString(runtime); + this.regs = runtime.getNil(); + this.opts = 0; + this.buf = runtime.getNil(); + } + + + public IRubyObject inline() { + %% write init; + %% write exec; + + return block; + } + + public static IRubyObject inline2(IRubyObject self, IRubyObject str, IRubyObject refs) { + ByteList bl = str.convertToString().getByteList(); + return new RedclothInline(self, bl.bytes, bl.begin, bl.realSize, refs).inline(); + } +} diff --git a/ext/redcloth_scan/redcloth_scan.java.rl b/ext/redcloth_scan/redcloth_scan.java.rl index 98065fec..2bbee07a 100644 --- a/ext/redcloth_scan/redcloth_scan.java.rl +++ b/ext/redcloth_scan/redcloth_scan.java.rl @@ -1,326 +1,842 @@ - +/* + * redcloth_scan.java.rl + * + * Copyright (C) 2008 Jason Garber + */ import java.io.IOException; import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyHash; import org.jruby.RubyModule; import org.jruby.RubyNumeric; -import org.jruby.RubyObjectAdapter; +import org.jruby.RubyObject; import org.jruby.RubyString; -import org.jruby.javasupport.JavaEmbedUtils; +import org.jruby.RubySymbol; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Arity; import org.jruby.runtime.Block; import org.jruby.runtime.CallbackFactory; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.load.BasicLibraryService; +import org.jruby.util.ByteList; + +public class RedclothScanService implements BasicLibraryService { + + public static class Base { + public void LIST_ITEM() { + int aint = 0; + IRubyObject aval = ((RubyArray)list_index).entry(nest-1); + if(!aval.isNil()) { aint = RubyNumeric.fix2int(aval); } + if(list_type.equals("ol")) { + ((RubyArray)list_index).store(nest-1, runtime.newFixnum(aint + 1)); + } + + if(nest > ((RubyArray)list_layout).getLength()) { + listm = list_type + "_open"; + if(list_continue == 1) { + list_continue = 0; + ((RubyHash)regs).aset(runtime.newSymbol("start"), ((RubyArray)list_index).entry(nest-1)); + } else { + IRubyObject start = ((RubyHash)regs).aref(runtime.newSymbol("start")); + if(start.isNil()) { + ((RubyArray)list_index).store(nest-1, runtime.newFixnum(1)); + } else { + IRubyObject start_num = start.callMethod(runtime.getCurrentContext(), "to_i"); + ((RubyArray)list_index).store(nest-1, start_num); + } + } + ((RubyHash)regs).aset(runtime.newSymbol("nest"), runtime.newFixnum(nest)); + ((RubyString)html).append(self.callMethod(runtime.getCurrentContext(), listm, regs)); + ((RubyArray)list_layout).store(nest-1, runtime.newString(list_type)); + regs = RubyHash.newHash(runtime); + ASET("first", "true"); + } + LIST_CLOSE(); + ((RubyHash)regs).aset(runtime.newSymbol("nest"), ((RubyArray)list_layout).length()); + ASET("type", "li_open"); + } + + public void LIST_CLOSE() { + while(nest < ((RubyArray)list_layout).getLength()) { + ((RubyHash)regs).aset(runtime.newSymbol("nest"), ((RubyArray)list_layout).length()); + IRubyObject end_list = ((RubyArray)list_layout).pop(runtime.getCurrentContext()); + if(!end_list.isNil()) { + String s = end_list.convertToString().toString(); + listm = s + "_close"; + ((RubyString)html).append(self.callMethod(runtime.getCurrentContext(), listm, regs)); + } + } + } + + public void TRANSFORM(String T) { + if(p > reg && reg >= ts) { + IRubyObject str = RedclothScanService.transform(self, data, reg, p-reg, refs); + ((RubyHash)regs).aset(runtime.newSymbol(T), str); + } else { + ((RubyHash)regs).aset(runtime.newSymbol(T), runtime.getNil()); + } + } + + public IRubyObject red_pass(IRubyObject self, IRubyObject regs, IRubyObject ref, String meth, IRubyObject refs) { + IRubyObject txt = ((RubyHash)regs).aref(ref); + if(!txt.isNil()) { + ((RubyHash)regs).aset(ref, inline2(self, txt, refs)); + } + return self.callMethod(self.getRuntime().getCurrentContext(), meth, regs); + } -public class RedClothScanService implements BasicLibraryService { - private static RubyObjectAdapter rubyApi; + + public void PASS(IRubyObject H, String A, String T) { + ((RubyString)H).append(red_pass(self, regs, runtime.newSymbol(A), T, refs)); + } + + public void STORE_URL(String T) { + if(p > reg && reg >= ts) { + boolean punct = true; + while(p > reg && punct) { + switch(data[p - 1]) { + case '!': case '"': case '#': case '$': case '%': case ']': case '[': case '&': case '\'': + case '*': case '+': case ',': case '-': case '.': case ')': case '(': case ':': + case ';': case '=': case '?': case '@': case '\\': case '^': case '_': + case '`': case '|': case '~': p--; break; + default: punct = false; + } + } + te = p; + } -/***** This all came from Hpricot ******* + STORE(T); - public void ELE(IRubyObject N) { - if (te > ts || text) { - IRubyObject raw_string = runtime.getNil(); - ele_open = false; text = false; - if (ts != -1 && N != cdata && N != sym_text && N != procins && N != comment) { - raw_string = runtime.newString(new String(buf,ts,te-ts)); - } - rb_yield_tokens(N, tag[0], attr, raw_string, taint); - } - } + if(!refs.isNil() && refs.callMethod(runtime.getCurrentContext(), "has_key?", ((RubyHash)regs).aref(runtime.newSymbol(T))).isTrue()) { + ((RubyHash)regs).aset(runtime.newSymbol(T), ((RubyHash)refs).aref(((RubyHash)regs).aref(runtime.newSymbol(T)))); + } + } - public void SET(IRubyObject[] N, int E) { - int mark = 0; - if(N == tag) { - if(mark_tag == -1 || E == mark_tag) { - tag[0] = runtime.newString(""); - } else if(E > mark_tag) { - tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag)); - } - } else if(N == akey) { - if(mark_akey == -1 || E == mark_akey) { - akey[0] = runtime.newString(""); - } else if(E > mark_akey) { - akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey)); - } - } else if(N == aval) { - if(mark_aval == -1 || E == mark_aval) { - aval[0] = runtime.newString(""); - } else if(E > mark_aval) { - aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval)); - } - } - } + public void red_inc(IRubyObject regs, IRubyObject ref) { + int aint = 0; + IRubyObject aval = ((RubyHash)regs).aref(ref); + if(!aval.isNil()) { + aint = RubyNumeric.fix2int(aval); + } + ((RubyHash)regs).aset(ref, regs.getRuntime().newFixnum(aint+1)); + } - public void CAT(IRubyObject[] N, int E) { - if(N[0].isNil()) { - SET(N,E); - } else { - int mark = 0; - if(N == tag) { - mark = mark_tag; - } else if(N == akey) { - mark = mark_akey; - } else if(N == aval) { - mark = mark_aval; - } - ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark))); - } - } + public IRubyObject red_blockcode(IRubyObject self, IRubyObject regs, IRubyObject block) { + Ruby runtime = self.getRuntime(); + IRubyObject btype = ((RubyHash)regs).aref(runtime.newSymbol("type")); + block = block.callMethod(runtime.getCurrentContext(), "strip"); + if(((RubyString)block).getByteList().realSize > 0) { + ((RubyHash)regs).aset(runtime.newSymbol("text"), block); + block = self.callMethod(runtime.getCurrentContext(), btype.asJavaString(), regs); + } + return block; + } - public void SLIDE(Object N) { - int mark = 0; - if(N == tag) { - mark = mark_tag; - } else if(N == akey) { - mark = mark_akey; - } else if(N == aval) { - mark = mark_aval; - } - if(mark > ts) { - if(N == tag) { - mark_tag -= ts; - } else if(N == akey) { - mark_akey -= ts; - } else if(N == aval) { - mark_aval -= ts; - } - } - } + public IRubyObject red_block(IRubyObject self, IRubyObject regs, IRubyObject block, IRubyObject refs) { + Ruby runtime = self.getRuntime(); + RubySymbol method; + IRubyObject sym_text = runtime.newSymbol("text"); + IRubyObject btype = ((RubyHash)regs).aref(runtime.newSymbol("type")); + block = block.callMethod(runtime.getCurrentContext(), "strip"); - public void ATTR(IRubyObject K, IRubyObject V) { - if(!K.isNil()) { - if(attr.isNil()) { - attr = RubyHash.newHash(runtime); - } - ((RubyHash)attr).op_aset(runtime.getCurrentContext(),K,V); - // ((RubyHash)attr).aset(K,V); - } - } + if(!block.isNil() && !btype.isNil()) { + method = btype.convertToString().intern(); - public void ATTR(IRubyObject[] K, IRubyObject V) { - ATTR(K[0],V); - } + if(method == runtime.newSymbol("notextile")) { + ((RubyHash)regs).aset(sym_text, block); + } else { + ((RubyHash)regs).aset(sym_text, inline2(self, block, refs)); + } - public void ATTR(IRubyObject K, IRubyObject[] V) { - ATTR(K,V[0]); - } + if(self.respondsTo(method.asJavaString())) { + block = self.callMethod(runtime.getCurrentContext(), method.asJavaString(), regs); + } else { + IRubyObject fallback = ((RubyHash)regs).aref(runtime.newSymbol("fallback")); + if(!fallback.isNil()) { + ((RubyString)fallback).append(((RubyHash)regs).aref(sym_text)); + regs = RubyHash.newHash(runtime); + ((RubyHash)regs).aset(sym_text, fallback); + } + block = self.callMethod(runtime.getCurrentContext(), "p", regs); + } + } - public void ATTR(IRubyObject[] K, IRubyObject[] V) { - ATTR(K[0],V[0]); - } + return block; + } - public void TEXT_PASS() { - if(!text) { - if(ele_open) { - ele_open = false; - if(ts > -1) { - mark_tag = ts; - } - } else { - mark_tag = p; - } - attr = runtime.getNil(); - tag[0] = runtime.getNil(); - text = true; - } - } + public void strCatEscaped(IRubyObject self, IRubyObject str, byte[] data, int ts, int te) { + IRubyObject sourceStr = RubyString.newString(self.getRuntime(), data, ts, te-ts); + IRubyObject escapedStr = self.callMethod(self.getRuntime().getCurrentContext(), "escape", sourceStr); + ((RubyString)str).concat(escapedStr); + } - public void EBLK(IRubyObject N, int T) { - CAT(tag, p - T + 1); - ELE(N); - } -*/ + public void strCatEscapedForPreformatted(IRubyObject self, IRubyObject str, byte[] data, int ts, int te) { + IRubyObject sourceStr = RubyString.newString(self.getRuntime(), data, ts, te-ts); + IRubyObject escapedStr = self.callMethod(self.getRuntime().getCurrentContext(), "escape_pre", sourceStr); + ((RubyString)str).concat(escapedStr); + } - public void rb_raise(RubyClass error, String message) { - throw new RaiseException(runtime, error, message, true); - } + public void CLEAR(IRubyObject obj) { + if(block == obj) { + block = RubyString.newEmptyString(runtime); + } else if(html == obj) { + html = RubyString.newEmptyString(runtime); + } else if(table == obj) { + table = RubyString.newEmptyString(runtime); + } + } - public IRubyObject rb_str_new2(String s) { - return runtime.newString(s); - } + public void ADD_BLOCK() { + ((RubyString)html).append(red_block(self, regs, block, refs)); + extend = runtime.getNil(); + CLEAR(block); + CLEAR_REGS(); + } + + public void CLEAR_REGS() { + regs = RubyHash.newHash(runtime); + } + + public void CAT(IRubyObject H) { + ((RubyString)H).cat(data, ts, te-ts); + } + + public void INLINE(IRubyObject H, String T) { + ((RubyString)H).append(self.callMethod(runtime.getCurrentContext(), T, regs)); + } + public void DONE(IRubyObject H) { + ((RubyString)html).append(H); + CLEAR(H); + CLEAR_REGS(); + } + + public void ADD_EXTENDED_BLOCK() { + ((RubyString)html).append(red_block(self, regs, block, refs)); + CLEAR(block); + } + + public void ADD_BLOCKCODE() { + ((RubyString)html).append(red_blockcode(self, regs, block)); + CLEAR(block); + CLEAR_REGS(); + } + + public void ADD_EXTENDED_BLOCKCODE() { + ((RubyString)html).append(red_blockcode(self, regs, block)); + CLEAR(block); + } + + public void AINC(String T) { + red_inc(regs, runtime.newSymbol(T)); + } + + public void END_EXTENDED() { + extend = runtime.getNil(); + CLEAR_REGS(); + } + + public void ASET(String T, String V) { + ((RubyHash)regs).aset(runtime.newSymbol(T), runtime.newString(V)); + } + + public void STORE(String T) { + if(p > reg && reg >= ts) { + + IRubyObject str = RubyString.newString(runtime, data, reg, p-reg); + ((RubyHash)regs).aset(runtime.newSymbol(T), str); + } else { + ((RubyHash)regs).aset(runtime.newSymbol(T), runtime.getNil()); + } + } + + public void STORE_B(String T) { + if(p > bck && bck >= ts) { + IRubyObject str = RubyString.newString(runtime, data, bck, p-bck); + ((RubyHash)regs).aset(runtime.newSymbol(T), str); + } else { + ((RubyHash)regs).aset(runtime.newSymbol(T), runtime.getNil()); + } + } + + public IRubyObject self; + public byte[] data; + public int p, pe; + public IRubyObject refs; + + public Ruby runtime; + public int orig_p, orig_pe; + public int cs, act, nest; + public int ts = -1; + public int te = -1; + public int reg = -1; + public int bck = -1; + public int eof = -1; + + public IRubyObject html; + public IRubyObject table; + public IRubyObject block; + public IRubyObject regs; + + public IRubyObject list_layout; + public String list_type = null; + public IRubyObject list_index; + public int list_continue = 0; + public IRubyObject plain_block; + public IRubyObject extend; + public String listm = ""; + public IRubyObject refs_found; + } + + private static class Transformer extends Base { %%{ + machine redcloth_scan; - include redcloth_scan "redcloth_scan.rl"; + include redcloth_common "redcloth_common.java.rl"; + + action extend { extend = ((RubyHash)regs).aref(runtime.newSymbol("type")); } + + # blocks + notextile_tag_start = "" ; + notextile_tag_end = "" LF? ; + noparagraph_line_start = " "+ ; + notextile_block_start = ( "notextile" >A %{ STORE("type"); } A C :> "." ( "." %extend | "" ) " "+ ) ; + pre_tag_start = "]* ">" (space* "")? ; + pre_tag_end = ("" space*)? "" LF? ; + pre_block_start = ( "pre" >A %{ STORE("type"); } A C :> "." ( "." %extend | "" ) " "+ ) ; + bc_start = ( "bc" >A %{ STORE("type"); } A C :> "." ( "." %extend | "" ) " "+ ) ; + bq_start = ( "bq" >A %{ STORE("type"); } A C :> "." ( "." %extend | "" ) ( ":" %A uri %{ STORE("cite"); } )? " "+ ) ; + non_ac_btype = ( "bq" | "bc" | "pre" | "notextile" ); + btype = (alpha alnum*) -- (non_ac_btype | "fn" digit+); + block_start = ( btype >A %{ STORE("type"); } A C :> "." ( "." %extend | "" ) " "+ ) >B %{ STORE_B("fallback"); }; + all_btypes = btype | non_ac_btype; + next_block_start = ( all_btypes A_noactions C_noactions :> "."+ " " ) >A @{ p = reg - 1; } ; + double_return = LF{2,} ; + block_end = ( double_return | EOF ); + ftype = ( "fn" >A %{ STORE("type"); } digit+ >A %{ STORE("id"); } ) ; + footnote_start = ( ftype A C :> dotspace ) ; + ul = "*" %{nest++; list_type = "ul";}; + ol = "#" %{nest++; list_type = "ol";}; + ul_start = ( ul | ol )* ul A C :> " "+ ; + ol_start = ( ul | ol )* ol N A C :> " "+ ; + list_start = ( ul_start | ol_start ) >{nest = 0;} ; + dt_start = "-" . " "+ ; + dd_start = ":=" ; + long_dd = dd_start " "* LF %{ ADD_BLOCK(); ASET("type", "dd"); } any+ >A %{ TRANSFORM("text"); } :>> "=:" ; + dl_start = (dt_start mtext (LF dt_start mtext)* " "* dd_start) ; + blank_line = LF; + link_alias = ( "[" >{ ASET("type", "ignore"); } %A chars %T "]" %A uri %{ STORE_URL("href"); } ) ; + + # image lookahead + IMG_A_LEFT = "<" %{ ASET("float", "left"); } ; + IMG_A_RIGHT = ">" %{ ASET("float", "right"); } ; + aligned_image = ( "["? "!" (IMG_A_LEFT | IMG_A_RIGHT) ) >A @{ p = reg - 1; } ; + + # html blocks + BlockTagName = Name - ("pre" | "notextile" | "a" | "applet" | "basefont" | "bdo" | "br" | "font" | "iframe" | "img" | "map" | "object" | "param" | "q" | "script" | "span" | "sub" | "sup" | "abbr" | "acronym" | "cite" | "code" | "del" | "dfn" | "em" | "ins" | "kbd" | "samp" | "strong" | "var" | "b" | "big" | "i" | "s" | "small" | "strike" | "tt" | "u"); + block_start_tag = "<" BlockTagName space+ AttrSet* (AttrEnd)? ">" | "<" BlockTagName ">"; + block_empty_tag = "<" BlockTagName space+ AttrSet* (AttrEnd)? "/>" | "<" BlockTagName "/>" ; + block_end_tag = "" ; + html_start = indent >B %{STORE_B("indent_before_start");} block_start_tag >B %{STORE_B("start_tag");} indent >B %{STORE_B("indent_after_start");} ; + html_end = indent >B %{STORE_B("indent_before_end");} block_end_tag >B %{STORE_B("end_tag");} (indent LF?) >B %{STORE_B("indent_after_end");} ; + standalone_html = indent (block_start_tag | block_empty_tag | block_end_tag) indent LF+; + html_end_terminating_block = ( LF indent block_end_tag ) >A @{ p = reg - 1; } ; + + # tables + para = ( default+ ) -- LF ; + btext = para ( LF{2} )? ; + tddef = ( D? S A C :> dotspace ) ; + td = ( tddef? btext >A %T :> "|" >{PASS(table, "text", "td");} ) >X ; + trdef = ( A C :> dotspace ) ; + tr = ( trdef? "|" %{INLINE(table, "tr_open");} td+ ) >X %{INLINE(table, "tr_close");} ; + trows = ( tr (LF >X tr)* ) ; + tdef = ( "table" >X A C :> dotspace LF ) ; + table = ( tdef? trows >{table = RubyString.newEmptyString(runtime); INLINE(table, "table_open");} ) >{ reg = -1; } ; + + # info + redcloth_version = ("RedCloth" >A ("::" | " " ) "VERSION"i ":"? " ")? %{STORE("prefix");} "RedCloth::VERSION" (LF* EOF | double_return) ; + + pre_tag := |* + pre_tag_end { CAT(block); DONE(block); fgoto main; }; + default => esc_pre; + *|; + + pre_block := |* + EOF { + ADD_BLOCKCODE(); + fgoto main; + }; + double_return { + if (extend.isNil()) { + ADD_BLOCKCODE(); + fgoto main; + } else { + ADD_EXTENDED_BLOCKCODE(); + } + }; + double_return next_block_start { + if (extend.isNil()) { + ADD_BLOCKCODE(); + fgoto main; + } else { + ADD_EXTENDED_BLOCKCODE(); + END_EXTENDED(); + fgoto main; + } + }; + default => esc_pre; + *|; + + script_tag := |* + script_tag_end { CAT(block); ASET("type", "ignore"); ADD_BLOCK(); fgoto main; }; + EOF { ASET("type", "ignore"); ADD_BLOCK(); fgoto main; }; + default => cat; + *|; + + noparagraph_line := |* + LF { ADD_BLOCK(); fgoto main; }; + default => cat; + *|; + + notextile_tag := |* + notextile_tag_end { ADD_BLOCK(); fgoto main; }; + default => cat; + *|; + + notextile_block := |* + EOF { + ADD_BLOCK(); + fgoto main; + }; + double_return { + if (extend.isNil()) { + ADD_BLOCK(); + CAT(html); + fgoto main; + } else { + CAT(block); + ADD_EXTENDED_BLOCK(); + CAT(html); + } + }; + double_return next_block_start { + if (extend.isNil()) { + ADD_BLOCK(); + CAT(html); + fgoto main; + } else { + CAT(block); + ADD_EXTENDED_BLOCK(); + END_EXTENDED(); + fgoto main; + } + }; + default => cat; + *|; + + html := |* + html_end { ADD_BLOCK(); fgoto main; }; + default => cat; + *|; + + bc := |* + EOF { + ADD_BLOCKCODE(); + INLINE(html, "bc_close"); + plain_block = runtime.newString("p"); + fgoto main; + }; + double_return { + if (extend.isNil()) { + ADD_BLOCKCODE(); + INLINE(html, "bc_close"); + plain_block = runtime.newString("p"); + fgoto main; + } else { + ADD_EXTENDED_BLOCKCODE(); + CAT(html); + } + }; + double_return next_block_start { + if (extend.isNil()) { + ADD_BLOCKCODE(); + INLINE(html, "bc_close"); + plain_block = runtime.newString("p"); + fgoto main; + } else { + ADD_EXTENDED_BLOCKCODE(); + CAT(html); + INLINE(html, "bc_close"); + plain_block = runtime.newString("p"); + END_EXTENDED(); + fgoto main; + } + }; + default => esc_pre; + *|; + + bq := |* + EOF { + ADD_BLOCK(); + INLINE(html, "bq_close"); + fgoto main; + }; + double_return { + if (extend.isNil()) { + ADD_BLOCK(); + INLINE(html, "bq_close"); + fgoto main; + } else { + ADD_EXTENDED_BLOCK(); + } + }; + double_return next_block_start { + if (extend.isNil()) { + ADD_BLOCK(); + INLINE(html, "bq_close"); + fgoto main; + } else { + ADD_EXTENDED_BLOCK(); + INLINE(html, "bq_close"); + END_EXTENDED(); + fgoto main; + } + }; + html_end_terminating_block { + if (extend.isNil()) { + ADD_BLOCK(); + INLINE(html, "bq_close"); + fgoto main; + } else { + ADD_EXTENDED_BLOCK(); + INLINE(html, "bq_close"); + END_EXTENDED(); + fgoto main; + } + }; + default => cat; + *|; + + block := |* + EOF { + ADD_BLOCK(); + fgoto main; + }; + double_return { + if (extend.isNil()) { + ADD_BLOCK(); + fgoto main; + } else { + ADD_EXTENDED_BLOCK(); + } + }; + double_return next_block_start { + if (extend.isNil()) { + ADD_BLOCK(); + fgoto main; + } else { + ADD_EXTENDED_BLOCK(); + END_EXTENDED(); + fgoto main; + } + }; + html_end_terminating_block { + if (extend.isNil()) { + ADD_BLOCK(); + fgoto main; + } else { + ADD_EXTENDED_BLOCK(); + END_EXTENDED(); + fgoto main; + } + }; + LF list_start { + ADD_BLOCK(); + list_layout = runtime.newArray(); + LIST_ITEM(); + fgoto list; + }; + + default => cat; + *|; + + footnote := |* + block_end { ADD_BLOCK(); fgoto main; }; + default => cat; + *|; + + list := |* + LF list_start { ADD_BLOCK(); LIST_ITEM(); }; + block_end { ADD_BLOCK(); nest = 0; LIST_CLOSE(); fgoto main; }; + default => cat; + *|; + + dl := |* + LF dt_start { ADD_BLOCK(); ASET("type", "dt"); }; + dd_start { ADD_BLOCK(); ASET("type", "dd"); }; + long_dd { INLINE(html, "dd"); CLEAR_REGS(); }; + block_end { ADD_BLOCK(); INLINE(html, "dl_close"); fgoto main; }; + default => cat; + *|; + + main := |* + noparagraph_line_start { ASET("type", "ignored_line"); fgoto noparagraph_line; }; + notextile_tag_start { ASET("type", "notextile"); fgoto notextile_tag; }; + notextile_block_start { ASET("type", "notextile"); fgoto notextile_block; }; + script_tag_start { CAT(block); fgoto script_tag; }; + pre_tag_start { ASET("type", "notextile"); CAT(block); fgoto pre_tag; }; + pre_block_start { fgoto pre_block; }; + standalone_html { ASET("type", "html"); CAT(block); ADD_BLOCK(); }; + html_start { ASET("type", "html_block"); fgoto html; }; + bc_start { INLINE(html, "bc_open"); ASET("type", "code"); plain_block = runtime.newString("code"); fgoto bc; }; + bq_start { INLINE(html, "bq_open"); ASET("type", "p"); fgoto bq; }; + block_start { fgoto block; }; + footnote_start { fgoto footnote; }; + list_start { list_layout = runtime.newArray(); LIST_ITEM(); fgoto list; }; + dl_start { p = ts; INLINE(html, "dl_open"); ASET("type", "dt"); fgoto dl; }; + table { INLINE(table, "table_close"); DONE(table); fgoto block; }; + link_alias { ((RubyHash)refs_found).aset(((RubyHash)regs).aref(runtime.newSymbol("text")), ((RubyHash)regs).aref(runtime.newSymbol("href"))); DONE(block); }; + aligned_image { ((RubyHash)regs).aset(runtime.newSymbol("type"), plain_block); fgoto block; }; + redcloth_version { INLINE(html, "redcloth_version"); }; + blank_line => cat; + default + { + CLEAR_REGS(); + ((RubyHash)regs).aset(runtime.newSymbol("type"), plain_block); + CAT(block); + fgoto block; + }; + EOF; + *|; }%% %% write data nofinal; -/***** This all came from Hpricot. ***** -public final static int BUFSIZE=16384; + public Transformer(IRubyObject self, byte[] data, int p, int pe, IRubyObject refs) { + if(p+pe > data.length) { + throw new RuntimeException("BLAHAHA"); + } + this.self = self; + + // This is GROSS but necessary for EOF matching + this.data = new byte[pe+1]; + System.arraycopy(data, p, this.data, 0, pe); + this.data[pe] = 0; + + this.p = 0; + this.pe = pe+1; + this.eof = this.pe; + this.orig_p = 0; + this.orig_pe = this.pe; + + this.refs = refs; + + runtime = self.getRuntime(); + + html = RubyString.newEmptyString(runtime); + table = RubyString.newEmptyString(runtime); + block = RubyString.newEmptyString(runtime); + CLEAR_REGS(); + + list_layout = runtime.getNil(); + list_index = runtime.newArray(); + plain_block = runtime.newString("p"); + extend = runtime.getNil(); + refs_found = RubyHash.newHash(runtime); + } + + public IRubyObject transform() { + %% write init; -private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) { - IRubyObject ary; - if (sym == runtime.newSymbol("text")) { - raw = tag; + %% write exec; + + if(((RubyString)block).getByteList().realSize > 0) { + ADD_BLOCK(); + } + + if(refs.isNil() && !refs_found.callMethod(runtime.getCurrentContext(), "empty?").isTrue()) { + return RedclothScanService.transform(self, data, orig_p, orig_pe, refs_found); + } else { + self.callMethod(self.getRuntime().getCurrentContext(), "after_transform", html); + return html; + } + } } - ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw}); - if (taint) { - ary.setTaint(true); - tag.setTaint(true); - attr.setTaint(true); - raw.setTaint(true); + + public static IRubyObject transform(IRubyObject self, byte[] data, int p, int pe, IRubyObject refs) { + return new Transformer(self, data, p, pe, refs).transform(); + } + + public static IRubyObject inline2(IRubyObject workingCopy, IRubyObject self, IRubyObject refs) { + return RedclothInline.inline2(workingCopy, self, refs); + } + + public static IRubyObject transform2(IRubyObject self, IRubyObject str) { + RubyString ss = str.convertToString(); + ss.cat((byte)'\n'); + self.callMethod(self.getRuntime().getCurrentContext(), "before_transform", ss); + return transform(self, ss.getByteList().bytes(), ss.getByteList().begin, ss.getByteList().realSize, self.getRuntime().getNil()); } - block.yield(runtime.getCurrentContext(), ary, null, null, false); -} + @JRubyMethod + public static IRubyObject to(IRubyObject self, IRubyObject formatter) { + Ruby runtime = self.getRuntime(); + self.callMethod(runtime.getCurrentContext(), "delete!", runtime.newString("\r")); + IRubyObject workingCopy = self.rbClone(); -int cs, act, have = 0, nread = 0, curline = 1, p=-1; -boolean text = false; -int ts=-1, te; -int eof=-1; -char[] buf; -Ruby runtime; -IRubyObject attr, bufsize; -IRubyObject[] tag, akey, aval; -int mark_tag, mark_akey, mark_aval; -boolean done = false, ele_open = false; -int buffer_size = 0; -boolean taint = false; -Block block = null; - - -IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment, - cdata, sym_text; - -IRubyObject redcloth_scan(IRubyObject recv, IRubyObject port) { - attr = bufsize = runtime.getNil(); - tag = new IRubyObject[]{runtime.getNil()}; - akey = new IRubyObject[]{runtime.getNil()}; - aval = new IRubyObject[]{runtime.getNil()}; - - RubyClass rb_eRedClothParseError = runtime.getModule("RedCloth").getClass("ParseError"); - - taint = port.isTaint(); - if ( !port.respondsTo("read")) { - if ( port.respondsTo("to_str")) { - port = port.callMethod(runtime.getCurrentContext(),"to_str"); + ((RubyObject)workingCopy).extend(new IRubyObject[]{formatter}); + + if(workingCopy.callMethod(runtime.getCurrentContext(), "lite_mode").isTrue()) { + return inline2(workingCopy, self, RubyHash.newHash(runtime)); } else { - throw runtime.newArgumentError("bad RedCloth argument, String or IO only please."); + return transform2(workingCopy, self); } } - buffer_size = BUFSIZE; - if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) { - bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size"); - if (!bufsize.isNil()) { - buffer_size = RubyNumeric.fix2int(bufsize); + @JRubyMethod(rest=true) + public static IRubyObject html_esc(IRubyObject self, IRubyObject[] args) { + Ruby runtime = self.getRuntime(); + IRubyObject str = runtime.getNil(), + level = runtime.getNil(); + if(Arity.checkArgumentCount(runtime, args, 1, 2) == 2) { + level = args[1]; } - } - buf = new char[buffer_size]; + str = args[0]; - %% write init; + IRubyObject new_str = RubyString.newEmptyString(runtime); + if(str.isNil()) { + return new_str; + } - while( !done ) { - IRubyObject str; - p = have; - int pe; - int len, space = buffer_size - have; + ByteList bl = str.convertToString().getByteList(); - if ( space == 0 ) { - rb_raise(rb_eRedClothParseError, "ran out of buffer space on element <" + tag.toString() + ">, starting on line "+curline+"."); + if(bl.realSize == 0) { + return new_str; } - if (port.respondsTo("read")) { - str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space)); - } else { - str = ((RubyString)port).substr(nread,space); + byte[] bytes = bl.bytes; + int ts = bl.begin; + int te = ts + bl.realSize; + int t = ts, t2 = ts; + String ch = null; + + if(te <= ts) { + return new_str; } + + while(t2 < te) { + ch = null; + // normal + pre + switch(bytes[t2]) { + case '&': ch = "amp"; break; + case '>': ch = "gt"; break; + case '<': ch = "lt"; break; + } - str = str.convertToString(); - String sss = str.toString(); - char[] chars = sss.toCharArray(); - System.arraycopy(chars,0,buf,p,chars.length); + // normal (non-pre) + if(level != runtime.newSymbol("html_escape_preformatted")) { + switch(bytes[t2]) { + case '\n': ch = "br"; break; + case '"' : ch = "quot"; break; + case '\'': + ch = (level == runtime.newSymbol("html_escape_attributes")) ? "apos" : "squot"; + break; + } + } - len = sss.length(); - nread += len; + if(ch != null) { + if(t2 > t) { + ((RubyString)new_str).cat(bytes, t, t2-t); + } + ((RubyString)new_str).concat(self.callMethod(runtime.getCurrentContext(), ch, RubyHash.newHash(runtime))); + t = t2 + 1; + } - if ( len < space ) { - len++; - done = true; + t2++; } - pe = p + len; - char[] data = buf; - %% write exec; + if(t2 > t) { + ((RubyString)new_str).cat(bytes, t, t2-t); + } + + return new_str; + } + + @JRubyMethod + public static IRubyObject latex_esc(IRubyObject self, IRubyObject str) { + Ruby runtime = self.getRuntime(); + IRubyObject new_str = RubyString.newEmptyString(runtime); - if ( cs == redcloth_scan_error ) { - if(!tag[0].isNil()) { - rb_raise(rb_eRedClothParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n"); - } else { - rb_raise(rb_eRedClothParseError, "parse error on line "+curline+".\n"); - } + if(str.isNil()) { + return new_str; } + + ByteList bl = str.convertToString().getByteList(); - if ( done && ele_open ) { - ele_open = false; - if(ts > -1) { - mark_tag = ts; - ts = -1; - text = true; - } + if(bl.realSize == 0) { + return new_str; } + + byte[] bytes = bl.bytes; + int ts = bl.begin; + int te = ts + bl.realSize; + int t = ts; + int t2 = ts; + String ch = null; + + while(t2 < te) { + ch = null; + + switch(bytes[t2]) { + case '{': ch = "#123"; break; + case '}': ch = "#125"; break; + case '\\': ch = "#92"; break; + case '#': ch = "#35"; break; + case '$': ch = "#36"; break; + case '%': ch = "#37"; break; + case '&': ch = "amp"; break; + case '_': ch = "#95"; break; + case '^': ch = "circ"; break; + case '~': ch = "tilde"; break; + case '<': ch = "lt"; break; + case '>': ch = "gt"; break; + case '\n': ch = "#10"; break; + } - if(ts == -1) { - have = 0; - if(mark_tag != -1 && text) { - if (done) { - if(mark_tag < p-1) { - CAT(tag, p-1); - ELE(sym_text); - } - } else { - CAT(tag, p); + if(ch != null) { + if(t2 > t) { + ((RubyString)new_str).cat(bytes, t, t2-t); } + IRubyObject opts = RubyHash.newHash(runtime); + ((RubyHash)opts).aset(runtime.newSymbol("text"), runtime.newString(ch)); + ((RubyString)new_str).concat(self.callMethod(runtime.getCurrentContext(), "entity", opts)); + t = t2 + 1; } - mark_tag = 0; - } else { - have = pe - ts; - System.arraycopy(buf,ts,buf,0,have); - SLIDE(tag); - SLIDE(akey); - SLIDE(aval); - te = (te - ts); - ts = 0; + + t2++; } - } - return runtime.getNil(); -} -public static IRubyObject __redcloth_scan(IRubyObject recv, IRubyObject port, Block block) { - Ruby runtime = recv.getRuntime(); - RedClothScanService service = new RedClothScanService(); - service.runtime = runtime; - service.xmldecl = runtime.newSymbol("xmldecl"); - service.doctype = runtime.newSymbol("doctype"); - service.procins = runtime.newSymbol("procins"); - service.stag = runtime.newSymbol("stag"); - service.etag = runtime.newSymbol("etag"); - service.emptytag = runtime.newSymbol("emptytag"); - service.comment = runtime.newSymbol("comment"); - service.cdata = runtime.newSymbol("cdata"); - service.sym_text = runtime.newSymbol("text"); - service.block = block; - return service.redcloth_scan(recv, port); -} + if(t2 > t) { + ((RubyString)new_str).cat(bytes, t, t2-t); + } + return new_str; + } -public boolean basicLoad(final Ruby runtime) throws IOException { - Init_redcloth_scan(runtime); - return true; -} + public boolean basicLoad(final Ruby runtime) throws IOException { + Init_redcloth_scan(runtime); + return true; + } -public static void Init_redcloth_scan(Ruby runtime) { - RubyModule mRedCloth = runtime.defineModule("RedCloth"); - mRedCloth.getMetaClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")}); - CallbackFactory fact = runtime.callbackFactory(RedClothScanService.class); - mRedCloth.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__redcloth_scan",IRubyObject.class)); - mRedCloth.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator()); - rubyApi = JavaEmbedUtils.newObjectAdapter(); -}*/ + public static void Init_redcloth_scan(Ruby runtime) { + RubyModule mRedCloth = runtime.defineModule("RedCloth"); + RubyClass super_RedCloth = mRedCloth.defineClassUnder("TextileDoc", runtime.getString(), runtime.getString().getAllocator()); + super_RedCloth.defineAnnotatedMethods(RedclothScanService.class); + super_RedCloth.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator()); + } } diff --git a/test/basic.yml b/test/basic.yml index 4fed2085..c0bea111 100644 --- a/test/basic.yml +++ b/test/basic.yml @@ -76,7 +76,7 @@ html: |-
I saw a ship. It ate my elephant.
When the elephant comes to take a p. you…
- --- +--- name: html tags desc: You can certainly use HTML tags inside your Textile documents. HTML will only be escaped if it’s found in a pre or code block. in: |- @@ -868,4 +868,4 @@ in: |- [Papers "blah blah."] html: |-

citation [“(Berk.) Hilton”], see
- [Papers “blah blah.”]

\ No newline at end of file + [Papers “blah blah.”]