From a8f14dbb21da110e2f83564a9e3c7945577a291c Mon Sep 17 00:00:00 2001 From: Brian Quinn Date: Tue, 24 May 2011 12:46:32 +0100 Subject: [PATCH] Improved parser support for erb blocks inside html tags --- lib/deface/parser.rb | 49 +++++++++++++++++++++++++++++++------- spec/deface/parser_spec.rb | 47 ++++++++++++++++++++++++------------ 2 files changed, 72 insertions(+), 24 deletions(-) diff --git a/lib/deface/parser.rb b/lib/deface/parser.rb index b8e2ab5..432fbe1 100644 --- a/lib/deface/parser.rb +++ b/lib/deface/parser.rb @@ -1,5 +1,4 @@ require 'nokogiri' -require 'erubis' require 'cgi' module Deface @@ -7,6 +6,35 @@ class Parser # converts erb to markup # def self.erb_markup!(source) + + #all opening html tags that contain <% %> blocks + source.scan(/<\w+[^<>]+(?:<%.*?%>[^<>]*)+/m).each do |line| + + #regexs to catch <% %> inside attributes id="<% something %>" - with double, single or no quotes + erb_attrs_regexs = [/([\w-]+)(\s?=\s?)(")([^"]*<%.*?%>[^"]*)/m, + /([\w-]+)(\s?=\s?)(')([^']*<%.*?%>[^']*)'/m, + /([\w-]+)(\s?=\s?)()(<%.*?%>)(?:\s|>|\z)/m] + + replace_line = erb_attrs_regexs.inject(line.clone) do |replace_line, regex| + + replace_line = line.scan(regex).inject(replace_line) do |replace_line, match| + replace_line.sub("#{match[0]}#{match[1]}#{match[2]}#{match[3]}#{match[2]}") { |m| m = " data-erb-#{match[0]}=\"#{CGI.escapeHTML(match[3])}\"" } + end + + replace_line + end + + + i = -1 + #catch all <% %> inside tags id

> , not inside attrs + replace_line.scan(/(<%.*?%>)/m).each do |match| + replace_line.sub!(match[0]) { |m| m = " data-erb-#{i += 1}=\"#{CGI.escapeHTML(match[0])}\"" } + end + + source.sub!(line) { |m| m = replace_line } + end + + #replaces all <% %> not inside opening html tags replacements = [ {"<%=" => ""}, {"<%" => ""}, {"%>" => ""} ] @@ -14,7 +42,7 @@ def self.erb_markup!(source) replacements.each{ |h| h.each { |replace, with| source.gsub! replace, with } } source.scan(/()((?:(?!<\/code>)[\s\S])*)(<\/code>)/).each do |match| - source.gsub!("#{match[0]}#{match[1]}#{match[2]}", "#{match[0]}#{CGI.escapeHTML(match[1])}#{match[2]}") + source.sub!("#{match[0]}#{match[1]}#{match[2]}") { |m| m = "#{match[0]}#{CGI.escapeHTML(match[1])}#{match[2]}" } end source @@ -25,18 +53,21 @@ def self.erb_markup!(source) def self.undo_erb_markup!(source) replacements = [ {"" => '<%'}, {"" => '<%='}, - {"" => '%>'}, - {/(<|<)code(\s|%20)erb-silent(>|>)/ => '<%'}, - {/(<|<)code(\s|%20)erb-loud(>|>)/ => '<%='}, - {/(<|<)\/code(>|>)/ => '%>'} ] + {"" => '%>'}] replacements.each{ |h| h.each { |replace, with| source.gsub! replace, with } } + source.scan(/data-erb-(\d+)+=(['"])(.*?)\2/m).each do |match| + source.gsub!("data-erb-#{match[0]}=#{match[1]}#{match[2]}#{match[1]}") { |m| m = CGI.unescapeHTML(match[2]) } + end + + source.scan(/data-erb-([\w-]+)+=(["'])(.*?)\2/m).each do |match| + source.gsub!("data-erb-#{match[0]}=#{match[1]}#{match[2]}#{match[1]}") { |m| "#{match[0]}=#{match[1]}#{CGI.unescapeHTML(match[2])}#{match[1]}" } + end + #un-escape changes from Nokogiri and erb-markup! source.scan(/(<%.*?)((?:(?!%>)[\s\S])*)(%>)/).each do |match| - escaped = URI.unescape match[1] - escaped = CGI.unescapeHTML CGI.unescapeHTML(escaped) - source.gsub!("#{match[0]}#{match[1]}#{match[2]}", "#{match[0]}#{escaped}#{match[2]}") + source.gsub!("#{match[0]}#{match[1]}#{match[2]}") { |m| m = "#{match[0]}#{ CGI.unescapeHTML match[1] }#{match[2]}" } end source diff --git a/spec/deface/parser_spec.rb b/spec/deface/parser_spec.rb index 7c88d68..90130fe 100644 --- a/spec/deface/parser_spec.rb +++ b/spec/deface/parser_spec.rb @@ -37,12 +37,34 @@ module Deface Deface::Parser.convert("<%= method_name %>").to_s.should == " method_name " end - it "should convert nested <% ... %>" do - Deface::Parser.convert("

\">

").to_s.should == "

" + it "should convert first <% ... %> inside html tag" do + Deface::Parser.convert("

>

").to_s.should == "

" end - it "should convert nested <%= ... %> including href attribute" do - Deface::Parser.convert(%(">A Link)).to_s.should == "A Link" + it "should convert second <% ... %> inside html tag" do + Deface::Parser.convert("

<% x = y %>>

").to_s.should == "

" + end + + it "should convert <% ... %> inside double quoted attr value" do + Deface::Parser.convert("

\">

").to_s.should == "

" + end + + it "should convert <% ... %> inside single quoted attr value" do + Deface::Parser.convert("

").to_s.should == "

" + end + + it "should convert <% ... %> inside non-quoted attr value" do + Deface::Parser.convert("

>

").to_s.should == "

" + Deface::Parser.convert("

alt=\"test\">

").to_s.should == "

" + end + + it "should convert multiple <% ... %> inside html tag" do + Deface::Parser.convert(%q{

alt="<% x = 'y' + + \"2\" %>" title='<% method_name %>' <%= other_method %>

}).to_s.should == "

" + end + + it "should convert <%= ... %> including href attribute" do + Deface::Parser.convert(%(">A Link)).to_s.should == "A Link" end it "should escape contents code tags" do @@ -59,21 +81,16 @@ module Deface Deface::Parser.undo_erb_markup!(" method_name ").should == "<%= method_name %>" end - it "should revert nested " do - Deface::Parser.undo_erb_markup!("

1 </code>\">

").should == "

1 %>\">

" - end - - it "should revert nested including href attribute" do - Deface::Parser.undo_erb_markup!("A Link").should == "\">A Link" + it "should revert data-erb-x attrs inside html tag" do + Deface::Parser.undo_erb_markup!("

").should == "

<% x = y %>>

" end - it "should revert nested " do - Deface::Parser.undo_erb_markup!("

").should == "

\">

" + it "should revert data-erb-id attr inside html tag" do + Deface::Parser.undo_erb_markup!("

").should == "

1 %>\">

" end - it "should revert nested including href attribute" do - Deface::Parser.undo_erb_markup!("A Link").should == %(A Link) - Deface::Parser.undo_erb_markup!("A Link").should == %(">A Link) + it "should revert data-erb-href attr inside html tag" do + Deface::Parser.undo_erb_markup!("A Link").should == %(">A Link) end it "should unescape contents of code tags" do