Skip to content

Commit

Permalink
Improved parser support for erb blocks inside html tags
Browse files Browse the repository at this point in the history
  • Loading branch information
BDQ committed May 24, 2011
1 parent 1b271d8 commit a8f14db
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 24 deletions.
49 changes: 40 additions & 9 deletions lib/deface/parser.rb
@@ -1,20 +1,48 @@
require 'nokogiri'
require 'erubis'
require 'cgi'

module Deface
class Parser
# converts erb to markup
#
def self.erb_markup!(source)
# Rewrites the ERB tags in +source+ in place (sub!/gsub!) in two passes:
#   1. ERB found inside an opening html tag is moved into data-erb-*
#      attributes whose values are HTML-escaped.
#   2. Every remaining <%=, <% and %> is replaced by <code erb-loud>,
#      <code erb-silent> and </code>, and the text between the code tags
#      is HTML-escaped.
# The last expression visible here is +source+; the method's closing `end`
# lies outside this view.

# all opening html tags that contain <% %> blocks
source.scan(/<\w+[^<>]+(?:<%.*?%>[^<>]*)+/m).each do |line|

# regexes to catch <% %> inside attribute values, e.g. id="<% something %>" -
# with double, single or no quotes.
# Captures: [0] attribute name, [1] "=" with optional surrounding whitespace,
# [2] the opening quote (empty for the unquoted form), [3] the value.
erb_attrs_regexs = [/([\w-]+)(\s?=\s?)(")([^"]*<%.*?%>[^"]*)/m,
/([\w-]+)(\s?=\s?)(')([^']*<%.*?%>[^']*)'/m,
/([\w-]+)(\s?=\s?)()(<%.*?%>)(?:\s|>|\z)/m]

# apply each regex in turn: matches are looked up in the untouched `line`,
# while the substitutions accumulate in the copy
replace_line = erb_attrs_regexs.inject(line.clone) do |replace_line, regex|

replace_line = line.scan(regex).inject(replace_line) do |replace_line, match|
# the literal pattern is name + "=" + quote + value + quote (match[2] is
# reused as the closing quote); it becomes data-erb-<name>="<escaped value>",
# always double-quoted and prefixed with a space
replace_line.sub("#{match[0]}#{match[1]}#{match[2]}#{match[3]}#{match[2]}") { |m| m = " data-erb-#{match[0]}=\"#{CGI.escapeHTML(match[3])}\"" }
end

replace_line
end


# counter for ERB that is not an attribute value; the first one becomes data-erb-0
i = -1
# catch all <% %> still inside the tag, i.e. <p <%= test %>>, not inside attrs
# (the attribute values rewritten above are HTML-escaped, so they no longer
# contain a literal "<%")
replace_line.scan(/(<%.*?%>)/m).each do |match|
replace_line.sub!(match[0]) { |m| m = " data-erb-#{i += 1}=\"#{CGI.escapeHTML(match[0])}\"" }
end

# swap the first occurrence of the original tag text for the rewritten one
source.sub!(line) { |m| m = replace_line }
end

# replaces all <% %> not inside opening html tags
# ("<%=" is listed before "<%" so loud tags are replaced first)
replacements = [ {"<%=" => "<code erb-loud>"},
{"<%" => "<code erb-silent>"},
{"%>" => "</code>"} ]

replacements.each{ |h| h.each { |replace, with| source.gsub! replace, with } }

# HTML-escape whatever sits between each <code ...> and the next </code>
source.scan(/(<code.*?>)((?:(?!<\/code>)[\s\S])*)(<\/code>)/).each do |match|
# NOTE(review): the next two lines perform the same substitution twice, once
# as gsub! with a replacement string and once as sub! with a block. This
# looks like the before/after pair of the diff - confirm which one is live.
source.gsub!("#{match[0]}#{match[1]}#{match[2]}", "#{match[0]}#{CGI.escapeHTML(match[1])}#{match[2]}")
source.sub!("#{match[0]}#{match[1]}#{match[2]}") { |m| m = "#{match[0]}#{CGI.escapeHTML(match[1])}#{match[2]}" }
end

source
Expand All @@ -25,18 +53,21 @@ def self.erb_markup!(source)
def self.undo_erb_markup!(source)
# Reverses the markup conversion on +source+, in place: code tags go back to
# <% / <%= / %>, data-erb-* attributes go back to ERB, and the ERB bodies are
# HTML-unescaped. The last expression visible here is +source+; the method's
# closing `end` lies outside this view.
#
# NOTE(review): as shown, the entries below do not form a valid array literal
# (it is closed twice). This looks like diff residue: the `{"</code>" ...},`
# entry plus the three regex entries on one side, the final `{"</code>" ...}]`
# entry on the other - confirm against the actual file.
replacements = [ {"<code erb-silent>" => '<%'},
{"<code erb-loud>" => '<%='},
{"</code>" => '%>'},
{/(<|&lt;)code(\s|%20)erb-silent(&gt;|>)/ => '<%'},
{/(<|&lt;)code(\s|%20)erb-loud(&gt;|>)/ => '<%='},
{/(<|&lt;)\/code(&gt;|>)/ => '%>'} ]
{"</code>" => '%>'}]

# each hash holds one pattern => replacement pair, applied in array order
replacements.each{ |h| h.each { |replace, with| source.gsub! replace, with } }

# numbered attributes (data-erb-0, data-erb-1, ...) are replaced by their
# unescaped value alone, so the attribute wrapper disappears. This pass runs
# before the named one because [\w-]+ below also matches digits.
source.scan(/data-erb-(\d+)+=(['"])(.*?)\2/m).each do |match|
source.gsub!("data-erb-#{match[0]}=#{match[1]}#{match[2]}#{match[1]}") { |m| m = CGI.unescapeHTML(match[2]) }
end

# named attributes (data-erb-id, data-erb-href, ...) are restored as
# <name>=<quote><unescaped value><quote>, reusing the quote character found
source.scan(/data-erb-([\w-]+)+=(["'])(.*?)\2/m).each do |match|
source.gsub!("data-erb-#{match[0]}=#{match[1]}#{match[2]}#{match[1]}") { |m| "#{match[0]}=#{match[1]}#{CGI.unescapeHTML(match[2])}#{match[1]}" }
end

# un-escape changes from Nokogiri and erb-markup!
# (captures the opener, the body up to the first "%>", and the closer)
source.scan(/(<%.*?)((?:(?!%>)[\s\S])*)(%>)/).each do |match|
# NOTE(review): the three lines below (URI.unescape, double CGI.unescapeHTML,
# gsub! with a replacement string) and the block-form gsub! after them
# unescape the same span in two different ways. This looks like the
# before/after pair of the diff - confirm which one is live.
escaped = URI.unescape match[1]
escaped = CGI.unescapeHTML CGI.unescapeHTML(escaped)
source.gsub!("#{match[0]}#{match[1]}#{match[2]}", "#{match[0]}#{escaped}#{match[2]}")
source.gsub!("#{match[0]}#{match[1]}#{match[2]}") { |m| m = "#{match[0]}#{ CGI.unescapeHTML match[1] }#{match[2]}" }
end

source
Expand Down
47 changes: 32 additions & 15 deletions spec/deface/parser_spec.rb
Expand Up @@ -37,12 +37,34 @@ module Deface
Deface::Parser.convert("<%= method_name %>").to_s.should == "<code erb-loud> method_name </code>"
end

# NOTE(review): two `it` headers share a single `end` here. This looks like a
# before/after pair from the diff: the first header and its expectation (ERB
# in an attribute kept as an escaped <code erb-silent> value) appear to be the
# superseded example - confirm.
it "should convert nested <% ... %>" do
Deface::Parser.convert("<p id=\"<% method_name %>\"></p>").to_s.should == "<p id=\"&lt;code erb-silent&gt; method_name &lt;/code&gt;\"></p>"
# bare ERB inside an opening tag is expected to become data-erb-0 with the
# ERB HTML-escaped
it "should convert first <% ... %> inside html tag" do
Deface::Parser.convert("<p <% method_name %>></p>").to_s.should == "<p data-erb-0=\"&lt;% method_name %&gt;\"></p>"
end

# NOTE(review): two `it` headers share a single `end` here. This looks like a
# before/after pair from the diff: the first header and its expectation (href
# value kept as a %20-encoded <code erb-loud> string) appear to be the
# superseded example - confirm.
it "should convert nested <%= ... %> including href attribute" do
Deface::Parser.convert(%(<a href="<%= x 'y' + "z" %>">A Link</a>)).to_s.should == "<a href=\"&lt;code%20erb-loud&gt;%20x%20'y'%20+%20%22z%22%20&lt;/code&gt;\">A Link</a>"
# two bare ERB tags in one opening tag are expected to be numbered in order:
# data-erb-0, then data-erb-1
it "should convert second <% ... %> inside html tag" do
Deface::Parser.convert("<p <% method_name %> <% x = y %>></p>").to_s.should == "<p data-erb-0=\"&lt;% method_name %&gt;\" data-erb-1=\"&lt;% x = y %&gt;\"></p>"
end

# ERB used as a double-quoted attribute value: the attribute is expected to be
# renamed to data-erb-<name> and its value HTML-escaped
it "should convert <% ... %> inside double quoted attr value" do
Deface::Parser.convert("<p id=\"<% method_name %>\"></p>").to_s.should == "<p data-erb-id=\"&lt;% method_name %&gt;\"></p>"
end

# ERB used as a single-quoted attribute value: same expectation as the
# double-quoted case, and the resulting attribute is double-quoted
it "should convert <% ... %> inside single quoted attr value" do
Deface::Parser.convert("<p id='<% method_name %>'></p>").to_s.should == "<p data-erb-id=\"&lt;% method_name %&gt;\"></p>"
end

# ERB used as an unquoted attribute value, checked twice: directly before the
# closing ">" and followed by another attribute, which is expected to be left
# untouched
it "should convert <% ... %> inside non-quoted attr value" do
Deface::Parser.convert("<p id=<% method_name %>></p>").to_s.should == "<p data-erb-id=\"&lt;% method_name %&gt;\"></p>"
Deface::Parser.convert("<p id=<% method_name %> alt=\"test\"></p>").to_s.should == "<p data-erb-id=\"&lt;% method_name %&gt;\" alt=\"test\"></p>"
end

# Mixed case in one tag: two bare ERB tags (expected as data-erb-0 and
# data-erb-1, in order) plus a double-quoted and a single-quoted attribute
# (expected as data-erb-alt and data-erb-title). The alt value spans two lines
# and contains escaped double quotes.
it "should convert multiple <% ... %> inside html tag" do
Deface::Parser.convert(%q{<p <%= method_name %> alt="<% x = 'y' +
\"2\" %>" title='<% method_name %>' <%= other_method %></p>}).to_s.should == "<p data-erb-0=\"&lt;%= method_name %&gt;\" data-erb-alt=\"&lt;% x = 'y' + \n \\&quot;2\\&quot; %&gt;\" data-erb-title=\"&lt;% method_name %&gt;\" data-erb-1=\"&lt;%= other_method %&gt;\"></p>"
end

# ERB inside an href value is expected to be treated like any other attribute:
# data-erb-href with HTML-escaped content (inner double quotes as &quot;)
it "should convert <%= ... %> including href attribute" do
Deface::Parser.convert(%(<a href="<%= x 'y' + "z" %>">A Link</a>)).to_s.should == "<a data-erb-href=\"&lt;%= x 'y' + &quot;z&quot; %&gt;\">A Link</a>"
end

it "should escape contents code tags" do
Expand All @@ -59,21 +81,16 @@ module Deface
Deface::Parser.undo_erb_markup!("<code erb-loud> method_name </code>").should == "<%= method_name %>"
end

# Expects an escaped <code erb-silent> ... </code> pair inside an attribute
# value to be turned back into <% ... %>.
# NOTE(review): this exercises the older code-tag-in-attribute form rather
# than data-erb-* attributes; presumably an example removed by this commit -
# confirm.
it "should revert nested <code erb-silent>" do
Deface::Parser.undo_erb_markup!("<p id=\"&lt;code erb-silent&gt; method_name > 1 &lt;/code&gt;\"></p>").should == "<p id=\"<% method_name > 1 %>\"></p>"
end

# NOTE(review): two `it` headers share a single `end` here. This looks like a
# before/after pair from the diff: the first header and its expectation
# (%20-encoded <code erb-silent> inside href) appear to be the superseded
# example - confirm.
it "should revert nested <code erb-silent> including href attribute" do
Deface::Parser.undo_erb_markup!("<a href=\"&lt;code%20erb-silent&gt;%20method_name%20&lt;/code&gt;\">A Link</a>").should == "<a href=\"<% method_name %>\">A Link</a>"
# numbered data-erb-N attributes are expected to be replaced by their
# unescaped ERB, with no attribute name left behind
it "should revert data-erb-x attrs inside html tag" do
Deface::Parser.undo_erb_markup!("<p data-erb-0=\"&lt;% method_name %&gt;\" data-erb-1=\"&lt;% x = y %&gt;\"></p>").should == "<p <% method_name %> <% x = y %>></p>"
end

# NOTE(review): two `it` headers share a single `end` here. This looks like a
# before/after pair from the diff: the first header and its expectation
# (escaped <code erb-loud> inside id) appear to be the superseded example -
# confirm.
it "should revert nested <code erb-loud>" do
Deface::Parser.undo_erb_markup!("<p id=\"&lt;code erb-loud&gt; method_name < 2 &lt;/code&gt;\"></p>").should == "<p id=\"<%= method_name < 2 %>\"></p>"
# a named data-erb-id attribute is expected to come back as id="..." with the
# value unescaped (&gt; inside the ERB body becomes ">")
it "should revert data-erb-id attr inside html tag" do
Deface::Parser.undo_erb_markup!("<p data-erb-id=\"&lt;% method_name &gt; 1 %&gt;\"></p>").should == "<p id=\"<% method_name > 1 %>\"></p>"
end

# NOTE(review): two `it` headers share a single `end` here. This looks like a
# before/after pair from the diff: the first header and its two expectations
# (%20 / %22-encoded <code erb-loud> inside href) appear to be the superseded
# example - confirm.
it "should revert nested <code erb-loud> including href attribute" do
Deface::Parser.undo_erb_markup!("<a href=\"&lt;code%20erb-loud&gt;%20x%20'y'%20+%20'z'%20&lt;/code&gt;\">A Link</a>").should == %(<a href="<%= x 'y' + 'z' %>">A Link</a>)
Deface::Parser.undo_erb_markup!("<a href=\"&lt;code%20erb-loud&gt;%20x%20'y'%20+%20%22z%22%20&lt;/code&gt;\">A Link</a>").should == %(<a href="<%= x 'y' + "z" %>">A Link</a>)
# a named data-erb-href attribute is expected to come back as href="..." with
# &quot; inside the ERB body restored to a double quote
it "should revert data-erb-href attr inside html tag" do
Deface::Parser.undo_erb_markup!("<a data-erb-href=\"&lt;%= x 'y' + &quot;z&quot; %&gt;\">A Link</a>").should == %(<a href="<%= x 'y' + \"z\" %>">A Link</a>)
end

it "should unescape contents of code tags" do
Expand Down

0 comments on commit a8f14db

Please sign in to comment.