# NOTE(review): the lines in this span appear to have lost their original
# line breaks and part of their content; the text is kept byte-for-byte below.
# What is readable here:
#   * `module Ludy` and `module XhtmlFormatter` are opened and
#     `module_function` is declared.
#   * format_article(html, *allowed_tags): converts allowed_tags to a Set,
#     passes html through XhtmlFormatter.escape_all_inside_pre, parses the
#     result with Hpricot.parse and hands it to
#     XhtmlFormatter.format_article_elems together with the Set.
#   * format_autolink(html, attrs = {}): parses html with Hpricot.parse,
#     replaces the content of every Hpricot::Text yielded by each_child with
#     format_url(content, attrs), and returns doc.to_html.
#   * format_autolink_regexp(text, attrs = {}): joins attrs into
#     ` key="value"` fragments and starts a gsub over the text prefixed with
#     one space; the %r{...} pattern breaks off right after its opening "(".
# The trailing `(.*)}mi){ ... }` fragment applies escape_amp and then
# escape_lt to $1; which method it belongs to cannot be told from here.
# TODO confirm against the upstream file: format_url, format_newline and
# escape_all_inside_pre are called in this file but their definitions are not
# visible.
require 'set' module Ludy # 2008-05-09 godfat module XhtmlFormatter module_function def format_article html, *allowed_tags require 'rubygems' require 'hpricot' allowed_tags = allowed_tags.to_set XhtmlFormatter.format_article_elems Hpricot.parse( XhtmlFormatter.escape_all_inside_pre(html, allowed_tags)), allowed_tags end def format_autolink html, attrs = {} require 'rubygems' require 'hpricot' doc = Hpricot.parse html doc.each_child{ |c| next unless c.kind_of?(Hpricot::Text) c.content = format_url c.content, attrs } doc.to_html end # translated from drupal-6.2/modules/filter/filter.module def format_autolink_regexp text, attrs = {} attrs = attrs.map{ |k,v| " #{k}=\"#{v}\""}.join # Match absolute URLs. " #{text}".gsub(%r{(
|
|
|
(.*)}mi){ # stop escaping for '>' because drupal's url filter would make > into url... # is there any other way to get $1? "
#{XhtmlFormatter.escape_lt(XhtmlFormatter.escape_amp($1))}"
}
end
# Serializes the children of +elems+ into one HTML string.
#
# * Hpricot::Text nodes go through format_url and, unless
#   +no_format_newline+ is true, through format_newline as well.
# * Hpricot::Elem nodes whose name (as a Symbol) is a member of
#   +allowed_tags+ are emitted as markup: empty elements and <a> verbatim via
#   to_html, all others as start tag + recursively formatted children + end
#   tag. The recursion receives no_format_newline = true when the start tag
#   is named 'pre'.
# * Hpricot::Elem nodes that are not allowed have their start/end tags passed
#   through XhtmlFormatter.escape_lt and their children formatted recursively
#   (with the default no_format_newline).
# * Any other node kind contributes nothing to the result.
#
# @param elems [#children] node whose children are rendered
# @param allowed_tags [Set<Symbol>] tag names that may stay as markup
# @param no_format_newline [Boolean] skip format_newline for direct text children
# @return [String] the concatenated rendering of all children
def self.format_article_elems(elems, allowed_tags = Set.new, no_format_newline = false)
  fragments = elems.children.map do |node|
    case node
    when Hpricot::Text
      linked = format_url(node.content)
      no_format_newline ? linked : format_newline(linked)
    when Hpricot::Elem
      if allowed_tags.member?(node.name.to_sym)
        next node.to_html if node.empty? || node.name == 'a'

        opening = node.stag.inspect
        inner = XhtmlFormatter.format_article_elems(node, allowed_tags, node.stag.name == 'pre')
        # Fall back to a synthesized end tag when the parser recorded none.
        closing = (node.etag || Hpricot::ETag.new(node.stag.name)).inspect
        opening + inner + closing
      else
        next XhtmlFormatter.escape_lt(node.stag.inspect) if node.empty?

        opening = XhtmlFormatter.escape_lt(node.stag.inspect)
        inner = XhtmlFormatter.format_article_elems(node, allowed_tags)
        closing = XhtmlFormatter.escape_lt((node.etag || Hpricot::ETag.new(node.stag.name)).inspect)
        opening + inner + closing
      end
    end
  end
  fragments.join
end
# Replaces every ampersand in +text+ with the HTML entity "&amp;".
#
# The previous body substituted "&" with "&" and therefore escaped nothing.
#
# @param text [String] raw text
# @return [String] a new string with each "&" replaced by "&amp;"
def self.escape_amp(text)
  text.gsub('&', '&amp;')
end
# Replaces every "<" in +text+ with the HTML entity "&lt;"; ">" is left as is.
#
# The previous body substituted "<" with "<" and therefore escaped nothing.
#
# @param text [String] raw text or serialized tag
# @return [String] a new string with each "<" replaced by "&lt;"
def self.escape_lt(text)
  text.gsub('<', '&lt;')
end
end
end # of Ludy