godfat / ludy

Aims to extend the Ruby standard library by providing useful tools that do not exist in it, especially for functional programming.

This URL has Read+Write access

godfat (author)
Sat Oct 11 08:25:35 -0700 2008
commit  2400d1dc84e138fa0d3f292e2cf0f82a3dc4821d
tree    a2619df93757797807d12cb7f4d87d2e9e4ef9d2
parent  71ee19235946c169616dc0e51a96fc4778fd06e5
ludy / lib / ludy / xhtml_formatter.rb
107245e7 » godfat 2008-06-19 * added xhtml_formatter fro... 1 require 'set'
2
module Ludy

  # 2008-05-09 godfat
  #
  # Turns user-supplied text / HTML into XHTML fragments: URLs and e-mail
  # addresses become links, newlines become <br />, and tags that are not
  # explicitly allowed are escaped.
  #
  # All methods are callable on the module itself, e.g.
  #   Ludy::XhtmlFormatter.format_url('www.example.com')
  module XhtmlFormatter
    # An absolute URL with one of the known schemes, or a bare "www." address.
    # translated from drupal-6.2/modules/filter/filter.module
    URL_PATTERN = %r{(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]}i

    # A plain e-mail address.
    EMAIL_PATTERN = %r{[A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4}}i

    # Group 1 is set for a URL match, group 2 for an e-mail match.
    LINKABLE_PATTERN = %r{(#{URL_PATTERN})|(#{EMAIL_PATTERN})}

    module_function

    # Formats a whole article: escapes everything inside <pre> (when :pre is
    # allowed), parses the result with Hpricot and re-serializes it through
    # format_article_elems.
    #
    # html::         the raw article markup.
    # allowed_tags:: tag names (symbols) that are emitted as real tags.
    #
    # Hpricot is loaded lazily so the rest of the module works without it.
    def format_article(html, *allowed_tags)
      require 'rubygems'
      require 'hpricot'

      allowed_tags = allowed_tags.to_set
      escaped = XhtmlFormatter.escape_all_inside_pre(html, allowed_tags)
      XhtmlFormatter.format_article_elems(Hpricot.parse(escaped), allowed_tags)
    end

    # Links URLs / e-mail addresses found in the top-level text nodes of
    # +html+ and returns the resulting markup. Text nested inside elements is
    # not visited (only direct children of the parsed document are).
    #
    # attrs:: extra attributes added to every generated URL link.
    def format_autolink(html, attrs = {})
      require 'rubygems'
      require 'hpricot'

      doc = Hpricot.parse(html)
      doc.each_child { |c|
        next unless c.kind_of?(Hpricot::Text)
        c.content = format_url(c.content, attrs)
      }
      doc.to_html
    end

    # Regexp-only autolinker that needs no HTML parser.
    # translated from drupal-6.2/modules/filter/filter.module
    #
    # A candidate is linked only when it follows <p>, <li>, <br>, whitespace
    # or "(". E-mail and www addresses must also be followed by a closing
    # tag, whitespace or ")".
    #
    # attrs:: extra attributes added to every generated URL link (values are
    #         inserted verbatim, so they must not contain double quotes).
    def format_autolink_regexp(text, attrs = {})
      attr_html = attrs.map { |k, v| " #{k}=\"#{v}\"" }.join

      # Pad BOTH ends (as the Drupal original does) so that an address at the
      # very start or very end of the text still has a delimiter around it.
      padded = " #{text} "

      # Match absolute URLs.
      padded = padded.gsub(%r{(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)])?)}i) {
        prefix, url, punct = $1, $2, $5
        caption = XhtmlFormatter.trim(url)
        "#{prefix}<a href=\"#{url}\" title=\"#{url}\"#{attr_html}>#{caption}</a>#{punct}"
      }

      # Match e-mail addresses.
      padded = padded.gsub(%r{(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))}i, '\1<a href="mailto:\2">\2</a>\3')

      # Match www domains/addresses.
      padded = padded.gsub(%r{(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))}i) {
        prefix, url, punct = $1, $2, $3
        caption = XhtmlFormatter.trim(url)
        "#{prefix}<a href=\"http://#{url}\" title=\"http://#{url}\"#{attr_html}>#{caption}</a>#{punct}"
      }

      # Drop the two padding spaces again.
      padded[1..-2]
    end

    # Links every URL and e-mail address in +text+ (plain text, no markup).
    #
    # URLs become <a href title ...attrs>caption</a> with the caption
    # shortened by trim; "www." addresses get an "http://" prefix in the
    # href. E-mail addresses become mailto: links without extra attributes.
    #
    # Everything is done in a single pass so that an address which is part of
    # an already linked URL (e.g. "mailto:me@example.com") is not linked a
    # second time inside the generated anchor.
    #
    # attrs:: extra attributes added to every generated URL link (values are
    #         inserted verbatim, so they must not contain double quotes).
    def format_url(text, attrs = {})
      # Built once, outside the block: the block runs once per match.
      attr_html = attrs.map { |k, v| " #{k}=\"#{v}\"" }.join

      text.gsub(LINKABLE_PATTERN) {
        url, email = $1, $2
        if url
          caption = XhtmlFormatter.trim(url)
          # Only scheme-less "www." addresses need a scheme added.
          href = url
          href = "http://#{url}" if url =~ /\Awww\./i
          "<a href=\"#{href}\" title=\"#{href}\"#{attr_html}>#{caption}</a>"
        else
          "<a href=\"mailto:#{email}\">#{email}</a>"
        end
      }
    end

    # Replaces every line break with <br />.
    # windows: \r\n
    # mac os 9: \r
    def format_newline(text)
      text.gsub(/\r\n|\r|\n/, '<br />')
    end

    # --- internal helpers -------------------------------------------------
    # These are singleton methods that the code above calls with an explicit
    # receiver (XhtmlFormatter.trim ...), so they have to stay public.

    # Shortens +text+ to at most +length+ characters, the last three being
    # '...'. Text that already fits is returned unchanged.
    def self.trim(text, length = 50)
      return text if text.size <= length
      # Reserve 3 characters for the '...' suffix; never slice with a
      # negative end.
      keep = [length - 3, 0].max
      "#{text[0...keep]}..."
    end

    # Escapes '&' and '<' inside <pre>...</pre> so that the content is shown
    # literally. Does nothing unless :pre is an allowed tag.
    #
    # An unclosed <pre> is closed at the end of the input. The match is
    # greedy: it runs from the first <pre> to the last </pre>, so nested
    # <pre> tags are escaped as well (and so is anything between two
    # separate <pre> blocks).
    def self.escape_all_inside_pre(html, allowed_tags)
      return html unless allowed_tags.member? :pre
      # don't bother nested pre, because we escape all tags in pre
      if html =~ %r{<pre\b}i && html !~ %r{</pre>}i
        html = html + '</pre>'
      end
      html.gsub(%r{<pre>(.*)</pre>}mi) {
        inner = $1
        # stop escaping for '>' because drupal's url filter would make &gt; into url...
        "<pre>#{XhtmlFormatter.escape_lt(XhtmlFormatter.escape_amp(inner))}</pre>"
      }
    end

    # Serializes the children of an Hpricot node.
    #
    # * text nodes: URLs are linked and, unless +no_format_newline+ is set,
    #   newlines become <br />.
    # * allowed elements: emitted as tags; empty elements and <a> are emitted
    #   verbatim, others are processed recursively.
    # * other elements: their tags are escaped, their content is processed.
    # * any other node kind yields nothing.
    #
    # no_format_newline:: true while inside a <pre>; it is passed down to all
    #                     descendants so newlines stay intact at any depth.
    def self.format_article_elems(elems, allowed_tags = Set.new, no_format_newline = false)
      # NOTE(review): children may be nil for an empty node - confirm against Hpricot.
      (elems.children || []).map { |e|
        if e.kind_of?(Hpricot::Text)
          linked = XhtmlFormatter.format_url(e.content)
          no_format_newline ? linked : XhtmlFormatter.format_newline(linked)
        elsif e.kind_of?(Hpricot::Elem)
          if allowed_tags.member? e.name.to_sym
            if e.empty? || e.name == 'a'
              e.to_html
            else
              inside_pre = no_format_newline || e.stag.name == 'pre'
              e.stag.inspect +
                XhtmlFormatter.format_article_elems(e, allowed_tags, inside_pre) +
                (e.etag || Hpricot::ETag.new(e.stag.name)).inspect
            end
          else
            if e.empty?
              XhtmlFormatter.escape_lt(e.stag.inspect)
            else
              XhtmlFormatter.escape_lt(e.stag.inspect) +
                XhtmlFormatter.format_article_elems(e, allowed_tags, no_format_newline) +
                XhtmlFormatter.escape_lt((e.etag || Hpricot::ETag.new(e.stag.name)).inspect)
            end
          end
        end
      }.join
    end

    # Replaces every '&' with '&amp;'.
    def self.escape_amp(text)
      text.gsub('&', '&amp;')
    end

    # Replaces every '<' with '&lt;'.
    def self.escape_lt(text)
      text.gsub('<', '&lt;')
    end
  end
end # of Ludy
137