godfat / ludy
- Source
- Commits
- Network (0)
- Issues (0)
- Downloads (2)
- Wiki (1)
- Graphs
-
Tree:
2400d1d
commit 2400d1dc84e138fa0d3f292e2cf0f82a3dc4821d
tree a2619df93757797807d12cb7f4d87d2e9e4ef9d2
parent 71ee19235946c169616dc0e51a96fc4778fd06e5
tree a2619df93757797807d12cb7f4d87d2e9e4ef9d2
parent 71ee19235946c169616dc0e51a96fc4778fd06e5
require 'set'

module Ludy

  # 2008-05-09 godfat
  #
  # Helpers that turn user supplied text / HTML fragments into XHTML:
  # auto-linking of URLs and e-mail addresses, newline to <br /> conversion,
  # and escaping of every tag that is not explicitly allowed.
  #
  # The public entry points are module functions (callable as
  # XhtmlFormatter.format_url etc., or mixed in). The helpers defined with
  # +def self.+ further below are called with an explicit receiver from
  # inside the module, so they have to stay public singleton methods.
  module XhtmlFormatter
    module_function

    # Formats a whole article.
    #
    # html::         the raw markup to format.
    # allowed_tags:: tag names, as symbols (they are compared against
    #                +name.to_sym+), that are kept as real markup; every
    #                other element has its tags escaped.
    #
    # Returns the formatted markup as a String. Requires the hpricot gem,
    # which is loaded lazily on first use.
    def format_article html, *allowed_tags
      require 'rubygems'
      require 'hpricot'

      allowed_tags = allowed_tags.to_set
      XhtmlFormatter.format_article_elems Hpricot.parse(
        XhtmlFormatter.escape_all_inside_pre(html, allowed_tags)), allowed_tags
    end

    # Auto-links URLs and e-mail addresses found in the text nodes that
    # +each_child+ yields for the parsed document (presumably only the
    # direct children -- confirm against Hpricot), leaving existing
    # elements untouched.
    #
    # html::  the markup to process.
    # attrs:: extra attributes (name => value) added to generated URL links.
    #
    # Returns the document serialized with +to_html+. Requires hpricot.
    def format_autolink html, attrs = {}
      require 'rubygems'
      require 'hpricot'

      doc = Hpricot.parse html
      doc.each_child{ |c|
        next unless c.kind_of?(Hpricot::Text)
        c.content = format_url c.content, attrs
      }
      doc.to_html
    end

    # translated from drupal-6.2/modules/filter/filter.module
    #
    # Regexp-only auto-linking that works on markup without parsing it.
    # A link is only created when the candidate is preceded by a delimiter
    # (<p>, <li>, <br>, whitespace or an opening parenthesis).
    #
    # text::  the markup to process.
    # attrs:: extra attributes (name => value) added to generated URL links.
    #
    # Returns a new String with the links inserted.
    def format_autolink_regexp text, attrs = {}
      attrs = attrs.map{ |k,v| " #{k}=\"#{v}\""}.join

      # A space is prepended so that a URL at the very beginning of the text
      # has the leading delimiter the patterns require; it is removed again
      # by the final [1..-1].

      # Match absolute URLs.
      linked = " #{text}".gsub(%r{(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)])?)}i){
        # groups: 1 = leading delimiter, 2 = whole URL, 5 = trailing punctuation
        prefix, url, punctuation = $1, $2, $5
        caption = XhtmlFormatter.trim url
        "#{prefix}<a href=\"#{url}\" title=\"#{url}\"#{attrs}>#{caption}</a>#{punctuation}"
      }

      # Match e-mail addresses.
      linked = linked.gsub(%r{(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))}i, '\1<a href="mailto:\2">\2</a>\3')

      # Match www domains/addresses.
      linked = linked.gsub(%r{(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))}i){
        # groups: 1 = leading delimiter, 2 = www address, 3 = trailing punctuation
        prefix, url, punctuation = $1, $2, $3
        caption = XhtmlFormatter.trim url
        "#{prefix}<a href=\"http://#{url}\" title=\"http://#{url}\"#{attrs}>#{caption}</a>#{punctuation}"
      }

      linked[1..-1]
    end

    # translated from drupal-6.2/modules/filter/filter.module
    #
    # Auto-links absolute URLs, www addresses and e-mail addresses in plain
    # text. Unlike format_autolink_regexp no leading delimiter is required,
    # so this is meant for text that contains no markup.
    #
    # text::  the plain text to process.
    # attrs:: extra attributes (name => value) added to generated URL links
    #         (not to mailto links generated for bare e-mail addresses).
    #
    # Returns a new String with the links inserted.
    def format_url text, attrs = {}
      # Built once up front: building it inside the substitution block and
      # assigning it back to attrs broke every match after the first one.
      attr_html = attrs.map{ |k,v| " #{k}=\"#{v}\""}.join

      # Match absolute URLs and www domains/addresses.
      # groups: 1 = whole URL, 2 = scheme (or "www."), 3 = rest, 4 = punctuation
      url_pattern = %r{((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)}i
      # Match e-mail addresses.
      # groups (after the union below): 5 = address, 6 = punctuation
      email_pattern = %r{([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)}i

      # Both patterns run in a single pass so that an address which is part
      # of an already generated link (mailto:me@example.com, or a URL that
      # contains "@") is not linked a second time inside its own href.
      text.gsub(Regexp.union(url_pattern, email_pattern)){
        found = Regexp.last_match
        if found[1]
          url     = found[1]
          caption = XhtmlFormatter.trim url
          # Only bare www addresses lack a scheme; every other alternative
          # of the pattern already starts with one and is used verbatim.
          href = found[2].downcase == 'www.' ? "http://#{url}" : url
          "<a href=\"#{href}\" title=\"#{href}\"#{attr_html}>#{caption}</a>"
        else
          address = found[5]
          "<a href=\"mailto:#{address}\">#{address}</a>"
        end
      }
    end

    # Converts every kind of line break into an XHTML <br />.
    #
    # text:: the text to process.
    #
    # Returns a new String.
    def format_newline text
      # windows: \r\n
      # mac os 9: \r
      text.gsub(/\r\n?|\n/, '<br />')
    end

    # Note: no +private+ here. The helpers below are singleton methods, which
    # +private+ would not affect, and they are invoked with an explicit
    # receiver (XhtmlFormatter.trim ...), so they must remain public.

    # Shortens text to at most +length+ characters for use as a link
    # caption; the last three characters of a shortened text are '...'.
    #
    # text::   the caption candidate.
    # length:: maximum size of the result.
    #
    # Returns text itself when it already fits, otherwise a shortened copy.
    def self.trim text, length = 50
      if text.size <= length
        text
      elsif length <= 3
        # No room for anything but the ellipsis itself.
        '...'
      else
        # Use -3 for '...' string length.
        "#{text[0...length-3]}..."
      end
    end

    # Escapes '&' and '<' in everything between <pre> and </pre> so that the
    # content is shown literally. Does nothing unless :pre is an allowed tag.
    #
    # html::         the raw markup.
    # allowed_tags:: collection of allowed tag names as symbols.
    #
    # Returns the markup with the <pre> content escaped.
    def self.escape_all_inside_pre html, allowed_tags
      return html unless allowed_tags.member? :pre
      # Close an unterminated <pre> so the substitution below can match;
      # only done when there is an opening tag to close.
      html = html + '</pre>' if html =~ /<pre[\s>]/i && html !~ %r{</pre>}i
      # don't bother nested pre, because we escape all tags in pre
      # (the greedy match spans from the first <pre> to the last </pre>)
      html.gsub(%r{<pre>(.*)</pre>}mi){
        inner = $1
        # stop escaping for '>' because drupal's url filter would make > into url...
        "<pre>#{XhtmlFormatter.escape_lt(XhtmlFormatter.escape_amp(inner))}</pre>"
      }
    end

    # Recursively serializes the children of a parsed Hpricot node.
    #
    # * Text nodes are auto-linked and, unless no_format_newline is set,
    #   have their newlines turned into <br />.
    # * Allowed elements keep their tags; empty ones and <a> are emitted
    #   unchanged via +to_html+, others have their children formatted.
    # * Elements that are not allowed have their tags escaped while their
    #   children are still formatted.
    # * Any other node kind yields nil, which +join+ renders as nothing.
    #
    # elems::             a node responding to +children+.
    # allowed_tags::      collection of allowed tag names as symbols.
    # no_format_newline:: true while formatting the direct children of <pre>.
    #
    # Returns the formatted markup as a String.
    def self.format_article_elems elems, allowed_tags = Set.new, no_format_newline = false
      elems.children.map{ |e|
        if e.kind_of?(Hpricot::Text)
          linked = format_url(e.content)
          no_format_newline ? linked : format_newline(linked)

        elsif e.kind_of?(Hpricot::Elem)
          if allowed_tags.member? e.name.to_sym
            if e.empty? || e.name == 'a'
              e.to_html
            else
              # A missing end tag is synthesized from the start tag's name.
              e.stag.inspect +
                XhtmlFormatter.format_article_elems(e, allowed_tags, e.stag.name == 'pre') +
                (e.etag || Hpricot::ETag.new(e.stag.name)).inspect
            end
          else
            if e.empty?
              XhtmlFormatter.escape_lt(e.stag.inspect)
            else
              XhtmlFormatter.escape_lt(e.stag.inspect) +
                XhtmlFormatter.format_article_elems(e, allowed_tags) +
                XhtmlFormatter.escape_lt((e.etag || Hpricot::ETag.new(e.stag.name)).inspect)
            end
          end
        end
      }.join
    end

    # Replaces every '&' with the entity '&amp;'.
    def self.escape_amp text
      text.gsub('&', '&amp;')
    end

    # Replaces every '<' with the entity '&lt;'.
    def self.escape_lt text
      text.gsub('<', '&lt;')
    end
  end
end # of Ludy
