rubys / mars
- Source
- Commits
- Network (3)
- Issues (0)
- Downloads (0)
- Wiki (1)
- Graphs
-
Tree:
a4c9e7e
commit a4c9e7e1e7c6d5e675a6a6ceddc9150ae87ff746
tree ea843df39ee97243590113850876b4826c4cf05a
parent 41bfc6f06cfe0c6dc83a5ed719f9f6e0ed0205c0
| a73655d5 » | Scott Bronson | 2008-03-28 | 1 | require 'planet/transmogrify' | |
| 2 | require 'planet/sift' | ||||
| 3 | |||||
| 4 | module Planet | ||||
# Open +source+, parse it with Planet::Transmogrify, and return the parsed
# document augmented with +feed+ and +entries+ accessors.
#
# source:: location handed to +open+; it is also recorded on the document
#          as its +xml:base+ attribute.
#
# Returns the document produced by Planet.add_attrs.
def Planet.harvest source
  # Block form closes the handle as soon as parsing is done rather than
  # leaving it open until garbage collection.
  # NOTE(review): assumes Transmogrify.parse fully consumes the stream
  # before returning -- confirm.
  doc = open(source) { |io| Planet::Transmogrify.parse(io) }
  doc.attributes['xml:base'] = source

  Planet.add_attrs(doc)
end
| 11 | |||||
# Decorate a parsed document with feed-parser style +feed+ and +entries+
# accessors and hand the same document back.
#
# doc:: parsed document; its root element backs the Feed wrapper and each
#       +entry+ child of the root backs one Entry wrapper.
def Planet.add_attrs doc
  # The accessors are added to this one object only, via its singleton class.
  singleton = class << doc; self; end
  singleton.send(:attr_accessor, :feed, :entries)

  # Anchor the dynamic dictionaries on the document itself.
  root = doc.root
  doc.feed = Feed.new(root)
  doc.entries = root.elements.to_a('entry').map { |node| Entry.new(node) }

  doc
end
| 25 | |||||
# Dictionary-like wrapper around an XML node: every public reader method
# can also be reached through +[]+ indexing.
class UserDict
  attr_accessor :node

  # node:: the element to wrap; a nil node is replaced by an empty element
  #        so that readers can still be called.
  def initialize node
    @node = node || REXML::Element.new('')
  end

  # Look up +index+ as a method name; unknown names yield nil.
  def [](index)
    return nil unless respond_to?(index)
    send(index)
  end

  # Generate readers whose value is the concatenated text children of the
  # child element with the same name (nil when that element is absent).
  def UserDict.text_element *names
    names.each do |name|
      path = name.to_s
      define_method(name) do
        child = @node.elements[path]
        if child
          child.texts.map { |text| text.value }.join
        else
          nil
        end
      end
    end
  end

  # Generate readers that return the wrapped node's attribute of that name.
  def UserDict.element_attr *names
    names.each do |name|
      key = name.to_s
      define_method(name) { @node.attributes[key] }
    end
  end

  # Generate readers for attributes holding URIs; a present value is passed
  # through Planet.uri_norm together with the node's xml:base.
  def UserDict.reluri_attr *names
    names.each do |name|
      key = name.to_s
      define_method(name) do
        raw = @node.attributes[key]
        raw ? Planet.uri_norm(@node.xmlbase, raw) : raw
      end
    end
  end

  # Generate a pair of readers per text construct: +name+ returns the
  # construct's value, +name_detail+ returns the TextConstruct itself.
  def UserDict.text_construct *names
    names.each do |name|
      path = name.to_s

      define_method("#{path}_detail") do
        TextConstruct.new(@node.elements[path])
      end

      define_method(name) do
        TextConstruct.new(@node.elements[path]).value
      end
    end
  end
end
| 81 | |||||
# Readers shared by the feed-level and entry-level wrappers.
class CommonElements < UserDict
  text_element :id, :updated, :published
  alias_method :guid, :id

  text_construct :rights
  alias_method :copyright, :rights

  text_construct :title

  # Every +link+ child, wrapped as a Link.
  def links
    @node.elements.to_a('link').map { |element| Link.new(element) }
  end

  # href of the first link whose rel is 'alternate'; nil when there is none
  # or when anything goes wrong while looking it up.
  def link
    links.select { |candidate| candidate.rel == 'alternate' }.first.href
  rescue StandardError
    nil
  end

  # href of the first link whose rel is 'license'; nil on any failure.
  def license
    links.select { |candidate| candidate.rel == 'license' }.first.href
  rescue StandardError
    nil
  end

  # Every +category+ child, wrapped as a Category.
  def tags
    @node.elements.to_a('category').map { |element| Category.new(element) }
  end

  # [scheme, term] pairs, one per tag.
  def categories
    tags.map { |tag| [tag.scheme, tag.term] }
  end

  # Term of the first tag; nil on any failure (including no tags).
  def category
    tags.first.term
  rescue StandardError
    nil
  end

  # Every +contributor+ child, wrapped as an Author.
  def contributors
    @node.elements.to_a('contributor').map { |element| Author.new(element) }
  end

  # The +author+ child wrapped as an Author.
  def author_detail
    Author.new(@node.elements['author'])
  end

  # String form of the author, as produced by Author#to_s.
  def author
    author_detail.to_s
  end

  alias_method :publisher, :author
  alias_method :publisher_detail, :author_detail
end
| 130 | |||||
# Feed-level wrapper: the common readers plus feed-only elements and the
# planet: extension elements.
class Feed < CommonElements
  text_element :icon, :logo
  text_construct :subtitle

  alias_method :description, :subtitle
  alias_method :tagline, :subtitle

  # The +generator+ child wrapped as a Generator.
  def generator_detail
    Generator.new(@node.elements['generator'])
  end

  # Name reported by the generator wrapper.
  def generator
    generator_detail.name
  end

  # Text of the planet:message child, or nil when absent.
  def message
    joined_text_of('planet:message')
  end

  # Text of the planet:name child, or nil when absent.
  def name
    joined_text_of('planet:name')
  end

  # Every planet:source child, each wrapped as a Feed.
  def sources
    @node.elements.to_a('planet:source').map { |element| Feed.new(element) }
  end

  # href of the first link whose rel is 'self'; nil on any failure.
  def url
    links.select { |candidate| candidate.rel == 'self' }.first.href
  rescue StandardError
    nil
  end

  private

  # Concatenate the text children of the child element at +path+.
  def joined_text_of(path)
    child = @node.elements[path]
    return nil unless child
    child.texts.map { |text| text.value }.join
  end
end
| 164 | |||||
# Entry-level wrapper: the common readers plus summary, content,
# enclosures, comments link and source feed.
class Entry < CommonElements
  text_construct :summary

  alias_method :description, :summary

  # Every +content+ child, wrapped as a TextConstruct.
  def content
    @node.elements.to_a('content').map { |element| TextConstruct.new(element) }
  end

  # Links whose rel is 'enclosure'.
  def enclosures
    links.select { |candidate| candidate.rel == 'enclosure' }
  end

  # href of the first enclosure; nil on any failure.
  def enclosure_href
    enclosures.first.href
  rescue StandardError
    nil
  end

  # length attribute of the first enclosure; nil on any failure.
  def enclosure_length
    enclosures.first.length
  rescue StandardError
    nil
  end

  # type of the first enclosure, but only when it really is a Planet::Link;
  # otherwise nil. Unlike the two readers above, errors are not swallowed.
  def enclosure_type
    first = enclosures.first
    first.is_a?(Planet::Link) ? first.type : nil
  end

  # href of the first link with rel 'replies' and type 'text/html';
  # nil on any failure.
  def comments
    replies = links.select do |candidate|
      candidate.rel == 'replies' && candidate.type == 'text/html'
    end
    replies.first.href
  rescue StandardError
    nil
  end

  # The +source+ child wrapped as a Feed.
  def source
    Feed.new(@node.elements['source'])
  end
end
| 204 | |||||
# Wrapper for text-bearing elements (title, summary, content, ...). Exposes
# the element's payload as a string via #value, dispatching on the +type+
# attribute.
class TextConstruct < UserDict
  require 'html5'
  require 'html5/treewalkers'
  require 'html5/serializer'

  REXML_TREEWALKER = HTML5::TreeWalkers['rexml']

  element_attr :src

  # The payload as a string, chosen by the +type+ attribute:
  # * 'xhtml'                -- children of the first child element, serialized
  # * 'text', none, text/*   -- the element's text, stripped
  # * 'html'                 -- the text parsed as an HTML fragment, serialized
  # * */xml, *+xml           -- the child nodes concatenated as markup
  # * anything else          -- the text decoded as base64
  def value
    case @node.attributes['type']
    when 'xhtml'
      serialize(@node.elements[1].to_a).strip
    when 'text', nil, /^text\//i
      (@node.text || '').strip
    when 'html'
      text = @node.text.strip rescue ''
      serialize HTML5.parse_fragment(text, :encoding => 'UTF-8')
    when /\+xml$/i, /\/xml$/i
      # join, not Array#to_s: on Ruby 1.9+ to_s yields inspect-style output
      # ("[<a/>, ...]") instead of the concatenated markup.
      @node.to_a.join.strip
    else
      # base 64; an empty element (no text at all) decodes to '' instead of
      # raising NoMethodError on nil.
      (@node.text || '').gsub(/\s/, '').unpack('m').first
    end
  end

  # The +type+ attribute expanded to a MIME type for the three short forms;
  # any other value is returned as written.
  def type
    case @node.attributes['type']
    when 'xhtml'
      'application/xhtml+xml'
    when 'text', nil
      'text/plain'
    when 'html'
      'text/html'
    else
      @node.attributes['type']
    end
  end

  # NOTE(review): +url_norm+ is not defined anywhere in this file, and the
  # other readers call Planet.uri_norm(base, value) instead. Unless a
  # one-argument +url_norm+ is provided elsewhere this raises NoMethodError
  # -- confirm and fix against the helper's real signature.
  def base
    url_norm(@node.xmlbase)
  end

  # The element's xml:lang attribute.
  def language
    @node.attributes['xml:lang']
  end

  private

  # DOM to string: serialize each node with the XHTML serializer and
  # concatenate the results.
  def serialize nodes
    nodes.map { |node|
      # resolve relative URIs (only for nodes that carry attributes)
      if node.respond_to? :attributes
        # A parent with no parent of its own gets this construct's base
        # (unless it already has one), so resolution below can find it.
        if !node.parent.parent
          node.parent.attributes['xml:base'] ||= @node.xmlbase
        end
        resolve node
      end

      HTML5::XHTMLSerializer.serialize(REXML_TREEWALKER.new(node))
    }.join
  end

  # Rewrite every +href+ attribute in +element+ and its descendants through
  # Planet.uri_norm, using each element's own xml:base.
  def resolve element
    element.attributes.each do |name,value|
      if %w(href).include? name
        element.attributes[name] =
          Planet.uri_norm(element.xmlbase, value)
      end
    end
    element.each_element { |child| resolve child }
  end
end
| 280 | |||||
# Wrapper for person constructs (author, contributor).
class Author < UserDict
  # The generated +uri+ reader is superseded by the explicit #uri below.
  text_element :name, :email, :uri

  # Text of the +uri+ child passed through Planet.uri_norm together with
  # that child's xml:base; nil when there is no +uri+ child.
  def uri
    uri_element = @node.elements['uri']
    return nil unless uri_element
    Planet.uri_norm(uri_element.xmlbase, uri_element.text)
  end

  # "name (email)", just the name, or just the email (empty string when
  # neither is present).
  def to_s
    return "#{email}" unless name
    return "#{name}" unless email
    "#{name} (#{email})"
  end

  alias_method :url, :uri
  alias_method :href, :uri
end
| 303 | |||||
# Wrapper for +link+ elements.
class Link < UserDict
  element_attr :title, :length, :hreflang
  reluri_attr :href

  alias_method :url, :href

  # The +rel+ attribute, defaulting to 'alternate' when it is absent.
  def rel
    @node.attributes['rel'] || 'alternate'
  end

  # The +type+ attribute; when absent, 'application/atom+xml' for a
  # rel="self" link and nil for everything else.
  def type
    declared = @node.attributes['type']
    return declared if declared
    rel == 'self' ? 'application/atom+xml' : nil
  end
end
| 318 | |||||
# Wrapper for +category+ elements; each reader returns the attribute of the
# same name.
class Category < UserDict
  element_attr(:term)
  element_attr(:scheme)
  element_attr(:label)
end
| 322 | |||||
# Wrapper for the +generator+ element.
class Generator < UserDict
  element_attr(:version)
  reluri_attr(:uri)

  alias_method :href, :uri

  # The element's text.
  def name
    @node.text
  end
end
| 333 | end | ||||
