public
Description: Yet Another Planet Refactoring
Homepage: http://intertwingly.net/blog/2007/12/19/Yet-Another-Planet-Refactoring
Clone URL: git://github.com/rubys/mars.git
Search Repo:
Sam Ruby (author)
Thu Apr 03 17:55:30 -0700 2008
commit  594cd30192668c6310b3236b978d5d4a6d706fb7
tree    fb2c98375a3f69e66dc22da049a8c7e26438d717
parent  775bc2a397c7812ae67b9979f288c3c835aab059
mars / planet / harvest.rb
100644 294 lines (235 sloc) 6.397 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
require 'planet/transmogrify'
require 'planet/sift'
 
module Planet
  def Planet.harvest source
    doc = Planet::Transmogrify.parse(open(source))
    doc.attributes['xml:base'] = source
 
    # augment the document with feed parser attributes
    class << doc
      attr_accessor :feed, :entries
    end
 
    # Anchor the dynamic dictionaries
    doc.feed = Feed.new(doc.root)
    doc.entries = doc.root.elements.to_a('entry').map {|entry| Entry.new(entry)}
 
    doc
  end
 
  # A dynamic dictionary that allows attributes to be accessed via indexing
  class UserDict
    attr_accessor :node
 
    def initialize node
      @node = node || REXML::Element.new('')
    end
 
    def [](index)
      respond_to?(index) ? send(index) : nil
    end
 
    # method generator for elements whose value is defined by its text child
    def UserDict.text_element *names
      names.each do |name|
        define_method name do
          element = @node.elements[name.to_s]
          element ? element.texts.map {|t| t.value}.join : nil
        end
      end
    end
 
    # method generator for element attribute values
    def UserDict.element_attr *names
      names.each do |name|
        define_method name do
          @node.attributes[name.to_s]
        end
      end
    end
 
    # method generator for relative URI attribute values
    def UserDict.reluri_attr *names
      names.each do |name|
        define_method name do
          value = @node.attributes[name.to_s]
          value = Planet.uri_norm(@node.xmlbase, value) if value
          value
        end
      end
    end
 
    # method generator for text constructs (plus detail)
    def UserDict.text_construct *names
      names.each do |name|
        define_method name do
          TextConstruct.new(@node.elements[name.to_s]).value
        end
 
        define_method name.to_s + "_detail" do
          TextConstruct.new(@node.elements[name.to_s])
        end
      end
    end
  end
 
  class CommonElements < UserDict
    text_element :id
    alias :guid :id
 
    text_construct :rights
    alias :copyright :rights
 
    text_construct :title
 
    def link
      links.select {|link| link.rel=='alternate'}.first.href rescue nil
    end
 
    def links
      @node.elements.to_a('link').map {|node| Link.new(node)}
    end
 
    def license
      links.select {|link| link.rel=='license'}.first.href rescue nil
    end
 
    def tags
      @node.elements.to_a('category').map {|node| Category.new(node)}
    end
 
    def categories
      tags.map {|tag| [tag.scheme, tag.term]}
    end
 
    def category
      tags.first.term rescue nil
    end
 
    def contributors
      @node.elements.to_a('contributor').map {|node| Author.new(node)}
    end
 
    def categories
      tags.map {|tag| [tag.scheme, tag.term]}
    end
 
    def category
      tags.first.term rescue nil
    end
 
    def author
      author_detail.to_s
    end
 
    def author_detail
      Author.new(@node.elements['author'])
    end
 
    alias :publisher :author
    alias :publisher_detail :author_detail
  end
 
  class Feed < CommonElements
    text_element :icon, :logo
    text_construct :subtitle
 
    alias :description :subtitle
    alias :tagline :subtitle
 
    def generator
      generator_detail.name
    end
 
    def generator_detail
      Generator.new(@node.elements['generator'])
    end
  end
 
  class Entry < CommonElements
    text_construct :summary
 
    alias :description :summary
 
    def content
      @node.elements.to_a('content').map {|node| TextConstruct.new(node)}
    end
 
    def enclosures
      links.select {|link| link.rel == 'enclosure'}
    end
 
    def comments
      links.select { |link|
        link.rel == 'replies' and link.type == 'text/html'
      }.first.href rescue nil
    end
 
    def source
      Feed.new(@node.elements['source'])
    end
  end
 
  class TextConstruct < UserDict
    require 'html5'
    require 'html5/treewalkers'
    require 'html5/serializer'
 
    REXML_TREEWALKER = HTML5::TreeWalkers['rexml']
 
    element_attr :src
 
    def value
      case @node.attributes['type']
        when 'xhtml'
          serialize(@node.elements[1].to_a).strip
        when 'text', nil, /^text\//i
          (@node.text || '').strip
        when 'html'
          text = @node.text.strip rescue ''
          serialize HTML5.parse_fragment(text, :encoding => 'UTF-8')
        when /\+xml$/i, /\/xml$/i
          @node.to_a.to_s.strip
        else
          # base 64
          @node.text.gsub(/\s/,'').unpack('m').first
      end
    end
 
    def type
      case @node.attributes['type']
        when 'xhtml'
          'application/xhtml+xml'
        when 'text', nil
          'text/plain'
        when 'html'
          'text/html'
        else
          @node.attributes['type']
      end
    end
 
    def base
      url_norm(@node.xmlbase)
    end
 
  private
 
    # DOM to string
    def serialize nodes
      nodes.map { |node|
        # resolve relative URIs
        if node.respond_to? :attributes
          if !node.parent.parent
            node.parent.attributes['xml:base'] ||= @node.xmlbase
          end
          resolve node if node.respond_to? :attributes
        end
 
        HTML5::XHTMLSerializer.serialize(REXML_TREEWALKER.new(node))
      }.join
    end
 
    # resolve relative URIs
    def resolve element
      element.attributes.each do |name,value|
        if %w(href).include? name
          element.attributes[name] =
            Planet.uri_norm(element.xmlbase, value)
        end
      end
      element.each_element { |child| resolve child }
    end
  end
 
  class Author < UserDict
    text_element :name, :email, :uri
 
    def uri
      value = @node.elements['uri']
      if value
        value = Planet.uri_norm(value.xmlbase, value.text)
      end
      value
    end
 
    def to_s
      email ? "#{name} (#{email})" : "#{name}"
    end
 
    alias :url :uri
    alias :href :uri
  end
 
  class Link < UserDict
    element_attr :title, :length, :hreflang
    reluri_attr :href
 
    alias :url :href
 
    def rel
      @node.attributes['rel'] or 'alternate'
    end
 
    def type
      @node.attributes['type'] or (rel=='self' ? 'application/atom+xml' : nil)
    end
  end
 
  class Category < UserDict
    element_attr :term, :scheme, :label
  end
 
  class Generator < UserDict
    element_attr :version
    reluri_attr :uri
 
    alias :href :uri
 
    def name
      @node.text
    end
  end
end