public
Description: Yet Another Planet Refactoring
Homepage: http://intertwingly.net/blog/2007/12/19/Yet-Another-Planet-Refactoring
Clone URL: git://github.com/rubys/mars.git
mars / planet / harvest.rb
a73655d5 » Scott Bronson 2008-03-28 Add full Mars bzr tree. 1 require 'planet/transmogrify'
2 require 'planet/sift'
3
4 module Planet
# Fetch and parse the feed at +source+, then return the parsed document
# decorated by Planet.add_attrs.
#
# source:: location passed to Kernel#open; it is also stored as the
#          document's xml:base attribute.
#
# NOTE(review): Kernel#open spawns a subprocess when its argument starts
# with "|", so +source+ should only come from trusted configuration --
# confirm against callers.
def Planet.harvest source
  # Block form closes the handle as soon as parsing is done; a bare
  # open(source) leaves it open until the object is garbage collected.
  # Assumes Transmogrify.parse consumes the stream before returning --
  # TODO confirm.
  doc = open(source) { |io| Planet::Transmogrify.parse(io) }
  doc.attributes['xml:base'] = source

  Planet.add_attrs(doc)
end
11
# Augment a document with feed parser attributes.
#
# Adds +feed+ and +entries+ accessors to this one document object (not to
# its class), fills them in, and returns the same document.
#
# doc:: parsed document; must respond to +root+.
def Planet.add_attrs doc
  class << doc
    attr_accessor :feed, :entries
  end

  # Anchor the dynamic dictionaries
  root = doc.root
  doc.feed = Feed.new(root)

  # A document without a root element simply has no entries; without this
  # guard the call below would fail on nil.
  doc.entries =
    if root
      root.elements.to_a('entry').map { |entry| Entry.new(entry) }
    else
      []
    end

  doc
end
25
# A dynamic dictionary that allows attributes to be accessed via indexing.
#
# Wraps a node and exposes reader methods, most of them generated by the
# class-level macros below. dict[key] calls the reader named +key+.
class UserDict
  attr_accessor :node

  # node:: the element to wrap; nil is replaced by an empty REXML::Element
  #        so that readers can still run when the element is absent.
  def initialize(node)
    @node = node || REXML::Element.new('')
  end

  # Dictionary-style lookup.
  #
  # index:: name of a reader, as a String or Symbol.
  #
  # Returns the value of the public method named +index+, or nil when there
  # is no such method, when +index+ is not a String/Symbol, or when the
  # method cannot be called without arguments (e.g. 'node=' or '[]').
  def [](index)
    # respond_to? raises TypeError for anything that is not a name.
    return nil unless index.is_a?(String) || index.is_a?(Symbol)
    return nil unless respond_to?(index)

    # Arity 0 takes no arguments; -1 takes only optional ones. Anything
    # else would raise ArgumentError when sent without arguments.
    arity = method(index).arity
    (arity == 0 || arity == -1) ? send(index) : nil
  end

  # method generator for elements whose value is defined by its text child
  #
  # Each generated reader returns the joined values of the child element's
  # text nodes, or nil when the child element is missing.
  def UserDict.text_element(*names)
    names.each do |name|
      define_method name do
        element = @node.elements[name.to_s]
        element ? element.texts.map { |t| t.value }.join : nil
      end
    end
  end

  # method generator for element attribute values
  #
  # Each generated reader returns the wrapped node's attribute of that name.
  def UserDict.element_attr(*names)
    names.each do |name|
      define_method name do
        @node.attributes[name.to_s]
      end
    end
  end

  # method generator for relative URI attribute values
  #
  # Each generated reader passes the attribute value, together with the
  # node's xmlbase, through Planet.uri_norm; a missing attribute yields nil.
  def UserDict.reluri_attr(*names)
    names.each do |name|
      define_method name do
        value = @node.attributes[name.to_s]
        value = Planet.uri_norm(@node.xmlbase, value) if value
        value
      end
    end
  end

  # method generator for text constructs (plus detail)
  #
  # For each name this defines two readers: +name+ returns the value of the
  # TextConstruct built from the child element, and +name_detail+ returns
  # the TextConstruct itself.
  def UserDict.text_construct(*names)
    names.each do |name|
      define_method name do
        TextConstruct.new(@node.elements[name.to_s]).value
      end

      define_method name.to_s + "_detail" do
        TextConstruct.new(@node.elements[name.to_s])
      end
    end
  end
end
81
# Readers shared by feeds and entries. Everything is computed on demand
# from the wrapped @node.
class CommonElements < UserDict
  text_element :id, :updated, :published
  alias_method :guid, :id

  text_construct :rights
  alias_method :copyright, :rights

  text_construct :title

  # href of the first link whose rel is 'alternate'; nil when there is no
  # such link or when anything raises along the way.
  def link
    links.find { |candidate| candidate.rel == 'alternate' }.href
  rescue StandardError
    nil
  end

  # Every 'link' child element, wrapped as a Link.
  def links
    link_nodes = @node.elements.to_a('link')
    link_nodes.map { |link_node| Link.new(link_node) }
  end

  # href of the first link whose rel is 'license'; nil when there is no
  # such link or when anything raises along the way.
  def license
    links.find { |candidate| candidate.rel == 'license' }.href
  rescue StandardError
    nil
  end

  # Every 'category' child element, wrapped as a Category.
  def tags
    category_nodes = @node.elements.to_a('category')
    category_nodes.map { |category_node| Category.new(category_node) }
  end

  # The tags as [scheme, term] pairs.
  def categories
    tags.map { |entry| [entry.scheme, entry.term] }
  end

  # Term of the first tag; nil when there are no tags or on any error.
  def category
    tags.first.term
  rescue StandardError
    nil
  end

  # Every 'contributor' child element, wrapped as an Author.
  def contributors
    contributor_nodes = @node.elements.to_a('contributor')
    contributor_nodes.map { |contributor_node| Author.new(contributor_node) }
  end

  # String form of author_detail.
  def author
    author_detail.to_s
  end

  # The 'author' child element wrapped as an Author.
  def author_detail
    Author.new(@node.elements['author'])
  end

  alias_method :publisher, :author
  alias_method :publisher_detail, :author_detail
end
130
# Feed-level readers, on top of those provided by CommonElements.
class Feed < CommonElements
  text_element :icon, :logo
  text_construct :subtitle

  alias_method :description, :subtitle
  alias_method :tagline, :subtitle

  # Name reported by generator_detail.
  def generator
    generator_detail.name
  end

  # The 'generator' child element wrapped as a Generator.
  def generator_detail
    Generator.new(@node.elements['generator'])
  end

  # Joined text of the 'planet:message' child, or nil when it is absent.
  def message
    found = @node.elements['planet:message']
    return nil unless found

    found.texts.map { |text| text.value }.join
  end

  # Joined text of the 'planet:name' child, or nil when it is absent.
  def name
    found = @node.elements['planet:name']
    return nil unless found

    found.texts.map { |text| text.value }.join
  end

  # Every 'planet:source' child element, wrapped as a Feed.
  def sources
    source_nodes = @node.elements.to_a('planet:source')
    source_nodes.map { |source_node| Feed.new(source_node) }
  end

  # href of the first link whose rel is 'self'; nil when there is no such
  # link or when anything raises along the way.
  def url
    links.find { |candidate| candidate.rel == 'self' }.href
  rescue StandardError
    nil
  end
end
164
# Entry-level readers, on top of those provided by CommonElements.
class Entry < CommonElements
  text_construct :summary

  alias_method :description, :summary

  # Every 'content' child element, wrapped as a TextConstruct.
  def content
    content_nodes = @node.elements.to_a('content')
    content_nodes.map { |content_node| TextConstruct.new(content_node) }
  end

  # href of the first enclosure; nil when there is none or on any error.
  def enclosure_href
    enclosures.first.href
  rescue StandardError
    nil
  end

  # length attribute of the first enclosure; nil when there is none or on
  # any error.
  def enclosure_length
    enclosures.first.length
  rescue StandardError
    nil
  end

  # type of the first enclosure, or nil when the first enclosure is not a
  # Planet::Link (which includes the case of no enclosures at all).
  def enclosure_type
    first_enclosure = enclosures.first
    first_enclosure.is_a?(Planet::Link) ? first_enclosure.type : nil
  end

  # All links whose rel is 'enclosure'.
  def enclosures
    links.select { |candidate| candidate.rel == 'enclosure' }
  end

  # href of the first link with rel 'replies' and type 'text/html'; nil
  # when there is no such link or on any error.
  def comments
    html_replies = links.find do |candidate|
      candidate.rel == 'replies' && candidate.type == 'text/html'
    end
    html_replies.href
  rescue StandardError
    nil
  end

  # The 'source' child element wrapped as a Feed.
  def source
    Feed.new(@node.elements['source'])
  end
end
204
# A text-bearing element (title, summary, content, ...) whose 'type'
# attribute decides how its body is turned into a string.
class TextConstruct < UserDict
  require 'html5'
  require 'html5/treewalkers'
  require 'html5/serializer'

  REXML_TREEWALKER = HTML5::TreeWalkers['rexml']

  element_attr :src

  # The content as a string, chosen by the 'type' attribute:
  # xhtml::        serialized children of the first child element
  # text / none::  the node's text, stripped
  # html::         the node's text parsed as an HTML fragment and serialized
  # */xml, *+xml:: the node's children converted to strings and concatenated
  # other::        the node's text decoded as base64
  def value
    case @node.attributes['type']
    when 'xhtml'
      serialize(@node.elements[1].to_a).strip
    when 'text', nil, /^text\//i
      (@node.text || '').strip
    when 'html'
      text = @node.text.strip rescue ''
      serialize HTML5.parse_fragment(text, :encoding => 'UTF-8')
    when /\+xml$/i, /\/xml$/i
      # join rather than to_s: since Ruby 1.9 Array#to_s is an alias of
      # inspect and would return "[...]" instead of the concatenated nodes.
      @node.to_a.join.strip
    else
      # base 64; an element without text decodes to an empty string
      (@node.text || '').gsub(/\s/, '').unpack('m').first
    end
  end

  # Media type for the content: the three short type names are mapped to
  # their full media types, anything else is returned as written.
  def type
    case @node.attributes['type']
    when 'xhtml'
      'application/xhtml+xml'
    when 'text', nil
      'text/plain'
    when 'html'
      'text/html'
    else
      @node.attributes['type']
    end
  end

  # NOTE(review): url_norm is not defined anywhere in this file; the other
  # readers call Planet.uri_norm(base, value). Confirm this helper exists.
  def base
    url_norm(@node.xmlbase)
  end

  # Value of the node's xml:lang attribute.
  def language
    @node.attributes['xml:lang']
  end

  private

  # DOM to string
  def serialize nodes
    nodes.map { |node|
      # resolve relative URIs
      if node.respond_to? :attributes
        # A node whose parent has no parent of its own gets this
        # construct's base recorded on that parent, unless one is already
        # set there.
        if !node.parent.parent
          node.parent.attributes['xml:base'] ||= @node.xmlbase
        end
        resolve node
      end

      HTML5::XHTMLSerializer.serialize(REXML_TREEWALKER.new(node))
    }.join
  end

  # resolve relative URIs
  #
  # Rewrites every href attribute on +element+ and its descendant elements
  # through Planet.uri_norm, using each element's own xmlbase.
  def resolve element
    element.attributes.each do |name, value|
      if %w(href).include? name
        element.attributes[name] =
          Planet.uri_norm(element.xmlbase, value)
      end
    end
    element.each_element { |child| resolve child }
  end
end
280
# A person construct (author or contributor).
class Author < UserDict
  # The generated uri reader is superseded by the explicit definition below.
  text_element :name, :email, :uri

  # Text of the 'uri' child passed through Planet.uri_norm together with
  # that child's xmlbase; nil when there is no 'uri' child.
  def uri
    uri_element = @node.elements['uri']
    uri_element && Planet.uri_norm(uri_element.xmlbase, uri_element.text)
  end

  # "name (email)" when both are present, just the name when there is no
  # email, and the email (possibly empty) when there is no name.
  def to_s
    return "#{email}" unless name

    if email
      "#{name} (#{email})"
    else
      "#{name}"
    end
  end

  alias_method :url, :uri
  alias_method :href, :uri
end
303
# A link element.
class Link < UserDict
  element_attr :title, :length, :hreflang
  reluri_attr :href

  alias_method :url, :href

  # The rel attribute, with 'alternate' standing in when it is absent.
  def rel
    @node.attributes['rel'] || 'alternate'
  end

  # The type attribute when present; otherwise 'application/atom+xml' for
  # a rel of 'self' and nil for anything else.
  def type
    declared = @node.attributes['type']
    return declared if declared

    rel == 'self' ? 'application/atom+xml' : nil
  end
end
318
# A category element; each reader returns the attribute of the same name.
class Category < UserDict
  element_attr :term
  element_attr :scheme
  element_attr :label
end
322
# A generator element.
class Generator < UserDict
  reluri_attr :uri
  element_attr :version

  alias_method :href, :uri

  # The element's text.
  def name
    node_text = @node.text
    node_text
  end
end
333 end