public
Rubygem
Fork of jmhodges/rfeedparser
Description: rFeedParser is a translation of the Universal Feed Parser from Python into Ruby. It behaves almost exactly like the original.
Homepage: http://rfeedparser.rubyforge.org
Clone URL: git://github.com/technomancy/rfeedparser.git
Applying Charlie Savage's patch (slightly modified) to allow use of libxml 
instead of expat.

Split lib/rfeedparser/parsers.rb into three files: expat_parser.rb,
libxml_parser.rb, and loose_feed_parser.rb.

Modified rfeedparser.rb to know about this split and be able to handle
the case where only one of the parsing libraries is installed.

http://rubyforge.org/tracker/index.php?func=detail&aid=12459&group_id=3309&atid=12738
technomancy (author)
Tue May 06 13:46:34 -0700 2008
commit  e8e178fe6d2b687e57e5fe398e6e315f9e3f0e56
tree    69f32467192ed485953789f3ea63b594ccf1260f
parent  0b4539988ebc073bd0a57f32f897d9248f072719
...
1
2
 
 
 
 
 
 
 
 
 
3
4
5
6
7
8
9
10
11
12
13
14
 
15
16
17
18
19
20
21
22
 
 
 
 
 
 
 
 
 
 
 
23
24
25
 
26
 
 
27
28
29
 
30
31
 
 
32
 
33
34
35
36
37
38
39
40
41
42
43
44
45
46
 
47
48
49
50
51
52
53
54
55
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
57
58
59
...
213
214
215
216
 
 
217
218
219
...
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
 
 
 
408
409
410
...
1
 
2
3
4
5
6
7
8
9
10
11
 
 
 
 
 
 
 
 
12
13
14
15
16
17
18
 
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
 
36
37
38
39
40
 
 
41
42
 
43
44
45
46
47
48
49
 
 
 
 
 
 
 
50
51
52
 
53
54
55
56
57
58
59
 
 
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
...
232
233
234
 
235
236
237
238
239
...
413
414
415
 
 
 
 
 
 
 
 
 
 
416
 
417
418
419
420
421
422
0
@@ -1,59 +1,78 @@
0
 #!/usr/bin/env ruby
0
-"""Universal feed parser in Ruby
0
+# Universal feed parser in Ruby
0
+#
0
+# Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
0
+#
0
+# Visit http://feedparser.org/ for the latest version in Python
0
+# Visit http://feedparser.org/docs/ for the latest documentation
0
+# Email Jeff Hodges at jeff@obquo.com for questions
0
+#
0
+# Required: Ruby 1.8
0
 
0
-Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
0
-
0
-Visit http://feedparser.org/ for the latest version in Python
0
-Visit http://feedparser.org/docs/ for the latest documentation
0
-Email Jeff Hodges at jeff@obquo.com for questions
0
-
0
-Required: Ruby 1.8
0
-"""
0
 $KCODE = 'UTF8'
0
 require 'stringio'
0
 require 'uri'
0
+require 'open-uri'
0
 require 'cgi' # escaping html
0
 require 'time'
0
 require 'pp'
0
-require 'rubygems'
0
 require 'base64'
0
 require 'iconv'
0
 require 'zlib'
0
 
0
+require 'rubygems'
0
+
0
+# If available, Nikolai's UTF-8 library will ease use of utf-8 documents.
0
+# See http://git.bitwi.se/ruby-character-encodings.git/.
0
+begin
0
+ gem 'character-encodings', ">=0.2.0"
0
+ require 'encoding/character/utf-8'
0
+rescue LoadError
0
+end
0
+
0
+# TODO: require these in the files that need them, not in the toplevel
0
 gem 'hpricot', "=0.6"
0
 require 'hpricot'
0
-gem 'character-encodings', ">=0.2.0"
0
+
0
 gem 'htmltools', ">=1.10"
0
+require 'html/sgml-parser'
0
+
0
 gem 'htmlentities', ">=4.0.0"
0
-gem 'activesupport', ">=1.4.1"
0
-gem 'rchardet', ">=1.0"
0
+require 'htmlentities'
0
 
0
-require 'xml/saxdriver' # calling expat through the xmlparser gem
0
+gem 'activesupport', ">=1.4.1"
0
+require 'active_support'
0
 
0
+gem 'rchardet', ">=1.0"
0
 require 'rchardet'
0
 $chardet = true
0
 
0
-require 'encoding/character/utf-8'
0
-require 'html/sgml-parser'
0
-require 'htmlentities'
0
-require 'active_support'
0
-require 'open-uri'
0
-include OpenURI
0
-
0
 $debug = false
0
 $compatible = true
0
 
0
-$LOAD_PATH << File.expand_path(File.dirname(__FILE__))
0
+$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
0
 require 'rfeedparser/utilities'
0
 require 'rfeedparser/forgiving_uri'
0
 require 'rfeedparser/better_sgmlparser'
0
 require 'rfeedparser/better_attributelist'
0
 require 'rfeedparser/feedparserdict'
0
 require 'rfeedparser/parser_mixin'
0
-require 'rfeedparser/parsers'
0
-require 'rfeedparser/monkey_patches'
0
 
0
+require 'rfeedparser/loose_feed_parser'
0
 
0
+begin
0
+ require 'rfeedparser/libxml_parser'
0
+ StrictFeedParser = FeedParser::LibXml::StrictFeedParser
0
+rescue LoadError
0
+end
0
+
0
+begin
0
+ require 'rfeedparser/expat_parser'
0
+ StrictFeedParser = FeedParser::Expat::StrictFeedParser
0
+rescue LoadError
0
+end
0
+
0
+require 'rfeedparser/monkey_patches'
0
 
0
 module FeedParser
0
   extend FeedParserUtilities
0
@@ -213,7 +232,8 @@ module FeedParser
0
     
0
     # Use the default compatibility if compatible is nil
0
     $compatible = options[:compatible].nil? ? $compatible : options[:compatible]
0
-
0
+
0
+ # TODO: don't even try strict if it's not defined
0
     strictklass = options[:strict] || StrictFeedParser
0
     looseklass = options[:loose] || LooseFeedParser
0
     options[:handlers] = options[:handlers] || []
0
@@ -393,18 +413,10 @@ module FeedParser
0
     end
0
 
0
     if use_strict_parser
0
- # initialize the SAX parser
0
- saxparser = XML::SAX::Helpers::ParserFactory.makeParser("XML::Parser::SAXDriver")
0
- feedparser = strictklass.new(baseuri, baselang, 'utf-8')
0
- saxparser.setDocumentHandler(feedparser)
0
- saxparser.setDTDHandler(feedparser)
0
- saxparser.setEntityResolver(feedparser)
0
- saxparser.setErrorHandler(feedparser)
0
-
0
- inputdata = XML::SAX::InputSource.new('parsedfeed')
0
- inputdata.setByteStream(StringIO.new(data))
0
       begin
0
- saxparser.parse(inputdata)
0
+ parser = StrictFeedParser.new(baseuri, baselang)
0
+ feedparser = parser.handler
0
+ parser.parse(data)
0
         
0
       rescue StandardError, XML::SAX::SAXParseException => parseerr # resparse
0
 

Comments

    No one has commented yet.