public
Description: Yet Another Planet Refactoring
Homepage: http://intertwingly.net/blog/2007/12/19/Yet-Another-Planet-Refactoring
Clone URL: git://github.com/rubys/mars.git
Search Repo:
Sam Ruby (author)
Thu Apr 03 17:55:30 -0700 2008
commit  594cd30192668c6310b3236b978d5d4a6d706fb7
tree    fb2c98375a3f69e66dc22da049a8c7e26438d717
parent  775bc2a397c7812ae67b9979f288c3c835aab059
mars / planet / xmlparser.rb
100644 168 lines (136 sloc) 4.283 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
require 'rexml/document'
require 'html5/liberalxmlparser'
 
module Planet
  # Turns feed source text into a REXML::Document.
  #
  # At load time the module picks the first backend that can be required:
  # expat (xml/parser), then libxml2 (xml/libxml), then plain REXML. Whatever
  # backend is used, the result is always a REXML::Document. When the strict
  # parse fails, the HTML5 liberal XML parser is used instead and the
  # document is flagged as "bozo".
  module XmlParser
    # Raised by the libxml2 SAX callbacks when the parser reports an error.
    # It is a StandardError so that XmlParser.parse can recover from it
    # without having to rescue Exception.
    class ParseError < StandardError; end

    # Backend selection: each require is attempted in order of preference
    # and the first one that loads wins.
    begin
      require 'xml/parser' # http://www.yoshidam.net/xmlparser_en.txt
      @@parser = :expat
    rescue LoadError
      begin
        require 'xml/libxml' # http://libxml.rubyforge.org/
        @@parser = :libxml2
      rescue LoadError
        @@parser = :rexml
      end
    end

    # Parse +source+ and return a REXML::Document.
    #
    # source:: a String, or any object responding to +read+ (its +read+
    #          result is what gets parsed).
    #
    # The returned document gains a +bozo+ accessor: false when the selected
    # strict parser succeeded, true when the liberal fallback had to be used.
    # Errors raised by the fallback parser itself are not rescued here.
    def self.parse(source)
      source = source.read if source.respond_to? :read

      begin
        doc =
          case @@parser
          when :expat
            # fast, compliant, but not always installed
            XmlParser.expat(source)
          when :libxml2
            # also fast, compliant, but not always installed
            XmlParser.libxml2(source)
          else
            # fairly fast, fairly compliant, always available
            REXML::Document.new(source)
          end
        bozo = false
      rescue StandardError
        # Only StandardError is rescued so that Interrupt, SystemExit and
        # similar process-level exceptions still propagate to the caller.
        #
        # If everything is being bozo'd, enable this to see why.
        # print "PARSE ERROR: #{$!}\n #{$!.backtrace.join("\n ")}\n"

        # last ditch attempt: use a liberal XML parser
        parser = HTML5::XMLParser.new
        doc = REXML::Document.new
        parser.parse_fragment(source).each do |node|
          begin
            doc << node
          rescue StandardError
            # best effort: skip any node the document refuses to accept
            nil
          end
        end
        bozo = true
      end

      # augment the document with feed parser attributes
      class << doc
        attr_accessor :bozo
      end
      doc.bozo = bozo

      doc
    end

    # Build a REXML::Document from +source+ by replaying expat parse events.
    # Parser errors are not rescued here; they propagate to the caller.
    def self.expat(source)
      parser = XML::Parser.new
      class << parser
        # enable additional events
        attr_accessor :startDoctypeDecl
        attr_accessor :comment
      end

      doc = REXML::Document.new
      node = doc # element currently being populated

      parser.parse(source) do |type, name, data|
        case type
        when XML::Parser::START_ELEM
          # name = element name ; data = hash of attributes
          node = node.add_element(name)
          data.each { |key, value| node.add_attribute(key, value) }

        when XML::Parser::END_ELEM
          # name = element name ; data = nil
          node = node.parent

        when XML::Parser::CDATA
          # name = nil ; data = string
          node.add_text(data)

        when XML::Parser::COMMENT
          # name = nil ; data = string
          REXML::Comment.new(data, node)

        when XML::Parser::START_DOCTYPE_DECL
          # name = notation name ; data = [URL base, system ID, public ID]
          # NOTE(review): the SYSTEM/PUBLIC choice keys on data[2] while the
          # identifiers written are data[1] and data[0]; confirm the payload
          # layout against the xmlparser documentation before changing this.
          REXML::DocType.new([name, data[2] ? 'SYSTEM' : 'PUBLIC',
            data[1].inspect, data[0].inspect], node)
        end
      end

      parser.done

      doc
    end

    if @@parser == :libxml2
      unless XML::SaxParser.const_defined?(:Callbacks)
        # shim to upgrade libxml 0.3.8.4 to the 0.5.2.0 interface
        class XML::SaxParser
          module Callbacks
          end

          # Register every on_* method of +callback+ as the block handler of
          # the same-named SaxParser method.
          def callbacks= callback
            callback.methods.grep(/^on_/).each do |method|
              send(method) { |*args| callback.send method, *args }
            end
          end
        end
      end

      # SAX handler that mirrors libxml2 events into a REXML tree rooted at
      # the node given to the constructor.
      class Callbacks

        include XML::SaxParser::Callbacks

        # node:: the REXML parent that receives the parsed content.
        def initialize(node)
          @node = node
        end

        # Open a child element and make it the current node.
        def on_start_element(name, attrs)
          @node = @node.add_element(name)
          attrs.each { |key, value| @node.add_attribute(key, value) }
        end

        # Close the current element by stepping back to its parent.
        def on_end_element(name)
          @node = @node.parent
        end

        def on_characters(chars)
          @node.add_text(chars)
        end

        # CDATA content is stored as ordinary text.
        def on_cdata_block(cdata)
          @node.add_text(cdata)
        end

        def on_comment(data)
          REXML::Comment.new(data, @node)
        end

        # Parser-reported errors abort the parse with a ParseError.
        def on_parser_error(message)
          raise XmlParser::ParseError, message
        end

        def on_parser_fatal_error(message)
          raise XmlParser::ParseError, message
        end

        # NOTE(review): always emits a PUBLIC doctype, whatever externalId
        # holds; confirm that is intended for SYSTEM-only declarations.
        def on_external_subset(name, externalId, systemId)
          REXML::DocType.new([name, 'PUBLIC', externalId.inspect,
            systemId.inspect], @node)
        end
      end
    end

    # Build a REXML::Document from +source+ using the libxml2 SAX parser.
    # Errors reported by the parser surface as XmlParser::ParseError.
    def self.libxml2(source)
      parser = XML::SaxParser.new

      doc = REXML::Document.new

      parser.string = source
      parser.callbacks = XmlParser::Callbacks.new(doc)
      parser.parse

      doc
    end
  end
end