public
Rubygem
Fork of jmhodges/rfeedparser
Description: rFeedParser is a translation of the Universal Feed Parser from Python into Ruby. It has nearly the exact same behavior.
Homepage: http://rfeedparser.rubyforge.org
Clone URL: git://github.com/technomancy/rfeedparser.git
rfeedparser / tests / rfeedparser_test_helper.rb
100644 257 lines (227 sloc) 9.593 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# -*- coding: utf-8 -*-
 
require 'test/unit'
require File.join(File.dirname(__FILE__),'../lib/rfeedparser')
 
begin
  require 'rubygems'
  gem 'mongrel'
  require 'mongrel'
rescue LoadError
  STDERR.puts "Whoops, had an error with loading mongrel as a gem. Trying just 'require'. Mongrel is required for testing."
  require 'mongrel'
end
 
# Add a reason phrase for HTTP status 220 to Mongrel's status-code table.
# NOTE(review): presumably some test fixtures answer with status 220, which
# Mongrel has no phrase for by default -- confirm against the xml fixtures.
Mongrel::HTTP_STATUS_CODES[220] = "Unspecified success"
 
# Top-level shortcut for FeedParser.uconvert; the three arguments are passed
# through untouched and the callee's result is returned as-is.
def uconvert(one, two, three)
  FeedParser.uconvert(one, two, three)
end
# Top-level shortcut for FeedParser._ebcdic_to_ascii; the argument is passed
# through untouched and the callee's result is returned as-is.
def _ebcdic_to_ascii(one)
  FeedParser._ebcdic_to_ascii(one)
end
 
# Global port number shared by the test helpers.
# NOTE(review): presumably the port the local test HTTP server listens on --
# confirm where the server is started.
$PORT = 8097 # Not configurable, hard coded in the xml files
 
# Returns +data+ without its first +count+ bytes.
# Uses String#byteslice where available so that the cut is made on bytes even
# when the string carries a multi-byte encoding; falls back to String#[] on
# Rubies where strings are plain byte arrays.
def _strip_leading_bytes(data, count)
  return data.byteslice(count..-1) if data.respond_to?(:byteslice)
  data[count..-1]
end

# Sniffs the leading bytes of a raw feed document and, when they identify a
# non-ASCII-compatible encoding, hands the document to the matching converter.
#
# data - String holding the raw file contents.
#
# Returns the converted document (whatever uconvert / _ebcdic_to_ascii
# return), the document minus its UTF-8 BOM, or +data+ itself when no
# signature is recognised.
def translate_data(data)
  # Compare byte values, not characters: String#[] counts characters on
  # encoding-aware Rubies, so e.g. a UTF-8 BOM would be a single character
  # and never match a three-byte signature.
  head = data.unpack('C4')
  four_bytes = head.compact.length == 4

  if head == [0x4c, 0x6f, 0xa7, 0x94]
    # EBCDIC
    data = _ebcdic_to_ascii(data)
  elsif head == [0x00, 0x3c, 0x00, 0x3f]
    # UTF-16BE
    data = uconvert(data, 'utf-16be', 'utf-8')
  elsif four_bytes && head[0, 2] == [0xfe, 0xff] && head[2, 2] != [0x00, 0x00]
    # UTF-16BE with BOM
    data = uconvert(_strip_leading_bytes(data, 2), 'utf-16be', 'utf-8')
  elsif head == [0x3c, 0x00, 0x3f, 0x00]
    # UTF-16LE
    data = uconvert(data, 'utf-16le', 'utf-8')
  elsif four_bytes && head[0, 2] == [0xff, 0xfe] && head[2, 2] != [0x00, 0x00]
    # UTF-16LE with BOM (the two following zero bytes would make it UTF-32LE)
    data = uconvert(_strip_leading_bytes(data, 2), 'utf-16le', 'utf-8')
  elsif head == [0x00, 0x00, 0x00, 0x3c]
    # UTF-32BE
    data = uconvert(data, 'utf-32be', 'utf-8')
  elsif head == [0x3c, 0x00, 0x00, 0x00]
    # UTF-32LE
    data = uconvert(data, 'utf-32le', 'utf-8')
  elsif head == [0x00, 0x00, 0xfe, 0xff]
    # UTF-32BE with BOM
    data = uconvert(_strip_leading_bytes(data, 4), 'utf-32BE', 'utf-8')
  elsif head == [0xff, 0xfe, 0x00, 0x00]
    # UTF-32LE with BOM
    data = uconvert(_strip_leading_bytes(data, 4), 'utf-32LE', 'utf-8')
  elsif head[0, 3] == [0xef, 0xbb, 0xbf]
    # UTF-8 with BOM
    data = _strip_leading_bytes(data, 3)
  end
  # Anything else is treated as ASCII-compatible and returned unchanged.
  data
end
 
# Collects the HTTP response headers a fixture file asks to be served with.
# Called by the server.
#
# Two sources are combined:
# * "Header: Name: value" lines found inside the (encoding-translated) file;
# * an "AddType <type> .xml" rule inside a <Files name> section of a
#   ".htaccess" file sitting next to the fixture, which supplies Content-Type.
# The .htaccess value wins when both define Content-Type.
#
# Side effect: when the file declares a non-empty ETag header,
# Mongrel::Const::ETAG_FORMAT is overwritten with that value.
#
# xmlfile - path of the fixture file.
#
# Returns a Hash of header name => header value.
def scrape_headers(xmlfile)
  # Block form so the handle is closed as soon as the contents are read.
  data = File.open(xmlfile) { |xm| xm.read }
  server_headers = {}

  htaccess = File.dirname(xmlfile) + "/.htaccess"
  if File.exist?(htaccess)
    # Escape the name: it always contains a "." which would otherwise match
    # any character inside the regexp.
    fn = Regexp.escape(File.basename(xmlfile))
    rules = File.open(htaccess) { |ht| ht.read }
    type_match = rules.match(/^\s*<Files\s+#{fn}>\s*\n\s*AddType\s+(.*?)\s+\.xml/m)
    if type_match && type_match[1]
      # Drop one optional surrounding quote on each side of the type.
      the_type = type_match[1].strip.gsub(/^("|')/, '').gsub(/("|')$/, '').strip
      server_headers["Content-Type"] = the_type
    end
  end

  xml_headers = {}
  translate_data(data).scan(/^Header:\s*([^:]+)\s*:\s*(.+)\s*$/).each do |pair|
    # Names and values alike are trimmed and have the content-type spelling
    # normalised; a later duplicate header replaces an earlier one.
    name, value = pair.map do |part|
      part.strip.gsub(/(Content-type|content-type|content-Type)/, "Content-Type")
    end
    xml_headers[name] = value
  end

  etag = xml_headers['ETag']
  Mongrel::Const.const_set('ETAG_FORMAT', etag) unless etag.nil? || etag.empty?
  xml_headers.merge(server_headers)
end
 
# Reads the HTTP status a fixture file asks to be served with.
# Called by the server.
#
# The (encoding-translated) file is searched for the first line of the form
# "Status: <value>"; the value is converted with String#to_i.
#
# xmlfile - path of the fixture file.
#
# Returns the Integer status, or 200 when the file has no Status line.
def scrape_status(xmlfile)
  # Block form so the handle is closed as soon as the contents are read.
  raw = File.open(xmlfile) { |xm| xm.read }
  status = translate_data(raw)[/^Status:\s*(.+)\s?$/, 1]
  status ? status.to_i : 200
end
 
# Extracts the test description and the expected-value expression from a
# fixture file and rewrites the expression from Python syntax to Ruby syntax.
# Called by the testing client.
#
# The (encoding-translated) file must contain
# "Description: ... Expect: ... -->"; when it does not, the call fails with
# NoMethodError on nil.
#
# xmlfile - path of the fixture file.
#
# Returns [description, eval_string], both Strings.
def scrape_assertion_strings(xmlfile)
  # Block form so the handle is closed as soon as the contents are read.
  data = translate_data(File.open(xmlfile) { |f| f.read })
  test = data.scan(/Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->/)
  description, eval_string = test.first.map { |s| s.strip }

  # Here we translate the expected values in Python to Ruby

  # Python unicode literals, first u'...' then u"...":
  #   u'string' => 'string'
  #   u'ba\u20acha' => 'ba€ha'
  ["'", '"'].each do |quote|
    eval_string.gsub!(/\bu#{quote}(.*?)#{quote}/) do
      # Replace \u hex values with the actual Unicode character
      escaped = $1.gsub(/\\u([0-9a-fA-F]{4})/) { [$1.hex].pack('U*') }
      " " + quote + escaped + quote
    end
  end

  eval_string.gsub!(/\\x([0-9a-fA-F]{2})/) { [$1.hex].pack('U*') } # "ba\xa3la" => "ba£la"
  eval_string.gsub!(/'\s*:\s+/, "' => ") # {'foo': 'bar'} => {'foo' => 'bar'}
  eval_string.gsub!(/"\s*:\s+/, "\" => ") # {"foo": 'bar'} => {"foo" => 'bar'}
  eval_string.gsub!(/\=\s*\((.*?)\)/, '= [\1]') # = (2004, 12, 4) => = [2004, 12, 4]
  eval_string.gsub!(/"""(.*?)"""/) do # """<a b="foo">""" => "<a b=\"foo\">"
    # Non-destructive gsub: gsub! returns nil when the text holds no double
    # quote, which would make the concatenation raise.
    "\"" + $1.gsub(/"/, "\\\"") + "\""
  end
  eval_string.gsub!(/(\w|\])\s*\=\= 0\s*$/, '\1 == false') # ] == 0 => ] == false
  eval_string.gsub!(/(\w|\])\s*\=\= 1\s*$/, '\1 == true') # ] == 1 => ] == true
  eval_string.gsub!(/len\((.*?)\)\s*\=\=\s*(\d{1,3})/, '\1.length == \2') # len(ary) == 1 => ary.length == 1
  eval_string.gsub!(/None/, "nil") # None => nil
  return description, eval_string
end
 
# Logical opposite of is_valid: true when is_valid rejects the status,
# false when it accepts it.
def is_invalid(response_status)
  return false if is_valid(response_status)
  true
end
 
# True when the status lies strictly between 199 and 300, false otherwise.
def is_valid(response_status)
  return false unless response_status > 199
  response_status < 300
end
 
# Mongrel handler used by the test suite to serve fixture files.
# For HEAD and GET requests the response status and headers are taken from
# the fixture itself (via scrape_status / scrape_headers) before the file is
# sent with #send_file.
class FeedParserTestRequestHandler < Mongrel::DirHandler
  # Entry point called for every request.
  #
  # * Paths that can_serve rejects get a 404 text response.
  # * Directories are answered with send_dir_listing.
  # * HEAD and GET are answered from the fixture file (headers only for HEAD).
  # * Any other method gets a 403 text response.
  # Errors raised while answering are reported on STDERR and swallowed.
  def process(request, response)
    req_method = request.params[Mongrel::Const::REQUEST_METHOD] || Mongrel::Const::GET
    req_path = can_serve(request.params[Mongrel::Const::PATH_INFO])
    if not req_path
      # not found, return a 404
      response.start(404) do |head, out|
        head['Content-Type'] = 'text/plain'
        out << "File not found"
      end
    else
      begin
        if File.directory? req_path
          send_dir_listing(request.params[Mongrel::Const::REQUEST_URI], req_path, response)
        elsif req_method == Mongrel::Const::HEAD
          serve_fixture(req_path, request, response, true)
        elsif req_method == Mongrel::Const::GET
          serve_fixture(req_path, request, response, false)
        else
          response.start(403) { |head, out|
            head['Content-Type'] = 'text/plain'
            out.write(ONLY_HEAD_GET)
          }
        end
      rescue => details
        STDERR.puts "Error sending file #{req_path}: #{details}"
      end
    end
  end

  # Override of DirHandler#send_file. Per the original author's note, the only
  # intended differences from the inherited version are that an already-set
  # response status is kept (`response.status ||= 200` instead of
  # `response.status = 200`) and that Const is spelled Mongrel::Const, because
  # the bare name does not resolve from this subclass.
  #
  # req_path    - path of the file to send.
  # request     - request object; only request.params is read.
  # response    - response object to write to.
  # header_only - when true, status and headers are sent but not the body.
  def send_file(req_path, request, response, header_only=false)
    stat = File.stat(req_path)

    # Set the last modified times as well and etag for all files
    mtime = stat.mtime
    # Calculated the same as apache, not sure how well the works on win32
    etag = Mongrel::Const::ETAG_FORMAT % [mtime.to_i, stat.size, stat.ino]

    header = response.header
    header[Mongrel::Const::ETAG] = etag

    if same_response?(request, mtime, etag)
      response.start(304) {}
    else
      # first we setup the headers and status then we do a very fast send on the socket directly
      response.status ||= 200
      header[Mongrel::Const::LAST_MODIFIED] = mtime.httpdate

      # set the mime type from our map based on the ending
      dot_at = req_path.rindex('.')
      if dot_at
        header[Mongrel::Const::CONTENT_TYPE] = MIME_TYPES[req_path[dot_at .. -1]] || @default_content_type
      else
        header[Mongrel::Const::CONTENT_TYPE] = @default_content_type
      end

      # send a status with out content length
      response.send_status(stat.size)
      response.send_header

      if not header_only
        response.send_file(req_path, stat.size < Mongrel::Const::CHUNK_SIZE * 2)
      end
    end
  end

  private

  # Answers a HEAD or GET request for a fixture file: starts the response with
  # the status and headers scraped from the file, then sends the file unless
  # the status is outside the 2xx range, in which case the status number is
  # written as a text body instead.
  def serve_fixture(req_path, request, response, header_only)
    response_status = scrape_status(req_path)
    rejected = is_invalid(response_status)

    response.start(response_status) do |head, out|
      xml_head = scrape_headers(req_path)
      xml_head.each_key { |k| head[k] = xml_head[k] }

      if rejected
        head['content-type'] = 'text/plain;'
        out << response_status
      end
    end

    send_file(req_path, request, response, header_only) unless rejected
  end

  # Tests whether this is a conditional request whose validators show that the
  # response would be identical to the last one.
  #
  # Returns a truthy value only when at least one of If-Modified-Since /
  # If-None-Match is present and every present validator passes.
  def same_response?(request, mtime, etag)
    modified_since = request.params[Mongrel::Const::HTTP_IF_MODIFIED_SINCE]
    none_match = request.params[Mongrel::Const::HTTP_IF_NONE_MATCH]

    if modified_since
      # An unparsable date counts as a failed validation.
      last_response_time = begin
        Time.httpdate(modified_since)
      rescue StandardError
        nil
      end
      return false unless last_response_time
      return false if last_response_time > Time.now
      return false if mtime > last_response_time
    end

    if none_match
      return false if none_match == '*'
      return false unless none_match.strip.split(/\s*,\s*/).include?(etag)
    end

    # validation successful if we get this far and at least one of the header exists
    modified_since || none_match
  end
end