public
Description: Mechanize is a Ruby library that makes automated web interaction easy.
Homepage: http://mechanize.rubyforge.org/
Clone URL: git://github.com/aaronp/mechanize.git
Search Repo:
Click here to lend your support to mechanize and make a donation at www.pledgie.com!
adding link scheme handlers [#18955]
aaronp (author)
Sun May 11 19:09:52 -0700 2008
commit  67ce61445a1beee6bcd556796e4e734f24ff294a
tree    ca8f777f3f49b652bb0373b038d40078593c7a7d
parent  a37ae038f5bef1064045bd1cc06280f2932f43d3
...
6
7
8
 
 
9
10
11
...
6
7
8
9
10
11
12
13
0
@@ -6,6 +6,8 @@
0
   * Added support for reading Mozilla cookie jars. Thanks Chris Riddoch!
0
   * Moving text, password, hidden, int to default. Thanks Tim Harper!
0
   * Mechanize#history_added callback for page loads. Thanks Tobi Reif!
0
+ * Mechanize#scheme_handlers callbacks for handling unsupported schemes on
0
+ links.
0
 
0
 * Bug Fixes:
0
   * Ignoring scheme case
...
8
9
10
 
11
12
13
...
71
72
73
 
74
75
76
...
125
126
127
 
 
 
 
 
 
 
 
 
128
129
130
...
420
421
422
 
423
424
425
...
8
9
10
11
12
13
14
...
72
73
74
75
76
77
78
...
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
...
431
432
433
434
435
436
437
0
@@ -8,6 +8,7 @@
0
 
0
 require 'www/mechanize/content_type_error'
0
 require 'www/mechanize/response_code_error'
0
+require 'www/mechanize/unsupported_scheme_error'
0
 require 'www/mechanize/cookie'
0
 require 'www/mechanize/cookie_jar'
0
 require 'www/mechanize/history'
0
@@ -71,6 +72,7 @@
0
     attr_accessor :follow_meta_refresh
0
     attr_accessor :verify_callback
0
     attr_accessor :history_added
0
+ attr_accessor :scheme_handlers
0
   
0
     attr_reader :history
0
     attr_reader :pluggable_parser
0
@@ -125,6 +127,15 @@
0
       @connection_cache = {}
0
       @keep_alive_time = 300
0
       @keep_alive = true
0
+
0
+ @scheme_handlers = Hash.new { |h,k|
0
+ h[k] = lambda { |link, page|
0
+ raise UnsupportedSchemeError.new(k)
0
+ }
0
+ }
0
+ @scheme_handlers['http'] = lambda { |link, page| link }
0
+ @scheme_handlers['https'] = @scheme_handlers['http']
0
+ @scheme_handlers['relative'] = @scheme_handlers['http']
0
   
0
       yield self if block_given?
0
     end
0
@@ -420,6 +431,7 @@
0
               )
0
       end
0
   
0
+ url = @scheme_handlers[url.relative? ? 'relative' : url.scheme.downcase].call(url, cur_page)
0
       url.path = '/' if url.path.length == 0
0
   
0
       # construct an absolute uri
...
 
 
 
 
 
 
 
 
 
 
...
1
2
3
4
5
6
7
8
9
10
0
@@ -1 +1,11 @@
0
+module WWW
0
+ class Mechanize
0
+ class UnsupportedSchemeError < RuntimeError
0
+ attr_accessor :scheme
0
+ def initialize(scheme)
0
+ @scheme = scheme
0
+ end
0
+ end
0
+ end
0
+end
...
12
13
14
 
 
15
16
...
12
13
14
15
16
17
18
0
@@ -12,6 +12,8 @@
0
     <a href="link with space.html">not encoded space</a>
0
     <!-- End escaped bug -->
0
     <a href="unusual&&%3F%3F%23%23.html">unusual characters</a>
0
+
0
+ <a href="javascript:new_page('1')">javascript link</a>
0
   </body>
0
 </html>
...
42
43
44
45
 
46
47
48
...
42
43
44
 
45
46
47
48
0
@@ -42,7 +42,7 @@
0
   end
0
 
0
   def test_unsupported_scheme
0
- assert_raise(RuntimeError) {
0
+ assert_raise(WWW::Mechanize::UnsupportedSchemeError) {
0
       @agent.get('ftp://server.com/foo.html')
0
     }
0
   end
...
5
6
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
9
10
...
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
0
@@ -5,6 +5,21 @@
0
     @agent = WWW::Mechanize.new
0
   end
0
 
0
+ def test_unsupported_link_types
0
+ page = @agent.get("http://google.com/tc_links.html")
0
+ link = page.links.text('javascript link').first
0
+ assert_raise(WWW::Mechanize::UnsupportedSchemeError) {
0
+ link.click
0
+ }
0
+
0
+ @agent.scheme_handlers['javascript'] = lambda { |link, page|
0
+ URI.parse('http://localhost/tc_links.html')
0
+ }
0
+ assert_nothing_raised {
0
+ link.click
0
+ }
0
+ end
0
+
0
   def test_base
0
     page = @agent.get("http://google.com/tc_base_link.html")
0
     page = page.links.first.click

Comments

    No one has commented yet.