public
Rubygem
Description: Tool for spidering websites/blogs to extract geodata.
Homepage: http://geospider.rubyforge.org
Clone URL: git://github.com/tomtaylor/geo-spider.git
Postcode geocoder now uses Multimap for much better accuracy.
tomtaylor (author)
Fri Oct 03 08:19:35 -0700 2008
commit  84122b64559585a2e98a94c325212e15d046b774
tree    8cd46dc1428b7b4b8692bdaafbc1a2365435ce57
parent  49166f038c27bee87de8114289e498e3e4a26e12
...
11
12
13
14
 
15
16
17
18
19
20
21
 
22
23
24
...
27
28
29
30
 
31
32
33
...
36
37
38
39
40
 
 
41
42
43
...
11
12
13
 
14
15
16
17
18
19
20
 
21
22
23
24
...
27
28
29
 
30
31
32
33
...
36
37
38
 
 
39
40
41
42
43
0
@@ -11,14 +11,14 @@ module GeoSpider
0
       REGEXP = /(GIR 0AA|[A-PR-UWYZ]([0-9]{1,2}|([A-HK-Y][0-9]|[A-HK-Y][0-9]([0-9]|[ABEHMNPRV-Y]))|[0-9][A-HJKS-UW])(\s*)[0-9][ABD-HJLNP-UW-Z]{2})/i
0
       
0
       def locations
0
- results = @element.inner_html.scan(REGEXP)
0
+ results = @element.inner_text.scan(REGEXP)
0
         results = results.map(&:first)
0
         
0
         found_locations = []
0
         
0
         results.each do |result|
0
           begin
0
- p = geocoder.locate(result)
0
+ p = geocoder.locate(:zip => result, :country => "GB")
0
             found_locations << Location.new(:latitude => p.latitude, :longitude => p.longitude, :title => result)
0
           rescue Graticule::Error => e
0
             next
0
@@ -27,7 +27,7 @@ module GeoSpider
0
         return found_locations
0
       end
0
       
0
- # You need to set a valid Yahoo API key before the UK postcode geocoding will work. Yahoo have vastly better UK postcode accuracy than the other large mapping providers, apart from perhaps Multimap.
0
+ # You need to set a valid Multimap API key before the UK postcode geocoding will work. Multimap have the most accurate UK postcode geocoding I've discovered.
0
       
0
       def self.api_key=(api_key)
0
         @@api_key = api_key
0
@@ -36,8 +36,8 @@ module GeoSpider
0
       private
0
       
0
       def geocoder
0
- raise "No Yahoo API key set" unless @@api_key
0
- Graticule.service(:yahoo).new @@api_key
0
+ raise "No Multimap API key set" unless @@api_key
0
+ Graticule.service(:multimap).new @@api_key
0
       end
0
       
0
     end
...
25
26
27
28
 
29
30
 
31
32
33
...
49
50
51
52
 
53
54
 
55
56
57
...
122
123
124
 
 
 
 
 
 
 
 
 
 
 
 
 
125
126
...
25
26
27
 
28
29
 
30
31
32
33
...
49
50
51
 
52
53
 
54
55
56
57
...
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
0
@@ -25,9 +25,9 @@ describe Page, "with a single postcode which is being parsed" do
0
     OpenURI.should_receive(:open_uri).and_return(page_as_string('single_postcode.html'))
0
     @page = Page.new("http://www.example.com")
0
     GeoSpider::Extractors::Postcode.api_key = "waffles"
0
- mock_geocoder_result = OpenStruct.new( {:location => [51.000000, -1.000000]} )
0
+ mock_geocoder = OpenStruct.new( { :locate => OpenStruct.new({ :longitude => -1.000000, :latitude => 51.000000 })})
0
     Graticule.stub!(:service)
0
- Graticule.service.should_receive(:new).and_return(mock_geocoder_result)
0
+ Graticule.service.should_receive(:new).and_return(mock_geocoder)
0
   end
0
   
0
   it "should find one location" do
0
@@ -49,9 +49,9 @@ describe Page, "with multiple microformats and postcodes being parsed" do
0
     OpenURI.should_receive(:open_uri).and_return(page_as_string('multiple_postcodes_and_microformats.html'))
0
     @page = Page.new("http://www.example.com")
0
     
0
- mock_geocoder_result = OpenStruct.new( {:location => [51.000000, -1.000000]} )
0
+ mock_geocoder = OpenStruct.new( { :locate => OpenStruct.new({ :longitude => -1.000000, :latitude => 51.000000 })})
0
     Graticule.stub!(:service)
0
- Graticule.service.should_receive(:new).twice.and_return(mock_geocoder_result)
0
+ Graticule.service.should_receive(:new).twice.and_return(mock_geocoder)
0
   end
0
   
0
   it "should find four locations" do
0
@@ -122,4 +122,17 @@ describe Page, "which is finding the title" do
0
       @page.title.should == "Heading 1"
0
     end
0
   end
0
+end
0
+
0
+describe Page, "which is parsing a page with a string in a URL that happens to match a postcode" do
0
+
0
+ before(:each) do
0
+ OpenURI.should_receive(:open_uri).and_return(page_as_string('postcode_in_url.html'))
0
+ @page = Page.new("http://www.example.com")
0
+ end
0
+
0
+ it "should not find any locations" do
0
+ @page.locations.should be_empty
0
+ end
0
+
0
 end
0
\ No newline at end of file

Comments

    No one has commented yet.