Skip to content

Commit

Permalink
Simplified.
Browse files: browse the repository at this point in the history
  • Loading branch information
MichaelBone committed Aug 16, 2018
1 parent 7f5d455 commit 46b6192
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions scraper.rb
Expand Up @@ -7,17 +7,15 @@
url = "#{base_url}enquirylists.aspx"

agent = Mechanize.new do |a|
a.keep_alive = true # to avoid a "Net::HTTP::Persistent::Error:too many connection resets" condition
# https://github.com/tenderlove/mechanize/issues/123#issuecomment-6432074

a.keep_alive = true
# a.log = Logger.new $stderr
# a.agent.http.debug_output = $stderr
# a.verify_mode = OpenSSL::SSL::VERIFY_NONE
end

p "Getting first page"
first_page = agent.get url
url_query = url + '?' + first_page.body.scan(/js=-?\d+/)[0]
url_query = url + '?' + first_page.body.scan(/js=-?\d+/)[0] # enable JavaScript
first_page = agent.get url_query

p "Selecting List of Development Applications and clicking Next"
Expand All @@ -37,7 +35,6 @@
button = search_form.button_with(:value => "Search")
# submit the form using that button
summary_page = agent.submit(search_form, button)
# p summary_page.title.strip

count = 0
das_data = []
Expand All @@ -58,7 +55,7 @@
break
end
next_page_path = next_page_img['onclick'].split(',').find { |e| e =~ /.*PageNumber=\d+.*/ }.gsub('"', '').strip
p "Found another page: " + next_page_path
p "Next page: " + next_page_path
summary_page = agent.get "#{base_url}#{next_page_path}"
end
end
Expand Down

0 comments on commit 46b6192

Please sign in to comment.