Permalink
Browse files

Added DA scans for the 6xxx and 8xxx series

  • Loading branch information...
LoveMyData committed Jun 8, 2017
1 parent fa95e38 commit 00a893f0e014a2f561018c9b16fb72538ef16536
Showing with 52 additions and 36 deletions.
  1. +52 −36 scraper.rb
View
@@ -27,55 +27,71 @@ def is_valid_year(date_str, min=2004, max=DateTime.now.year)
page = agent.get base_url + "GeneralEnquiry/EnquiryLists.aspx?ModuleCode=LAP"

# Resume support: query the local DB (if it exists) for every council
# reference of the current period and record the highest DA number already
# scraped. DA numbers fall into three independent sequences — 1..., 6000...
# and 8000... — so track the maximum of each, seeded with the sequence start.
sequences = {1 => 1, 6 => 6000, 8 => 8000}
# NOTE(review): MORPH_PERIOD is interpolated straight into SQL — it is
# operator-supplied config, not user input, but parameterising would be safer.
sql = "select council_reference from data where `council_reference` like '%/#{ENV['MORPH_PERIOD']}'"
results = ScraperWiki.sqliteexecute(sql) rescue false
if results
  results.each do |result|
    # Strip the "/<period>" suffix to recover the bare DA number.
    # Non-bang gsub: returns the original string (not nil) if no match.
    maxDA = result['council_reference'].gsub("/#{ENV['MORPH_PERIOD']}", '')
    case maxDA.to_i
    when 6000..7999
      sequences[6] = maxDA.to_i if maxDA.to_i > sequences[6]
    when 8000..9999
      sequences[8] = maxDA.to_i if maxDA.to_i > sequences[8]
    else
      sequences[1] = maxDA.to_i if maxDA.to_i > sequences[1]
    end
  end
end

# Scan each DA sequence from the highest number seen so far, probing
# consecutive numbers; 10 consecutive misses ends that sequence.
sequences.each do |_series, start|
  i = start
  error = 0
  continue = true
  while continue
    # Submit the search form for DA number "i/<period>".
    form = page.form
    form.field_with(:name => 'ctl00$MainBodyContent$mGeneralEnquirySearchControl$mTabControl$ctl04$mFormattedNumberTextBox').value = i.to_s + '/' + ENV['MORPH_PERIOD'].to_s
    button = form.button_with(:value => "Search")
    list = form.click_button(button)

    table = list.search("table.ContentPanel")
    unless table.empty?
      # Hit: reset the consecutive-miss counter and extract the record.
      error = 0
      tr = table.search("tr.ContentPanel")

      record = {
        'council_reference' => tr.search('a').inner_text,
        'address' => tr.search('span')[3].inner_text,
        'description' => tr.search('span')[2].inner_text.gsub("\n", '. ').squeeze(' '),
        'info_url' => base_url + 'GeneralEnquiry/' + tr.search('a')[0]['href'],
        'comment_url' => comment_url,
        'date_scraped' => Date.today.to_s,
        'date_received' => Date.parse(tr.search('span')[1].inner_text).to_s,
      }

      # "rescue true" treats a missing/unreadable table as "not yet saved",
      # so a fresh DB still gets the record.
      if (ScraperWiki.select("* from data where `council_reference`='#{record['council_reference']}'").empty? rescue true)
        puts "Saving record " + record['council_reference'] + ", " + record['address']
        # puts record
        ScraperWiki.save_sqlite(['council_reference'], record)
      else
        puts 'Skipping already saved record ' + record['council_reference']
      end
    else
      # Miss: count towards the 10-consecutive-miss stop condition.
      error += 1
    end

    # Advance to the next DA number; stop this sequence after 10 misses.
    i += 1
    continue = false if error == 10
  end
end

0 comments on commit 00a893f

Please sign in to comment.