Skip to content

Commit

Permalink
Automatic update to make ScraperWiki scraper work on morph.io
Browse files Browse the repository at this point in the history
  • Loading branch information
TOMLACC committed Feb 23, 2018
1 parent 75e5a89 commit ccd4cd1
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions scraper.rb
@@ -1,3 +1,4 @@
require 'scraperwiki'
require 'rubygems'
require 'mechanize'
require 'nokogiri'
Expand All @@ -6,8 +7,8 @@
require 'date'
require 'yaml'

#ScraperWiki.sqliteexecute('CREATE TABLE `swdata` (`url` text, `uid` text, `date_scraped` text,`start_date` text)')
#ScraperWiki.sqliteexecute('CREATE INDEX date_scraped ON swdata (date_scraped)')
#ScraperWiki.sqliteexecute('CREATE TABLE `data` (`url` text, `uid` text, `date_scraped` text,`start_date` text)')
#ScraperWiki.sqliteexecute('CREATE INDEX date_scraped ON data (date_scraped)')
#exit

# Root URL on www.southoxon.gov.uk under the /ccm/support/ path.
# NOTE(review): presumably the prefix that request paths are appended to; its
# uses are outside this excerpt — confirm against the full scraper.rb.
BASE_URL = 'http://www.southoxon.gov.uk/ccm/support/'
Expand Down Expand Up @@ -39,11 +40,11 @@ def search_for_new_applications(until_date=Date.today)
end

# Refreshes stored application rows in two passes:
#   1. up to 500 rows whose date_scraped is NULL;
#   2. up to 500 rows whose date_validated is later than 60 days before today,
#      ordered by date_scraped.
# Every selected row is handed to populate_application_details, which is
# defined outside this excerpt.
#
# NOTE(review): each query appears twice, once against `swdata` and once
# against `data`. This excerpt looks like a rendered diff with its +/- markers
# stripped (the commit renames the table), so the `swdata` line is presumably
# the removed version and the `data` line its replacement — confirm against
# the actual scraper.rb. Read literally as Ruby, both select calls run and the
# second assignment overwrites the result of the first.
def update_stale_applications
unpopulated_applications = ScraperWiki.select("* from swdata WHERE date_scraped IS NULL LIMIT 500")
unpopulated_applications = ScraperWiki.select("* from data WHERE date_scraped IS NULL LIMIT 500")
unpopulated_applications.each do |app|
populate_application_details(app)
end
# '%F' formats the cutoff as an ISO 8601 date (YYYY-MM-DD) so it can be compared
# as text with the stored date_validated value.
current_applications = ScraperWiki.select("* from swdata WHERE date_validated > '#{(Date.today-60).strftime('%F')}' ORDER BY date_scraped LIMIT 500")
current_applications = ScraperWiki.select("* from data WHERE date_validated > '#{(Date.today-60).strftime('%F')}' ORDER BY date_scraped LIMIT 500")
current_applications.each do |app|
populate_application_details(app)
end
end
Expand Down

0 comments on commit ccd4cd1

Please sign in to comment.