Update scraper.py
Kooben209 committed Apr 11, 2020
1 parent 66cc35b commit 5956369
Showing 1 changed file with 6 additions and 4 deletions: scraper.py
@@ -57,8 +57,8 @@
 
 WEB_DRIVER_OPTIONS.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36")
 
-scraperwiki.sqlite.execute("CREATE TABLE IF NOT EXISTS 'data' ('application' TEXT, 'dateAdded' DATE, 'decision' TEXT, 'address' TEXT, 'proposal' TEXT, 'applicationType' TEXT, 'applicationURL' TEXT, 'documentsURL' TEXT, 'searchName' TEXT,'amendedDateTime' DATETIME, PRIMARY KEY('application','dateAdded','decision'))")
-scraperwiki.sqlite.execute("CREATE UNIQUE INDEX IF NOT EXISTS 'data_unique_key' ON 'data' ('application','dateAdded','decision')")
+#scraperwiki.sqlite.execute("CREATE TABLE IF NOT EXISTS 'data' ('application' TEXT, 'dateAdded' DATE, 'decision' TEXT, 'address' TEXT, 'proposal' TEXT, 'applicationType' TEXT, 'applicationURL' TEXT, 'documentsURL' TEXT, 'searchName' TEXT,'amendedDateTime' DATETIME, PRIMARY KEY('application','dateAdded','decision'))")
+#scraperwiki.sqlite.execute("CREATE UNIQUE INDEX IF NOT EXISTS 'data_unique_key' ON 'data' ('application','dateAdded','decision')")
 
 #driver = webdriver.Chrome(options=WEB_DRIVER_OPTIONS,executable_path='/usr/local/bin/chromedriver')
 driver = webdriver.Chrome(options=WEB_DRIVER_OPTIONS)
@@ -202,8 +202,10 @@
 
 amendedDateTime = datetime.now()
 
-scraperwiki.sqlite.execute("INSERT OR IGNORE INTO 'data' VALUES (?,?,?,?,?,?,?,?,?,?)", (application,dateAdded,decision,address,proposal,applicationType,applicationURL,documentsURL,searchName,amendedDateTime))
-
+if DEBUG:
+    print("write to db")
+#scraperwiki.sqlite.execute("INSERT OR IGNORE INTO 'data' VALUES (?,?,?,?,?,?,?,?,?,?)", (application,dateAdded,decision,address,proposal,applicationType,applicationURL,documentsURL,searchName,amendedDateTime))
+scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
 #if there is a next button click it then get rows and loop over them again
 try:
     nextPageBtn = WebDriverWait(driver, DELAY_SECS).until(EC.presence_of_element_located((By.XPATH, '//*[@id="ctl00_MainContent_grdResults_ctl00"]/tfoot/tr/td/table/tbody/tr/td/div[3]/input[1]')))
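For context, the substance of this commit: the hand-rolled CREATE TABLE / CREATE UNIQUE INDEX / INSERT OR IGNORE statements are commented out in favour of scraperwiki.sqlite.save(), which creates the table if needed and upserts on the given unique keys in a single call. The record saved here ("susan" / "software developer") is the stock example from the ScraperWiki documentation, so this reads as a smoke test of the new code path rather than real data. A minimal sketch of what the call would presumably look like once pointed at the scraper's real variables (an assumption, not part of this commit), reusing the old PRIMARY KEY columns as unique_keys so repeated runs update rows instead of duplicating them:

# Hypothetical follow-up, not in this commit: persist the scraped row via
# scraperwiki.sqlite.save(), keying on the same columns as the old PRIMARY KEY
# so re-scraped applications overwrite their earlier rows.
scraperwiki.sqlite.save(
    unique_keys=['application', 'dateAdded', 'decision'],
    data={
        'application': application,
        'dateAdded': dateAdded,
        'decision': decision,
        'address': address,
        'proposal': proposal,
        'applicationType': applicationType,
        'applicationURL': applicationURL,
        'documentsURL': documentsURL,
        'searchName': searchName,
        'amendedDateTime': amendedDateTime,
    },
)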
