Skip to content

Commit

Permalink
Update scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
GianlucaGL committed Jan 26, 2018
1 parent 96d74ce commit d4af88d
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions scraper.py
Expand Up @@ -23,9 +23,30 @@
print record
scraperwiki.sqlite.save(unique_keys=['link'], data=record)

# NOTE(review): the page header reports 3 deletions and 24 additions, so the three
# statements directly below appear to be the lines this commit removed, and the
# commented-out copies after them their replacements -- confirm against scraper.py.
# Select every <div> nested inside a <td> and print the matches and their count.
tds = root.cssselect("td div")
print 'THESE ARE THE TDS', tds
print 'THERE ARE', len(tds), 'TDS'
# tds = root.cssselect("td div")
# print 'THESE ARE THE TDS', tds
# print 'THERE ARE', len(tds), 'TDS'
# Disabled experiment: for each matched div, take its ASCII-only text, print it when
# it contains "years", and store it under record['age'] in a separate "ages" table.
#for td in tds:
# div = td.text_content().encode('ascii','ignore')
# if "years" in div:
# print div
#record['age'] = div
#scraperwiki.sqlite.save(unique_keys=['age'], data=record, table_name="ages")

# Re-parse the page, select every <tr> element, and print/store a link and name
# taken from the selected elements before saving the record.
# NOTE(review): indentation is lost in this view, so which of the statements below
# belong to the `for` body cannot be determined from here -- confirm against scraper.py.
# `html` and `record` are not defined in the visible lines; presumably they are set
# earlier in the script.
root = lxml.html.fromstring(html)
# Select all table-row elements in the parsed document.
nages = root.cssselect("tr")
for name in nages:
# print name.text
# Print the element's leading text with non-ASCII characters dropped.
# NOTE(review): lxml's .text is None when an element has no leading text (likely for
# a <tr> whose first child is a <td>), which would make .encode fail -- TODO confirm.
print name.text.encode('ascii', 'ignore')
# NOTE(review): <tr> elements do not normally carry an href attribute, so this lookup
# looks like it would raise KeyError -- verify the selector (an anchor selector may
# have been intended).
print name.attrib['href']
#store the link in the variable 'record' under the key 'link'
record['link'] = name.attrib['href']
# Store the ASCII-only text under the key 'name'.
record['name'] = name.text.encode('ascii', 'ignore')
print record
# Save the record, passing 'link' as the unique key.
scraperwiki.sqlite.save(unique_keys=['link'], data=record)






Expand Down

0 comments on commit d4af88d

Please sign in to comment.