Update scraper.py

GianlucaGL · Jan 26, 2018 · d4af88d · d4af88d
1 parent 96d74ce
commit d4af88d
Showing 1 changed file with 24 additions and 3 deletions.
diff --git a/scraper.py b/scraper.py
@@ -23,9 +23,30 @@
   print record
   scraperwiki.sqlite.save(unique_keys=['link'], data=record)
 
-tds = root.cssselect("td div")
-print 'THESE ARE THE TDS', tds
-print 'THERE ARE', len(tds), 'TDS'
+# tds = root.cssselect("td div")
+# print 'THESE ARE THE TDS', tds
+# print 'THERE ARE', len(tds), 'TDS'
+#for td in tds:
+# div = td.text_content().encode('ascii','ignore')
+# if "years" in div:
+ # print div
+  #record['age'] = div
+  #scraperwiki.sqlite.save(unique_keys=['age'], data=record, table_name="ages")
+
+root = lxml.html.fromstring(html)
+nages = root.cssselect("tr")
+for name in nages:
+  # print name.text
+  print name.text.encode('ascii', 'ignore')
+  print name.attrib['href']
+  #store the link in the variable 'record' under the key 'link'
+  record['link'] = name.attrib['href']
+  record['name'] = name.text.encode('ascii', 'ignore')
+  print record
+  scraperwiki.sqlite.save(unique_keys=['link'], data=record)
+
+
+