Commit

Update scraper.py
TimLundSE26 committed Dec 13, 2017
1 parent 7a5a44c commit cf16b08
Showing 1 changed file with 46 additions and 57 deletions.
103 changes: 46 additions & 57 deletions scraper.py
@@ -28,74 +28,63 @@ def search():
# "workhone": workhone, "homeMobile": homeMobile, "workMobile": workMobile, "party": party, "ward": ward}

cols = councillor.xpath("td")
# print len(cols)
if len(cols) == 4:

paras = cols[1].xpath('p')
paras = cols[1].xpath('p')

for i, para in enumerate(paras):
if i == 0:
name = "".join(para.xpath('./a/text()')).strip()
link = "".join(para.xpath('./a/@href')).strip()
else:
pText = "".join(para.xpath('text()')).strip()
# print i, pText
for i, para in enumerate(paras):
if i == 0:
name = "".join(para.xpath('./a/text()')).strip()
link = "".join(para.xpath('./a/@href')).strip()
else:
pText = "".join(para.xpath('text()')).strip()
# print i, pText

if len(para.xpath('a')) ==1:
link1 = "".join(para.xpath('./a/@href')).strip()
matchObj = re.search( r'@', link1)
if matchObj:
matchObj1 = re.search( r'work', pText, re.I)
if re.search( r'work', pText, re.I):
eWork = link1
elif re.search( r'home', pText, re.I):
eHome = link1
if len(para.xpath('a')) ==1:
link1 = "".join(para.xpath('./a/@href')).strip()
matchObj = re.search( r'@', link1)
if matchObj:
matchObj1 = re.search( r'work', pText, re.I)
if re.search( r'work', pText, re.I):
eWork = link1
elif re.search( r'home', pText, re.I):
eHome = link1
else:
print i, pText, link1
else:
print i, pText, link1
print "non email address link"
else:
print "non email address link"
else:
matchObj = re.search( r'OX\d \d[A-Z]{2}', pText)
if matchObj:
address = pText
else:
matchObj = re.search( r'^(.+)?\:\s+(0[0-9 ]+)$', pText)
matchObj = re.search( r'OX\d \d[A-Z]{2}', pText)
if matchObj:
number = matchObj.group(2)
numberType = matchObj.group(1)

if re.search( r'home\s+mob', numberType, re.I):
homeMobile = number
elif re.search( r'work\s+mob', numberType, re.I):
workMobile = number
elif re.search( r'home', numberType, re.I):
homePhone = number
elif re.search( r'work', numberType, re.I):
workPhone = number
address = pText
else:
print i, pText
roles = roles.join(pText)
matchObj = re.search( r'^(.+)?\:\s+(0[0-9 ]+)$', pText)
if matchObj:
number = matchObj.group(2)
numberType = matchObj.group(1)

if re.search( r'home\s+mob', numberType, re.I):
homeMobile = number
elif re.search( r'work\s+mob', numberType, re.I):
workMobile = number
elif re.search( r'home', numberType, re.I):
homePhone = number
elif re.search( r'work', numberType, re.I):
workPhone = number
else:
print i, pText
roles = roles.join(pText)

party = "".join(cols[2].xpath('text()')).strip()
ward = "".join(cols[3].xpath('text()')).strip()

data = { "index": index, "name": name, "link": link, "address": address, "roles": roles, "eWork": eWork, "eHome": eHome, "homePhone": homePhone, "workPhone": workPhone, "homeMobile": homeMobile, "workMobile": workMobile, "party": party, "ward": ward}

scraperwiki.sqlite.save(unique_keys=['index', 'link'], data=data)

print data

party = "".join(cols[2].xpath('text()')).strip()
ward = "".join(cols[3].xpath('text()')).strip()

data = { "index": index, "name": name, "link": link, "address": address, "roles": roles, "eWork": eWork, "eHome": eHome, "homePhone": homePhone, "workPhone": workPhone, "homeMobile": homeMobile, "workMobile": workMobile, "party": party, "ward": ward}

scraperwiki.sqlite.save(unique_keys=['index', 'link'], data=data)

print data
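
For context, the row layout the hunk assumes (an index cell, a cell of contact paragraphs, then party and ward cells) can be reproduced with a small standalone snippet. The HTML fragment, names and URLs below are invented and are not part of scraper.py; they only mimic the four-column structure the code checks for.

    # Standalone sketch with an invented row fragment; not part of scraper.py.
    import lxml.html

    page_html = """
    <table>
      <tr>
        <td>1</td>
        <td>
          <p><a href="http://example.org/cllr/jane-smith">Cllr Jane Smith</a></p>
          <p>Work: <a href="mailto:jane.smith@example.org">jane.smith@example.org</a></p>
          <p>Home: 01865 123456</p>
        </td>
        <td>Independent</td>
        <td>Example Ward</td>
      </tr>
    </table>
    """

    root = lxml.html.fromstring(page_html)
    councillor = root.xpath("//tr")[0]

    cols = councillor.xpath("td")        # the four cells the real code checks for
    paras = cols[1].xpath('p')           # contact paragraphs in the second cell

    name = "".join(paras[0].xpath('./a/text()')).strip()   # "Cllr Jane Smith"
    link = "".join(paras[0].xpath('./a/@href')).strip()    # profile URL
    party = "".join(cols[2].xpath('text()')).strip()       # "Independent"
    ward = "".join(cols[3].xpath('text()')).strip()        # "Example Ward"

    print(name + " | " + link + " | " + party + " | " + ward)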

# if len(councillors) == 0:
# return
# else:
# for index, councillor in enumerate(councillors):
# col1 = councillor.xpath(".//td)[1]").strip()
# data = {"col1": col1, "index": index}

# print data
# scraperwiki.sqlite.save(unique_keys=['index'], data=data)



search()
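
The paragraph classification in the hunk is driven by three regexes: an Oxford-style postcode marks the address, a "label: 0xxx" pattern marks a phone number whose label picks the field, and everything else falls through to role text. Below is a minimal standalone sketch of that decision chain; the helper name classify_contact_line and the sample strings are invented for illustration and do not appear in scraper.py.

    # Minimal sketch of the address/phone/role decision chain; not part of scraper.py.
    import re

    def classify_contact_line(pText):
        # Oxford-style postcode => this paragraph is the postal address
        if re.search(r'OX\d \d[A-Z]{2}', pText):
            return ('address', pText)
        # "<label>: <number starting with 0>" => a phone number whose label
        # (home / work / home mob / work mob) decides which field it fills
        matchObj = re.search(r'^(.+)?\:\s+(0[0-9 ]+)$', pText)
        if matchObj:
            numberType, number = matchObj.group(1), matchObj.group(2)
            if re.search(r'home\s+mob', numberType, re.I):
                return ('homeMobile', number)
            elif re.search(r'work\s+mob', numberType, re.I):
                return ('workMobile', number)
            elif re.search(r'home', numberType, re.I):
                return ('homePhone', number)
            elif re.search(r'work', numberType, re.I):
                return ('workPhone', number)
        # anything else is treated as role text by the scraper
        return ('roles', pText)

    print(classify_contact_line('Home: 01865 123456'))              # ('homePhone', '01865 123456')
    print(classify_contact_line('Work mob: 07700 900123'))          # ('workMobile', '07700 900123')
    print(classify_contact_line('12 Some Street, Oxford OX1 2AB'))  # ('address', ...)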
