Update scraper.py

TimLundSE26 · Dec 23, 2017 · 754f780
1 parent 485564f
commit 754f780
Showing 1 changed file with 11 additions and 8 deletions.
diff --git a/scraper.py b/scraper.py
@@ -8,7 +8,7 @@
 
 def search(mth):
 
-  	request_data = {"month": mth, "dateType": "DC_Validated" , "searchType": "Application" }
+ # 	request_data = {"month": mth, "dateType": "DC_Validated" , "searchType": "Application" }
 
 #	sleep(2)
 #	result = requests.post('http://public.oxford.gov.uk/online-applications/monthlyListResults.do?action=firstPage', request_data)
@@ -17,9 +17,7 @@ def search(mth):
 #		print "No result returned"
 #		return
 
-#	result_dom = fromstring(result.content)  
-#  	applications = result_dom.xpath("//li[@class='searchresult']")
-#  	print len(applications)
+
 
 #	for application in applications:
 #		link = "".join(application.xpath('a/@href')).strip()
@@ -29,8 +27,6 @@ def search(mth):
 
 #		print link, address
 
-	sleep(2)
-
 # GET on the url with searchCriteria.page=N ...
 #	result = requests.get('http://public.oxford.gov.uk/online-applications/pagedSearchResults.do?action=page&searchCriteria.page=2')
 
@@ -44,17 +40,24 @@ def search(mth):
 #    answer in my head right now), but the good news is that they don't seem to be necessary - submitting the same POST
 #    request without them still gets the desired results.
 
-	#searchCriteria.page="n"
-	#action" value="page"
+#searchCriteria.page="n"
+#action" value="page"
+
 	request_data = {"month": mth, 
 			"dateType": "DC_Validated" , 
 			"searchType": "Application", 
 			"searchCriteria.page": "2" , 
 #			"action": "page",
 		        "searchCriteria.resultsPerPage": "5"}
 
+	sleep(2)
+
 	result = requests.post('http://public.oxford.gov.uk/online-applications/pagedSearchResults.do?action=page', request_data)
 
+	result_dom = fromstring(result.content)  
+  	applications = result_dom.xpath("//li[@class='searchresult']")
+  	print len(applications)	
+
 	for application in applications:
 		link = "".join(application.xpath('a/@href')).strip()
 		description = "".join(application.xpath('a/text()')).strip()