Permalink
Browse files

More parsing completed

  • Loading branch information...
1 parent: 60b91bf · commit: 935b8c9fd663bc1a76ab537df71a1cde4190d4ee · author: puntofisso (@puntofisso), committed Jun 18, 2012
Showing with 23 additions and 9 deletions.
  1. +23 −9 cron/jobs.py
View
@@ -7,24 +7,38 @@
def downloadJobDetails(url):
jobs_detail_url = settings["jobs"]["url"] + "/" + url
- job = (urllib2.urlopen(jobs_url)).read()
+ job = (urllib2.urlopen(jobs_detail_url)).read()
soup = BeautifulSoup(job)
- maincontent_element = soup.findAll(attrs={'id': 'maincontent'})[0]
+ maincontent_element = soup.findAll(attrs={'id': 'leftpanel'})[0]
+ title = ""
+ unit = ""
reference = ""
closing = ""
interview = ""
salary = ""
description = ""
+ doc_file_url = ""
- msonormal_elements = soup.findAll(attrs={'class': 'staffprofilequote'})
- i=0
+ soup_2 = BeautifulSoup(str(maincontent_element))
+ msonormal_elements = soup_2.findAll('strong')
for desc_elem in msonormal_elements:
- i = i + 1
- description = description + str(desc_elem.contents)
-
- doc_file_url = ""
-
+ contents = str(desc_elem.contents)
+ sib = desc_elem.nextSibling
+ if 'Reference' in contents:
+ reference = str(sib)
+ elif 'Closing Date' in contents:
+ closing = str(sib)
+ elif 'Interview Date' in contents:
+ interview = str(sib)
+ elif 'Salary' in contents:
+ salary = str(sib)
+ elif 'font style' in contents:
+ title = "" #str(sib.children[0])
+ else:
+ unit = str(sib)
+
+ print "Reference: " + reference +", Title: " + title + ", Unit: "+ unit + ", Closing: " + closing + ", Interview: " + interview + ", Salary: " + salary
return description

0 comments on commit 935b8c9

Please sign in to comment.