[fix] cache/sync wasn't passed around
Psycojoker committed Apr 6, 2015
1 parent 8243ea3 commit 7b94b83
Showing 2 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions lachambre/models.py
@@ -71,7 +71,7 @@ class Deputy(models.Model, Jsonify, Parsable):
 
     @classmethod
     def fetch_list(klass, cache=False, sync=False):
-        soup = scraper.get("http://www.lachambre.be/kvvcr/showpage.cfm?section=/depute&language=fr&rightmenu=right_depute&cfm=/site/wwwcfm/depute/cvlist.cfm", "deputies")
+        soup = scraper.get("http://www.lachambre.be/kvvcr/showpage.cfm?section=/depute&language=fr&rightmenu=right_depute&cfm=/site/wwwcfm/depute/cvlist.cfm", "deputies", cache=cache, sync=sync)
 
         for dep in soup.table('tr'):
             items = dep('td')
@@ -114,7 +114,7 @@ def fetch_list(klass, cache=False, sync=False):
 
     @classmethod
     def fetch_one(klass, deputy, cache=False, sync=False):
-        soup, suppe = scraper.get_with_nl(LACHAMBRE_PREFIX + deputy.url, deputy.full_name)
+        soup, suppe = scraper.get_with_nl(LACHAMBRE_PREFIX + deputy.url, deputy.full_name, cache=cache, sync=sync)
 
         deputy.photo_uri = "http://www.lachambre.be" + soup.table.img["src"]
         # XXX can't get this anymore I guess :(
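After this change the flags supplied by the caller actually reach scraper.get and scraper.get_with_nl instead of being dropped. A hypothetical call site (illustrative only, assuming the project's Django environment is set up; not code from this commit):

    Deputy.fetch_list(cache=True, sync=True)                 # flags now forwarded to scraper.get
    for deputy in Deputy.objects.all():
        Deputy.fetch_one(deputy, cache=True, sync=True)      # and to scraper.get_with_nl
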
12 changes: 6 additions & 6 deletions scraper/scraper.py
@@ -10,14 +10,14 @@
 
 
 def get_with_nl(url, name, cache, sync):
-    soup = get(url, name)
-    suppe = get(url.replace("&language=fr", "&language=nl", 1), name + " nl")
+    soup = get(url, name, cache=cache, sync=sync)
+    suppe = get(url.replace("&language=fr", "&language=nl", 1), name + " nl", cache=cache, sync=sync)
     return soup, suppe
 
 
 def get(url, name, cache, sync):
     logger.debug("\033[0;33mparsing %s --- %s\033[0m" % (url, name))
-    text = retreive_content(url, name)
+    text = retreive_content(url, name, cache=cache, sync=sync)
     soup = BeautifulSoup(text, "html5lib", from_encoding="latin1")
     if soup.title.text == "404 Not Found":
         raise IndexError
@@ -28,14 +28,14 @@ def to_soup(html):
 
 
 def lxml_get_with_nl(url, name, cache, sync):
-    soup = lxml_get(url, name)
-    suppe = lxml_get(url.replace("&language=fr", "&language=nl", 1), name + " nl")
+    soup = lxml_get(url, name, cache=cache, sync=sync)
+    suppe = lxml_get(url.replace("&language=fr", "&language=nl", 1), name + " nl", cache=cache, sync=sync)
     return soup, suppe
 
 
 def lxml_get(url, name, cache, sync):
     logger.debug("LXML parsing %s --- %s" % (url, name))
-    text = retreive_content(url)
+    text = retreive_content(url, cache=cache, sync=sync)
     soup = etree.parse(text, etree.HTMLParser())
     return soup
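Both files had the same bug: each function already accepted cache and sync but dropped them when calling the next layer, so the downstream defaults were always used. A minimal, self-contained sketch of the pattern and the fix (illustrative names only, not the project's code):

    def retrieve(url, cache=False, sync=False):
        # stand-in for the layer that actually honours the flags
        return "fetched %s (cache=%s, sync=%s)" % (url, cache, sync)

    def get_broken(url, cache=False, sync=False):
        return retrieve(url)                              # flags silently dropped

    def get_fixed(url, cache=False, sync=False):
        return retrieve(url, cache=cache, sync=sync)      # flags forwarded

    print(get_broken("http://example.org", cache=True))   # reports cache=False
    print(get_fixed("http://example.org", cache=True))    # reports cache=True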
