examiner: Be more lenient when finding exam pages
JakobGM committed Jan 20, 2019
1 parent 881ebf3 commit 41a02cc
1 changed file: examiner/crawlers.py (8 additions, 9 deletions)
@@ -80,15 +80,10 @@ def exams_pages(self) -> str:
 continue

 soup = bs(response.content, 'html.parser')
-patterns = r'(?:' + r'|'.join([
-    r'old exams',
-    r'gamle eksamensoppgaver',
-    r'eksamensoppgaver',
-    r'tidligere eksamener',
-    r'earlier exams',
-    r'old exam sets',
-    r'eksamenssett',
-]) + r')'
+patterns = r'.*(?:' + r'|'.join([
+    r'exam',
+    r'eksam',
+]) + r').*'
 link = soup.find(
     'a',
     text=re.compile(patterns, re.IGNORECASE),
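
The relaxed pattern is a superset of the removed alternatives: any link text containing "exam" or "eksam" now matches, including every phrase the old list spelled out. A minimal sketch (not part of the commit) that checks this, assuming only the standard re module:

import re

patterns = r'.*(?:' + r'|'.join([r'exam', r'eksam']) + r').*'
old_phrases = [
    'old exams', 'gamle eksamensoppgaver', 'eksamensoppgaver',
    'tidligere eksamener', 'earlier exams', 'old exam sets', 'eksamenssett',
]
# Every phrase targeted by the removed alternatives still matches,
# as do looser link texts such as 'Exam problems' or 'Eksamen 2018'.
assert all(re.search(patterns, phrase, re.IGNORECASE) for phrase in old_phrases)
assert re.search(patterns, 'Eksamen 2018', re.IGNORECASE)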
@@ -109,6 +104,10 @@ def pdf_urls(self) -> List[str]:
 result = set()

 for exams_url in exams_urls:
+    if exams_url[-4:] == '.pdf':
+        # This is a PDF, so we can add it directly to the results
+        result.add(exams_url)
+        continue
     try:
         response = requests.get(exams_url, timeout=2)
     except Exception:
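
For the second hunk, the new check lets URLs that already end in .pdf skip the HTTP request entirely; only non-PDF pages still need to be fetched and scraped. An illustrative standalone sketch of that short-circuit (the URLs here are made up, not taken from the crawler):

exams_urls = [
    'https://example.com/exams/2018.pdf',   # hypothetical direct PDF link
    'https://example.com/old-exams/',       # hypothetical exam listing page
]

result = set()
for exams_url in exams_urls:
    if exams_url[-4:] == '.pdf':
        # Direct PDF link: keep it without fetching the page first
        result.add(exams_url)
        continue
    # A non-PDF URL would be fetched with requests.get() and scraped
    # for PDF links here, as in the surrounding pdf_urls() method.

print(result)  # {'https://example.com/exams/2018.pdf'}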
