
Commit

Make gatherling scraper slightly more resilient to timeouts?
silasary committed Feb 19, 2019
1 parent 8b7f632 commit 6abac2c
Showing 2 changed files with 5 additions and 3 deletions.
4 changes: 2 additions & 2 deletions decksite/scrapers/gatherling.py
@@ -31,7 +31,7 @@ def scrape(limit: int = 50) -> None:
         return
 
 def tournament(url: str, name: str) -> int:
-    s = fetcher.internal.fetch(url, character_encoding='utf-8')
+    s = fetcher.internal.fetch(url, character_encoding='utf-8', retry=True)
 
     # Tournament details
     soup = BeautifulSoup(s, 'html.parser')
@@ -188,7 +188,7 @@ def tournament_deck(cells: ResultSet, competition_id: int, date: datetime.dateti
 
 def tournament_matches(d: deck.Deck) -> List[bs4.element.Tag]:
     url = 'https://gatherling.com/deck.php?mode=view&id={identifier}'.format(identifier=d.identifier)
-    s = fetcher.internal.fetch(url, character_encoding='utf-8')
+    s = fetcher.internal.fetch(url, character_encoding='utf-8', retry=True)
     soup = BeautifulSoup(s, 'html.parser')
     anchor = soup.find(string='MATCHUPS')
     if anchor is None:
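Both Gatherling call sites, tournament() and tournament_matches(), opt in by passing retry=True; each page fetch therefore gets one automatic second attempt before a FetchException reaches the scraper.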
4 changes: 3 additions & 1 deletion shared/fetcher_internal.py
@@ -18,7 +18,7 @@
     'http://whatsinstandard.com',
     CacheControlAdapter(heuristic=ExpiresAfter(days=14)))
 
-def fetch(url: str, character_encoding: Optional[str] = None, force: bool = False) -> str:
+def fetch(url: str, character_encoding: Optional[str] = None, force: bool = False, retry: bool = False) -> str:
     headers = {}
     if force:
         headers['Cache-Control'] = 'no-cache'
@@ -38,6 +38,8 @@ def fetch(url: str, character_encoding: Optional[str] = None, force: bool = Fals
             print('Getting text from response was very slow. Setting an explicit character_encoding may help.')
         return t
     except (urllib.error.HTTPError, requests.exceptions.ConnectionError) as e: # type: ignore # urllib isn't fully stubbed
+        if retry:
+            return fetch(url, character_encoding, force, retry=False)
         raise FetchException(e)
 
 async def fetch_async(url: str) -> str:
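The new flag amounts to a single immediate re-request: the recursive call passes retry=False, so a second consecutive failure still raises FetchException. Below is a minimal self-contained sketch of the same pattern, assuming plain requests and a stubbed FetchException; the project's real helper also deals with urllib errors, character encodings, and caching.

import requests

class FetchException(Exception):
    pass

def fetch(url: str, retry: bool = False) -> str:
    try:
        response = requests.get(url, timeout=30)
        return response.text
    except requests.exceptions.ConnectionError as e:
        if retry:
            # Retry exactly once: the recursive call passes retry=False,
            # so a second consecutive failure raises instead of looping.
            return fetch(url, retry=False)
        raise FetchException(e)

Opting in per call site, with one retry and no backoff, keeps the change small: it smooths over a transient Gatherling timeout without risking unbounded recursion or repeatedly hammering a server that is genuinely down.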
