Skip to content

Commit

Permalink
Handle rewriting malformed links
Browse files: browse the repository at this point in the history
  • Loading branch information
NicolasLM committed Jul 3, 2019
1 parent 889cfe8 commit ff03026
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion reader/html_processing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,7 +61,10 @@ def rewrite_relative_links(soup: bs4.BeautifulSoup, base_url: str):
"""
     for tag_name, attrib in URL_REWRITE_PAIRS:
         for tag in soup.find_all(tag_name, attrs={attrib: True}):
-            tag[attrib] = urllib.parse.urljoin(base_url, tag[attrib])
+            try:
+                tag[attrib] = urllib.parse.urljoin(base_url, tag[attrib])
+            except ValueError as e:
+                logger.info('Could not rewrite link: %s', e)


def unify_style(soup: bs4.BeautifulSoup):
Expand Down

0 comments on commit ff03026

Please sign in to comment.