Skip to content

Commit

Permalink
Merge pull request #159 from Lukas0907/error-handling
Browse files Browse the repository at this point in the history
Adjustment to response caching, logging
  • Loading branch information
Lukas0907 committed Aug 23, 2018
2 parents 46ecb4a + 3306f6d commit 92ce80e
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
2 changes: 1 addition & 1 deletion feeds/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
logger = logging.getLogger(__name__)


IGNORE_HTTP_CODES = [403, 404, 500, 502, 503, 504]
IGNORE_HTTP_CODES = [403, 404] + list(range(500, 600))


def read_meta(root):
Expand Down
6 changes: 3 additions & 3 deletions feeds/spidermiddlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ def from_crawler(cls, crawler):

def process_spider_exception(self, response, exception, spider):
if isinstance(exception, HttpError):
if response.status in [500, 502, 503, 504]:
# These status codes are usually induced by overloaded sites,
# updates, short downtimes, etc. and are not that relevant.
if response.status >= 500:
# Transient errors usually caused by overloaded sites, updates, short
# downtimes, etc.
lgr = logger.info
else:
lgr = logger.warning
Expand Down
4 changes: 4 additions & 0 deletions feeds/spiders/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
from feeds.loaders import FeedEntryItemLoader
from feeds.spiders import FeedsSpider

# Readability's output is not interesting enough to justify log level "INFO".
import readability.readability
readability.readability.log.info = readability.readability.log.debug


class GenericSpider(FeedsSpider):
name = "generic"
Expand Down

0 comments on commit 92ce80e

Please sign in to comment.