Merged

2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
 from setuptools import setup, find_packages
 
 setup(name='tap-github',
-      version='2.0.11',
+      version='2.0.12',
       description='Singer.io tap for extracting data from the GitHub API',
       author='Stitch',
       url='http://singer.io',

31 changes: 27 additions & 4 deletions tap_github/client.py
@@ -16,7 +16,11 @@
 # Set default timeout of 300 seconds
 REQUEST_TIMEOUT = 300
 
+# How many total seconds to retry when getting rate limit error from API. The limit resets every hour.
+RATE_LIMIT_RETRY_MAX_TIME = 3600
+
 PAGINATION_EXCEED_MSG = 'In order to keep the API fast for everyone, pagination is limited for this resource.'
+RATE_LIMIT_EXCEED_MSG = 'API rate limit exceeded'
 
 class GithubException(Exception):
     pass
@@ -51,9 +55,15 @@ class MovedPermanentlyError(GithubException):
 class ConflictError(GithubException):
     pass
 
+# Thrown when we receive 403 Rate Limit Exceeded from Github API
 class RateLimitExceeded(GithubException):
     pass
 
+# Thrown when we're expected to sleep for longer than the max_sleep_seconds limit
+class RateLimitSleepExceeded(GithubException):
+    pass
+
+# Thrown when 429 is received from Github API
 class TooManyRequests(GithubException):
     pass

@@ -111,6 +121,13 @@ def raise_for_error(resp, source, stream, client, should_skip_404):
     except JSONDecodeError:
         response_json = {}
 
+    response_message = response_json.get('message', '')
+
+    if error_code == 403 and RATE_LIMIT_EXCEED_MSG in response_message:
+        message = f"HTTP-error-code: 403, Error: {response_message}"
+        LOGGER.warning(message)
+        raise RateLimitExceeded() from None
+
     if error_code == 404 and should_skip_404:
         # Add not accessible stream into list.
         client.not_accessible_repos.add(stream)
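
Note: GitHub signals its primary rate limit with a 403 whose JSON body message contains "API rate limit exceeded", which is the substring the new check above keys on; the RateLimitExceeded it raises is then retried by the backoff decorator added at the end of this diff. A minimal sketch of that check with a hypothetical sample body (not taken from the PR):

```python
RATE_LIMIT_EXCEED_MSG = 'API rate limit exceeded'

# Hypothetical example of a GitHub 403 rate-limit body.
sample_403_json = {"message": "API rate limit exceeded for 203.0.113.7."}

response_message = sample_403_json.get('message', '')
if RATE_LIMIT_EXCEED_MSG in response_message:
    # In client.py this is the point where RateLimitExceeded is raised and later retried.
    print("HTTP-error-code: 403, Error:", response_message)
```
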
@@ -122,8 +139,8 @@ def raise_for_error(resp, source, stream, client, should_skip_404):
         # Don't raise a NotFoundException
         return None
 
-    if error_code == 422 and PAGINATION_EXCEED_MSG in response_json.get('message', ''):
-        message = f"HTTP-error-code: 422, Error: {response_json.get('message', '')}. " \
+    if error_code == 422 and PAGINATION_EXCEED_MSG in response_message:
+        message = f"HTTP-error-code: 422, Error: {response_message}. " \
             f"Please refer '{response_json.get('documentation_url')}' for more details." \
             "This is a known issue when the results exceed 40k and the last page is not full" \
             " (it will trim the results to get only the available by the API)."
@@ -150,13 +167,18 @@ def rate_throttling(response, max_sleep_seconds, min_remain_rate_limit):
     """
     For rate limit errors, get the remaining time before retrying and calculate the time to sleep before making a new request.
     """
+    if "Retry-After" in response.headers:
+        # handles the secondary rate limit
+        seconds_to_sleep = int(response.headers['Retry-After'])
+        LOGGER.info("Retry-After header found in response. Tap will retry the data collection after %s seconds.", seconds_to_sleep)
+        time.sleep(seconds_to_sleep)
     if 'X-RateLimit-Remaining' in response.headers:
         if int(response.headers['X-RateLimit-Remaining']) <= min_remain_rate_limit:
-            seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset']))
+            seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset']) + 15)
 
             if seconds_to_sleep > max_sleep_seconds:
                 message = "API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep)
-                raise RateLimitExceeded(message) from None
+                raise RateLimitSleepExceeded(message) from None
 
             LOGGER.info("API rate limit exceeded. Tap will retry the data collection after %s seconds.", seconds_to_sleep)
             time.sleep(seconds_to_sleep)
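
Note: calculate_seconds is not shown in this diff; from its usage it presumably converts the X-RateLimit-Reset epoch into a number of seconds to sleep. A minimal sketch under that assumption, including the 15-second pad added above (a buffer so the tap resumes slightly after the window actually resets rather than slightly before):

```python
import time

def calculate_seconds(epoch):
    # Assumed behavior: seconds remaining until the given epoch timestamp.
    current_time = time.time()
    return int(round(epoch - current_time, 0))

# Hypothetical usage mirroring the changed line above.
reset_epoch = int(time.time()) + 120   # stand-in for int(response.headers['X-RateLimit-Reset'])
seconds_to_sleep = calculate_seconds(reset_epoch + 15)
print(seconds_to_sleep)                # ~135
```
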
@@ -206,6 +228,7 @@ def set_auth_in_session(self):
     @backoff.on_exception(backoff.expo, (requests.Timeout, requests.ConnectionError, Server5xxError, TooManyRequests),
                           max_tries=5, factor=2)
     @backoff.on_exception(backoff.expo, (BadCredentialsException, ), max_tries=3, factor=2)
+    @backoff.on_exception(backoff.constant, (RateLimitExceeded, ), interval=60, jitter=None, max_time=RATE_LIMIT_RETRY_MAX_TIME)
     def authed_get_single_page(self, source, url, headers={}, stream="", should_skip_404 = True):
         """
         Call rest API and return the response in case of status code 200.
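
Taken together, the new decorator means a 403 rate-limit response no longer fails the sync immediately: RateLimitExceeded is retried on a constant 60-second interval until RATE_LIMIT_RETRY_MAX_TIME (3600 seconds) has elapsed, which lines up with the hourly reset noted in the new comment, while RateLimitSleepExceeded (raised when the required sleep exceeds max_sleep_seconds) is not retried. A minimal standalone sketch of that retry pattern, relying on the backoff library's constant/max_time behavior; fetch_page here is a hypothetical stand-in, not the tap's method:

```python
import backoff

# Matches the new constant in client.py: total seconds to keep retrying.
RATE_LIMIT_RETRY_MAX_TIME = 3600

class RateLimitExceeded(Exception):
    pass

# Retry on a flat 60-second interval (no jitter) until the wrapped call succeeds
# or RATE_LIMIT_RETRY_MAX_TIME seconds have elapsed, mirroring the decorator
# added to authed_get_single_page in this diff.
@backoff.on_exception(backoff.constant, (RateLimitExceeded,),
                      interval=60, jitter=None, max_time=RATE_LIMIT_RETRY_MAX_TIME)
def fetch_page():
    # Hypothetical stand-in for a GitHub API request that keeps hitting the rate limit.
    raise RateLimitExceeded("API rate limit exceeded")
```

Calling fetch_page() would retry roughly once a minute for up to an hour before the final RateLimitExceeded propagates; in the tap, a request that succeeds once the limit resets simply returns and the sync continues.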