Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config.sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"repository": "singer-io/target-stitch",
"start_date": "2021-01-01T00:00:00Z",
"request_timeout": 300,
"base_url": "https://api.github.com"
"base_url": "https://api.github.com",
"extract_archived": "false"
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup, find_packages

setup(name='tap-github',
version='2.0.13',
version='2.0.14',
description='Singer.io tap for extracting data from the GitHub API',
author='Stitch',
url='http://singer.io',
Expand Down
40 changes: 40 additions & 0 deletions tap_github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ class RateLimitSleepExceeded(GithubException):
class TooManyRequests(GithubException):
pass

class ArchivedRepositoryError(GithubException):
    """Raised when a repository is archived and 'extract_archived' is not enabled."""


ERROR_CODE_EXCEPTION_MAPPING = {
301: {
Expand Down Expand Up @@ -200,6 +204,9 @@ def __init__(self, config):
self.set_auth_in_session()
self.not_accessible_repos = set()
self.max_per_page = self.config.get('max_per_page', DEFAULT_MAX_PER_PAGE)
# Convert string 'true'/'false' to boolean, default to False
extract_archived_value = str(self.config.get('extract_archived', 'false')).lower()
self.extract_archived = extract_archived_value == 'true'

def get_request_timeout(self):
"""
Expand Down Expand Up @@ -282,9 +289,31 @@ def verify_repo_access(self, url_for_repo, repo):
message = "HTTP-error-code: 404, Error: Please check the repository name \'{}\' or you do not have sufficient permissions to access this repository.".format(repo)
raise NotFoundException(message) from None

def check_repo_archived(self, repo):
    """
    Reject archived repositories unless the 'extract_archived' setting is on.

    Requests ``<base_url>/repos/<repo>`` and inspects the ``archived`` flag of
    the JSON payload. A repository that is not archived passes silently; an
    archived one either logs a warning (extraction enabled) or raises.

    Args:
        repo: Repository in 'org/repo' format

    Raises:
        ArchivedRepositoryError: If repo is archived and self.extract_archived is falsy
    """
    repo_url = "{}/repos/{}".format(self.base_url, repo)
    repo_details = self.authed_get_single_page(
        "checking repository archived status", repo_url, should_skip_404=False
    ).json()

    # Guard clause: a missing or falsy 'archived' flag means there is nothing to enforce.
    if not repo_details.get('archived', False):
        return

    if self.extract_archived:
        # Archived but explicitly allowed: surface it in the logs and carry on.
        LOGGER.warning("Repository '%s' is archived. Proceeding with extraction as 'extract_archived' is enabled.", repo)
        return

    raise ArchivedRepositoryError(
        "Repository '{}' is archived. To extract data from archived repositories, "
        "set 'extract_archived' to 'true' in the config.".format(repo)
    )

def verify_access_for_repo(self):
"""
For each repository mentioned in the config, check that it is accessible.
Also checks if repositories are archived and fails if extract_archived is not enabled.
"""
repositories, org = self.extract_repos_from_config() # pylint: disable=unused-variable

Expand All @@ -296,6 +325,9 @@ def verify_access_for_repo(self):
# Verifying for Repo access
self.verify_repo_access(url_for_repo, repo)

# Check if repository is archived
self.check_repo_archived(repo)

def extract_orgs_from_config(self):
"""
Extracts all organizations from the config
Expand Down Expand Up @@ -383,6 +415,14 @@ def get_all_repos(self, organizations: list):
repo
)

# Check if repository is archived (info already available in response)
if repo.get('archived', False):
if not self.extract_archived:
message = "Repository '{}' is archived. To extract data from archived repositories, " \
"set 'extract_archived' to 'true' in the config.".format(repo_full_name)
raise ArchivedRepositoryError(message)
LOGGER.warning("Repository '%s' is archived. Proceeding with extraction as 'extract_archived' is enabled.", repo_full_name)

repos.append(repo_full_name)
except NotFoundException:
# Throwing user-friendly error message as it checks token access
Expand Down