-
-
Notifications
You must be signed in to change notification settings - Fork 48
/
archive.py
329 lines (294 loc) · 12.8 KB
/
archive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import os
import shutil
import stat
from datetime import datetime
from typing import (
List,
Union,
)
import woodchips
from github import (
Auth,
Gist,
Github,
Repository,
)
from github_archive.constants import (
CLONE_OPERATION,
DEFAULT_BASE_URL,
DEFAULT_LOCATION,
DEFAULT_LOG_LEVEL,
DEFAULT_NUM_THREADS,
DEFAULT_TIMEOUT,
GIST_CONTEXT,
LOGGER_NAME,
ORG_CONTEXT,
PERSONAL_CONTEXT,
PULL_OPERATION,
STAR_CONTEXT,
USER_CONTEXT,
)
from github_archive.gists import (
iterate_gists_to_archive,
iterate_gists_to_fork,
view_gists,
)
from github_archive.logger import (
log_and_raise_value_error,
setup_logger,
)
from github_archive.repos import (
iterate_repos_to_archive,
iterate_repos_to_fork,
view_repos,
)
class GithubArchive:
    """Drive view/clone/pull/fork operations over GitHub repos and gists.

    An instance stores the CLI-style options it is given, builds a PyGithub client
    (authenticated when a token is supplied), and `run()` then processes, in order:
    personal repos, user repos, org repos, starred repos, and gists.
    """

    def __init__(
        self,
        token=None,
        users=None,
        orgs=None,
        gists=None,
        stars=None,
        view=False,
        clone=False,
        pull=False,
        fork=False,
        include=None,
        exclude=None,
        forks=False,
        location=DEFAULT_LOCATION,
        use_https=False,
        timeout=DEFAULT_TIMEOUT,
        threads=DEFAULT_NUM_THREADS,
        base_url=DEFAULT_BASE_URL,
        log_level=DEFAULT_LOG_LEVEL,
    ):
        """Store the options and create the GitHub client.

        `users`, `orgs`, `gists`, `stars`, `include`, and `exclude` are comma-separated
        strings; each is stored as a list of lowercase, whitespace-stripped names, or as
        an empty string when nothing was provided. `location` has `~` expanded.

        When `token` is provided, the authenticated user and their lowercase login are
        looked up through the client during construction.
        """
        # Parameter variables
        self.token = token
        self.users = self._parse_csv(users)
        self.orgs = self._parse_csv(orgs)
        self.gists = self._parse_csv(gists)
        self.stars = self._parse_csv(stars)
        self.view = view
        self.clone = clone
        self.pull = pull
        self.fork = fork
        self.include = self._parse_csv(include)
        self.exclude = self._parse_csv(exclude)
        self.forks = forks
        self.location = os.path.expanduser(location)
        # NOTE(review): `use_https`, `timeout`, and `threads` are only stored here; they
        # are presumably read by the repo/gist helpers that receive this instance.
        self.use_https = use_https
        self.timeout = timeout
        self.threads = threads
        self.base_url = base_url
        self.log_level = log_level

        # Internal variables
        self.github_instance = (
            Github(auth=Auth.Token(self.token), base_url=self.base_url)
            if self.token
            else Github(base_url=self.base_url)
        )
        self.authenticated_user = self.github_instance.get_user() if self.token else None
        self.authenticated_username = self.authenticated_user.login.lower() if self.token else None

    @staticmethod
    def _parse_csv(value):
        """Split a comma-separated string into lowercase, stripped, non-empty names.

        Returns an empty string (the falsy "nothing provided" marker used by this class)
        when `value` is falsy or contains no names.
        """
        if not value:
            return ''
        names = [name.strip() for name in value.lower().split(',')]
        # Drop blanks left behind by stray commas such as "a,,b" or "a,".
        return [name for name in names if name] or ''

    def run(self):
        """Run the tool based on the arguments passed via the CLI."""
        self.initialize_project()
        logger = woodchips.get(LOGGER_NAME)
        logger.info('# GitHub Archive started...')
        start_time = datetime.now()
        failed_repo_dirs = []

        # Personal (includes personal authenticated items)
        if self.token and self.users and self.authenticated_user_in_users():
            logger.info('# Making API call to GitHub for personal repos...')
            personal_repos = self.get_all_git_assets(PERSONAL_CONTEXT)
            # We can't fork a repo we already have, so forking is disabled here.
            failed_repo_dirs.extend(
                self._process_repos(personal_repos, 'personal', '# Viewing personal repos...', allow_fork=False)
            )
            # We remove the authenticated user from the list (every occurrence) so that we
            # don't double pull their repos for the `users` logic.
            self.users = [user for user in self.users if user != self.authenticated_username]

        # Users (can include personal non-authenticated items, excludes personal authenticated calls)
        if self.users:
            logger.info('# Making API calls to GitHub for user repos...')
            user_repos = self.get_all_git_assets(USER_CONTEXT)
            failed_repo_dirs.extend(self._process_repos(user_repos, 'user', '# Viewing user repos...'))

        # Orgs
        if self.orgs:
            logger.info('# Making API calls to GitHub for org repos...')
            org_repos = self.get_all_git_assets(ORG_CONTEXT)
            failed_repo_dirs.extend(self._process_repos(org_repos, 'org', '# Viewing org repos...'))

        # Stars
        if self.stars:
            logger.info('# Making API call to GitHub for starred repos...')
            starred_repos = self.get_all_git_assets(STAR_CONTEXT)
            failed_repo_dirs.extend(self._process_repos(starred_repos, 'starred', '# Viewing stars...'))

        if failed_repo_dirs:
            logger.info('Cleaning up repos...')
            self.remove_failed_dirs('repos', failed_repo_dirs)

        # Gists
        if self.gists:
            logger.info('# Making API call to GitHub for gists...')
            gists = self.get_all_git_assets(GIST_CONTEXT)
            failed_gist_dirs = []

            if self.view:
                logger.info('# Viewing gists...')
                view_gists(gists)
            if self.clone:
                logger.info('# Cloning missing gists...')
                failed_gists = iterate_gists_to_archive(self, gists, CLONE_OPERATION)
                # Keep only real names; empty/None entries must never reach cleanup.
                failed_gist_dirs.extend(name for name in failed_gists or [] if name)
            if self.pull:
                logger.info('# Pulling changes to gists...')
                iterate_gists_to_archive(self, gists, PULL_OPERATION)
            if self.fork:
                logger.info('# Forking gists...')
                iterate_gists_to_fork(gists)

            if failed_gist_dirs:
                logger.info('Cleaning up gists...')
                self.remove_failed_dirs('gists', failed_gist_dirs)

        execution_time = f'Execution time: {datetime.now() - start_time}.'
        finish_message = f'GitHub Archive complete! {execution_time}'
        logger.info(finish_message)

    def _process_repos(self, repos, label, view_message, allow_fork=True):
        """Apply the enabled view/clone/pull/fork operations to `repos`.

        `label` is interpolated into the clone/pull/fork log lines; `view_message` is
        logged verbatim before viewing. Returns the non-empty names reported as failed
        by the clone operation (failures from pulling are not collected).
        """
        logger = woodchips.get(LOGGER_NAME)
        failed_dirs = []

        if self.view:
            logger.info(view_message)
            view_repos(repos)
        if self.clone:
            logger.info(f'# Cloning missing {label} repos...')
            failed_repos = iterate_repos_to_archive(self, repos, CLONE_OPERATION)
            # Keep only real names; empty/None entries must never reach cleanup.
            failed_dirs.extend(name for name in failed_repos or [] if name)
        if self.pull:
            logger.info(f'# Pulling changes to {label} repos...')
            iterate_repos_to_archive(self, repos, PULL_OPERATION)
        if self.fork and allow_fork:
            logger.info(f'# Forking {label} repos...')
            iterate_repos_to_fork(repos)

        return failed_dirs

    def initialize_project(self):
        """Initialize the tool and ensure everything is in order before moving on:
        1. Directories are setup correctly
        2. A git operation was specified
        3. A list of assets to run operations on is specified

        Invalid combinations are reported through `log_and_raise_value_error`
        (presumably raising `ValueError`, per its name).
        """
        setup_logger(self)
        logger = woodchips.get(LOGGER_NAME)

        # Create each subdirectory independently so an existing `location` that is
        # missing `repos` or `gists` is repaired instead of being left incomplete.
        os.makedirs(os.path.join(self.location, 'repos'), exist_ok=True)
        os.makedirs(os.path.join(self.location, 'gists'), exist_ok=True)

        has_assets = bool(self.users or self.orgs or self.gists or self.stars)
        has_operation = bool(self.view or self.clone or self.pull or self.fork)

        if has_assets and not has_operation:
            log_and_raise_value_error(
                logger=logger,
                message='A git operation must be specified when a list of users or orgs is provided.',
            )
        elif has_operation and not has_assets:
            log_and_raise_value_error(
                logger=logger,
                message='A list must be provided when a git operation is specified.',
            )
        elif not has_assets and not has_operation:
            log_and_raise_value_error(
                logger=logger,
                message='At least one git operation and one list must be provided to run github-archive.',
            )
        elif self.include and self.exclude:
            log_and_raise_value_error(
                logger=logger,
                message='The include and exclude flags are mutually exclusive. Only one can be used on each run.',
            )

    def authenticated_user_in_users(self) -> bool:
        """Returns True if the authenticated user is in the list of users.

        Returns False when there is no authenticated user (no token was provided).
        """
        if self.authenticated_user is None:
            return False
        return self.authenticated_user.login.lower() in self.users

    def get_all_git_assets(self, context: str) -> List[Union[Repository.Repository, Gist.Gist]]:
        """Retrieve a list of lists via API of git assets (repos, gists) of the
        specified owner(s) (users, orgs). Returns a flattened, sorted list of git assets.

        `context` must be one of the `*_CONTEXT` constants; any other value raises
        `KeyError`. Forked repos are skipped unless `self.forks` is truthy; gists are
        always included. The result is sorted by owner login.
        """
        logger = woodchips.get(LOGGER_NAME)

        # Personal repos always belong to the authenticated user, so they are fetched
        # exactly once regardless of how many names are in `self.users` (iterating
        # `self.users` here would append the same repos once per listed user).
        personal_owners = [self.authenticated_username] if self.authenticated_username else []

        # context -> (owners to iterate, fetcher taking an owner name, label for logging)
        context_manager = {
            GIST_CONTEXT: (
                self.gists,
                lambda owner: self.github_instance.get_user(owner).get_gists(),
                'gists',
            ),
            ORG_CONTEXT: (
                self.orgs,
                lambda owner: self.github_instance.get_organization(owner).get_repos(),
                'repos',
            ),
            PERSONAL_CONTEXT: (
                personal_owners,
                lambda _: self.authenticated_user.get_repos(affiliation='owner'),
                'repos',
            ),
            STAR_CONTEXT: (
                self.stars,
                lambda owner: self.github_instance.get_user(owner).get_starred(),
                'starred repos',
            ),
            USER_CONTEXT: (
                self.users,
                lambda owner: self.github_instance.get_user(owner).get_repos(),
                'repos',
            ),
        }
        owner_list, fetch_assets, git_asset_string = context_manager[context]

        all_git_assets = []
        for owner in owner_list:
            # Use the stripped name for the API call too, not only for the log line.
            formatted_owner_name = owner.strip()
            git_assets = fetch_assets(formatted_owner_name)
            logger.debug(f'{formatted_owner_name} {git_asset_string} retrieved!')

            if context == GIST_CONTEXT or self.forks:
                # Gists are added automatically since we don't support forked gists.
                all_git_assets.extend(git_assets)
            else:
                # Forks were not requested, so leave forked repos out.
                all_git_assets.extend(item for item in git_assets if not item.fork)

        return sorted(all_git_assets, key=lambda item: item.owner.login)

    def remove_failed_dirs(self, dirs_location: str, failed_dirs: List[str]):
        """Removes a directory if it fails a git operation due to
        timing out or other errors so it can be retried on the next run.

        `dirs_location` is the subdirectory of `self.location` that holds the entries
        (for example 'repos' or 'gists'); `failed_dirs` are names inside it. Duplicate
        and empty names are ignored, and paths that do not exist are skipped.
        """
        logger = woodchips.get(LOGGER_NAME)

        def make_dir_writable(function, path, _):
            """The `.git` folder on Windows cannot be gracefully removed due to being read-only,
            so we make the directory writable on a failure and retry the original function.
            """
            os.chmod(path, stat.S_IWRITE)
            function(path)

        for directory in set(failed_dirs):
            if not directory:
                # An empty name would resolve to the whole `dirs_location` directory and
                # delete every archived asset; None cannot be joined into a path at all.
                continue
            path = os.path.join(self.location, dirs_location, directory)
            if os.path.exists(path):
                logger.debug(f'Removing {directory} due to a failed git operation...')
                shutil.rmtree(path, onerror=make_dir_writable)