# project.py
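"""Core s3sup project handling.

Loads a project's s3sup.toml rules, builds catalogues of local files and of
what was last pushed to S3, diffs the two and syncs any changes.
"""
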
import os
import functools
import tempfile
import pkgutil
import boto3
import botocore
import click
import humanize
import s3sup.catalogue
import s3sup.fileprepper
import s3sup.rules
import s3sup.utils


def load_skeleton_s3sup_toml():
    # Return the skeleton s3sup.toml bundled inside the package.
    return pkgutil.get_data(__package__, 'skeleton.s3sup.toml')


class Project:
    """An s3sup project rooted at a local directory containing s3sup.toml."""

    def __init__(self, local_project_root, dryrun=False,
                 preserve_deleted_files=False, verbose=True):
self.dryrun = dryrun
self.verbose = verbose
self.local_project_root = local_project_root
try:
self.rules = s3sup.rules.load_rules(os.path.join(
local_project_root, 's3sup.toml'))
except FileNotFoundError:
error_text = (
'\n{0} not an s3sup project directory (no s3sup.toml found). '
'Either:\n'
' * Change to an s3sup project directory before running.\n'
' * Supply project directory using -p/--projectdir.\n'
                ' * Create a new s3sup project directory using "s3sup init".'
).format(os.path.abspath(local_project_root))
raise click.FileError(
os.path.join(local_project_root, 's3sup.toml'),
hint=error_text)
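        # preserve_deleted_files can be enabled either via the constructor
        # argument or via the project rules file.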
self._preserve_deleted_files = preserve_deleted_files
try:
self._preserve_deleted_files = (
preserve_deleted_files or self.rules['preserve_deleted_files'])
except KeyError:
pass
self._fp_cache = {}
self.local_preflight_checks()
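
    # region_name and s3_endpoint_url are optional overrides in s3sup.toml;
    # boto3 falls back to its standard configuration when they are absent.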
def _boto_bucket(self):
s = boto3.session.Session()
res_args = {}
try:
res_args['region_name'] = self.rules['aws']['region_name']
except KeyError:
pass
try:
res_args['endpoint_url'] = self.rules['aws']['s3_endpoint_url']
except KeyError:
pass
r = s.resource(service_name='s3', **res_args)
b = r.Bucket(self.rules['aws']['s3_bucket_name'])
return r, b
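
    # Cache FilePrepper instances per relative path so each file is only
    # prepared once per run.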
def file_prepper_wrapped(self, path):
try:
return self._fp_cache[path]
except KeyError:
self._fp_cache[path] = s3sup.fileprepper.FilePrepper(
self.local_project_root, path, self.rules)
return self._fp_cache[path]

    def _local_fs_path(self, rel_path):
return os.path.join(self.local_project_root, rel_path)

    def local_preflight_checks(self):
        """
        We can't stop things going wrong during the upload, but a good poke
        around beforehand catches most problems.
        """
        # No local checks are implemented yet; this is a placeholder.
        return True

    def remote_preflight_checks(self):
        """
        We can't stop things going wrong during the upload, but a good poke
        around beforehand catches most problems.
        """
        # Verify the bucket exists and is writable by putting, then
        # deleting, a small private test object.
rmt_cat_fp = self.file_prepper_wrapped('.s3sup.write_test')
rsrc, b = self._boto_bucket()
o = b.Object(rmt_cat_fp.s3_path())
try:
o.put(Body='Can s3sup write to bucket?', ACL='private')
except rsrc.meta.client.exceptions.NoSuchBucket:
raise click.ClickException('S3 bucket does not exist: {0}'.format(
self.rules['aws']['s3_bucket_name']))
o.delete()
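
    # Build a catalogue of the local project tree: one entry per file with
    # content and attribute hashes. s3sup.toml itself is never uploaded.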
@functools.lru_cache(maxsize=8)
def local_catalogue(self):
local_cat = s3sup.catalogue.Catalogue(
preserve_deleted_files=self._preserve_deleted_files)
for root, dirs, files in os.walk(self.local_project_root):
for f in files:
if f == 's3sup.toml':
continue
abs_path = os.path.join(root, f)
rel_path = os.path.relpath(
abs_path, start=self.local_project_root)
fp = self.file_prepper_wrapped(rel_path)
local_cat.add_file(
rel_path, fp.content_hash(), fp.attributes_hash())
return local_cat
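
    # The remote catalogue records what s3sup last pushed. Newer clients
    # store it as SQLite at .s3sup.cat; fall back to the legacy CSV
    # catalogue (.s3sup.catalogue.csv) written by s3sup <= 0.3.0.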
@functools.lru_cache(maxsize=8)
def get_remote_catalogue(self):
remote_cat = s3sup.catalogue.Catalogue(
preserve_deleted_files=self._preserve_deleted_files)
_, b = self._boto_bucket()
old_cat_fp = self.file_prepper_wrapped('.s3sup.catalogue.csv')
old_f = b.Object(old_cat_fp.s3_path())
new_cat_fp = self.file_prepper_wrapped('.s3sup.cat')
new_f = b.Object(new_cat_fp.s3_path())
hndl, tmpp = tempfile.mkstemp()
os.close(hndl)
try:
new_f.download_file(tmpp)
remote_cat.from_sqlite(tmpp)
except botocore.exceptions.NoCredentialsError:
raise click.UsageError(
                'Cannot find AWS credentials.\n -> Configure AWS credentials '
                'using any method that the underlying boto3 library supports:'
                '\n -> https://boto3.amazonaws.com/v1/documentation/'
                'api/latest/guide/configuration.html')
except botocore.exceptions.ClientError:
if self.verbose:
click.echo(
('Could not find SQLite based remote catalogue on S3 '
'(expected at {0}).').format(new_cat_fp.s3_path()))
try:
old_f.download_file(tmpp)
remote_cat.from_csv(tmpp)
click.echo(click.style((
'WARNING: After the next s3sup push, do not attempt to '
'use older versions of s3sup (0.3.0 or below) with this '
'project, as they will no longer be able to read the '
'remote catalogue.'), fg='blue'))
except botocore.exceptions.ClientError:
if self.verbose:
click.echo(
('Could not find older CSV based remote catalogue on '
'S3 either (expected at {0}). This indicates the '
'project has never been pushed to S3 before.').format(
old_cat_fp.s3_path()))
os.remove(tmpp)
return remote_cat
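
    # Serialise the catalogue to SQLite and upload it privately alongside
    # the site content.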
def write_remote_catalogue(self, catalogue):
hndl, tmpp = tempfile.mkstemp()
os.close(hndl)
catalogue.to_sqlite(tmpp)
rmt_cat_fp = self.file_prepper_wrapped('.s3sup.cat')
_, b = self._boto_bucket()
o = b.Object(rmt_cat_fp.s3_path())
with open(tmpp, 'rb') as lf:
o.put(Body=lf, ACL='private')
os.remove(tmpp)
        # Deliberately break older s3sup clients (<= 0.3.0).
        # This file needs uploading even for projects that have never used
        # the old format, just in case an old version of s3sup is run
        # against them in the future (perhaps as part of a CI/CD system).
old_rmt_cat_fp = self.file_prepper_wrapped('.s3sup.catalogue.csv')
        the_breaker = (
            b'\xF9\xF9This is a deliberately corrupt old version of the s3sup '
            b'catalogue format. It is not used any more and this file is only '
            b'here to cause s3sup clients <= 0.3.0 to fail, rather than have '
            b'them try to upload everything again.')
b.Object(old_rmt_cat_fp.s3_path()).put(Body=the_breaker, ACL='private')
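
    # Diff the local catalogue against the remote one. Returns the change
    # dictionary plus the catalogue that becomes the new remote catalogue
    # after a successful sync.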
def calculate_diff(self):
local_cat = self.local_catalogue()
remote_cat = self.get_remote_catalogue()
diff, new_remote_cat = local_cat.diff_dict(remote_cat)
return (diff, new_remote_cat)
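
    # Apply the calculated diff to S3: upload new and changed content,
    # rewrite attributes in place, delete removed files, then upload the
    # refreshed remote catalogue.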
def sync(self):
self.remote_preflight_checks()
diff, new_remote_cat = self.calculate_diff()
changes = s3sup.catalogue.change_list(diff)
changes_with_prep = [
(cr, p, self.file_prepper_wrapped(p)) for cr, p in changes]
        if not changes:
return changes
if self.dryrun:
click.echo(click.style(
'Not making any changes as this is a dry run.', fg='blue'))
return changes
_, b = self._boto_bucket()

        def display_current(item):
            if item is None:
                return ''
            cr, p, fp = item
            crs = s3sup.catalogue.CR_STYLES[cr]
            change_symbol = click.style(crs.symbol, fg=crs.colour)
            cur = ' {0} {1}'.format(change_symbol, fp.s3_path())
            if cr in (s3sup.catalogue.ChangeReason['NEW_FILE'],
                      s3sup.catalogue.ChangeReason['CONTENT_CHANGED']):
                cur += ' ({0})'.format(humanize.naturalsize(fp.size()))
            return cur
with click.progressbar(changes_with_prep, label='Syncing to S3',
item_show_func=display_current) as bar:
for cr, p, fp in bar:
o = b.Object(fp.s3_path())
                if cr in (s3sup.catalogue.ChangeReason['NEW_FILE'],
                          s3sup.catalogue.ChangeReason['CONTENT_CHANGED']):
                    # New and changed files are handled identically: upload
                    # the prepared content with its full attribute set.
                    with fp.content_fileobj() as lf:
                        o.put(Body=lf, **fp.attributes_as_boto_args())
elif cr == s3sup.catalogue.ChangeReason['ATTRIBUTES_CHANGED']:
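                    # An attributes-only change: copy the object onto itself
                    # so S3 replaces metadata and tags without re-uploading
                    # the content.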
o.copy_from(
CopySource={
'Bucket': self.rules['aws']['s3_bucket_name'],
'Key': fp.s3_path()},
MetadataDirective='REPLACE',
TaggingDirective='REPLACE',
**fp.attributes_as_boto_args())
elif cr == s3sup.catalogue.ChangeReason['DELETED']:
o.delete()
else:
                    raise ValueError('Unknown ChangeReason: {0}'.format(cr))
self.write_remote_catalogue(new_remote_cat)
return changes
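
    # Print a short, human-readable summary of where the project lives
    # locally and on S3.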
def print_summary(self):
lcl_dir = click.format_filename(self.local_project_root)
if lcl_dir == '.':
lcl_dir += ' (current dir)'
s3p = 's3://{0}/'.format(self.rules['aws']['s3_bucket_name'])
try:
            s3p += self.rules['aws']['s3_project_root'].strip('/')
except KeyError:
pass
        to_print = {
            'Local project dir': lcl_dir,
            # region_name is optional elsewhere in this module, so avoid a
            # KeyError here if it is not set in s3sup.toml.
            'AWS region': self.rules['aws'].get('region_name', '(default)'),
            'S3 bucket': s3p
        }
s3sup.utils.pprint_h1('PROJECT INFORMATION')
s3sup.utils.pprint_dict(to_print)
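

# A minimal usage sketch (an illustration, not part of the library). Assumes
# the current directory is an s3sup project containing a valid s3sup.toml:
#
#     project = Project('.', dryrun=True)
#     project.print_summary()
#     diff, _ = project.calculate_diff()
#     project.sync()  # with dryrun=True, reports but applies no changes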