/
dflat.py
467 lines (420 loc) · 15.5 KB
/
dflat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
"""
An implementation of the Dflat and ReDD specifications from CDL for
versioning of digital objects.
"""
DFLAT_VERSION = '0.16'
DNATURAL_VERSION = '0.17'
REDD_VERSION = '0.1'
import os
import re
import sys
import time
import shutil
import hashlib
import logging
import namaste
import os.path
import datetime
import optparse
from functools import wraps
if sys.version_info.major >= 3:
from urllib.parse import quote, unquote
else:
from urllib import quote, unquote
# short alias for this since we call it a lot
j = os.path.join
_QUIET = False
def main():
    """Parse the command line and dispatch to the matching command.

    The first positional argument is the command name.  ``export``
    additionally needs a version label as the second argument.  ``init``
    and ``help`` run anywhere; all other commands require the current
    working directory to be inside an existing Dflat.
    """
    parser = _option_parser()
    _, args = parser.parse_args()
    if not args:
        # parser.error() prints the usage message and exits
        parser.error('no command specified')
    cmd = args[0]
    # optional second argument; only ``export`` consumes it
    version = args[1] if len(args) > 1 else None

    home = _dflat_home(os.getcwd())

    if cmd == 'init':
        init(os.getcwd())
    elif cmd == 'help':
        _print(parser.get_usage())
    elif not home:
        _print("not a dflat")
    elif cmd == 'checkout':
        checkout(home)
    elif cmd == 'commit':
        commit(home)
    elif cmd == 'status':
        status(home)
    elif cmd == 'export':
        if version is None:
            # previously this path crashed on an unbound local variable
            parser.error('export requires a version argument')
        export(home, version)
    else:
        _print("unknown command: %s" % cmd)
def lock(func):
    """Decorator for commands to obtain and release lock.

    The wrapped command must take the Dflat home directory as its first
    argument.  The lock is acquired before the command runs and is
    released afterwards even when the command raises, so a failed command
    does not leave a stale ``lock.txt`` behind.
    """
    @wraps(func)
    def new_f(home, *args, **opts):
        # acquire outside the try block: if acquisition fails because the
        # lock is held elsewhere, we must not remove that lock
        _get_lock(home, func)
        try:
            return func(home, *args, **opts)
        finally:
            _release_lock(home)
    return new_f
def log(func):
    """Decorator that configures logging to the Dflat's own log file.

    Before the wrapped command runs, the logger is pointed at
    ``<home>/log/dflat.log``, where ``home`` is the command's first
    argument.  The command's return value is passed through unchanged.
    """
    @wraps(func)
    def logged(home, *args, **opts):
        _configure_logger(j(home, 'log', 'dflat.log'))
        return func(home, *args, **opts)
    return logged
@lock
def init(home):
    """Convert a directory into a Dflat directory.

    Writes the Namaste type tag and ``dflat-info.txt``, creates the ``log``
    directory and the first version, marks that version as current, moves
    the directory's previous contents into ``<version>/full/producer`` and
    builds the version manifest.
    """
    # snapshot the original entries before any Dflat files are created;
    # the lock file written by the @lock decorator is not object content
    contents = [x for x in os.listdir(home) if x != 'lock.txt']

    namaste.dirtype(home, 'dflat_%s' % DFLAT_VERSION, verbose=False)
    # the context manager closes the file even if one of the writes fails
    with open(j(home, 'dflat-info.txt'), 'w') as info:
        info.write(_anvl('Object-scheme', 'Dflat/%s' % DFLAT_VERSION))
        info.write(_anvl('Manifest-scheme', 'Checkm/0.1'))
        info.write(_anvl('Full-scheme', 'Dnatural/%s' % DNATURAL_VERSION))
        info.write(_anvl('Delta-scheme', 'ReDD/%s' % REDD_VERSION))
        info.write(_anvl('Current-scheme', 'file'))
        info.write(_anvl('Class-scheme', 'CLOP/0.3'))

    os.mkdir(j(home, 'log'))
    version = _new_version(home)
    _set_current(home, version)

    # move original inhabitants into their new apartment
    for filename in contents:
        os.rename(j(home, filename),
                  j(home, version, 'full', 'producer', filename))
    _update_manifest(j(home, version))

    # can't use the @log decorator since the log directory doesn't exist
    # when init is called
    log_file = j(home, 'log', 'dflat.log')
    _configure_logger(log_file)
    logging.info("initialized dflat: %s", home)
@log
@lock
def checkout(home):
    """Check out a new working version of the Dflat.

    Copies the current version's directory to the next version label and
    returns that label.  If the next version's directory already exists,
    nothing is copied and the label is returned as-is.
    """
    source_version = _current_version(home)
    target_version = _next_version(home)
    if not os.path.isdir(j(home, target_version)):
        _copy_tree(j(home, source_version), j(home, target_version))
        logging.info('checked out new version %s', target_version)
        _print("checked out %s" % target_version)
    else:
        _print("%s already checked out" % target_version)
    return target_version
@log
@lock
def commit(home):
    """Commit a modified version to the Dflat.

    Compares the latest (working) version against the current version.
    When they differ, the current version's ``full`` directory is replaced
    by a ReDD ``delta`` directory holding what is needed to rebuild it
    from the new version:

    * files deleted or modified in the new version are moved into
      ``delta/add``;
    * files added or modified in the new version are listed (URL-quoted,
      one per line) in ``delta/delete.txt``.

    The working version then becomes the current version.  Returns the
    delta dictionary, or ``None`` when there was nothing to commit.
    """
    current_version = _current_version(home)
    modified_version = _latest_version(home)
    if current_version == modified_version:
        _print("nothing to commit")
        return

    _update_manifest(j(home, modified_version))
    delta = _delta(home, current_version, modified_version)
    if not _has_changes(delta):
        _print("no changes")
        return

    full_dir = j(home, current_version, 'full')
    redd_home = j(home, current_version, 'delta')
    add_dir = j(redd_home, 'add')
    delete_file = j(redd_home, 'delete.txt')

    os.mkdir(redd_home)
    namaste.dirtype(redd_home, 'redd_%s' % REDD_VERSION, verbose=False)

    if delta['deleted']:
        os.mkdir(add_dir)
        for filename in delta['deleted']:
            os.renames(j(full_dir, filename), j(add_dir, filename))

    if delta['added']:
        with open(delete_file, 'w') as delete:
            for filename in delta['added']:
                delete.write("%s\n" % quote(filename))

    if delta['modified']:
        if not os.path.isdir(add_dir):
            os.mkdir(add_dir)
        # append: delete.txt may already list the added files
        with open(delete_file, 'a') as delete:
            for filename in delta['modified']:
                delete.write("%s\n" % quote(filename))
                os.renames(j(full_dir, filename), j(add_dir, filename))

    # os.renames prunes directories it empties, so ``full`` may already be
    # gone if every file in it was moved into the delta
    if os.path.isdir(full_dir):
        shutil.rmtree(full_dir)
    _set_current(home, modified_version)

    # _has_changes() passed above, so the delta directory always has content
    _update_manifest(j(home, current_version), is_delta=True)

    logging.info('committed %s %s', modified_version, delta)
    _print("committed %s" % modified_version)
    return delta
# TODO: add lock decorator?
@log
def export(home, version):
    """Export the specified version of the Dflat.

    Copies the current version into ``<home>/export-<version>`` and then
    walks backwards through the older versions down to ``version``,
    applying each version's ReDD delta (removing the files listed in
    ``delete.txt``, then copying in the contents of ``add``).

    Raises ``Exception`` when ``version`` is not an existing version
    directory.
    """
    # validate specified version
    versions = _versions(home)
    if version not in versions:
        raise Exception("version %s not found in %s" %
                        (version, ", ".join(versions)))

    # copy the latest version
    current_version = _current_version(home)
    export_version = 'export-%s' % version
    export_full = j(home, export_version, 'full')
    _copy_tree(j(home, current_version), j(home, export_version))

    # walk back from latest version-1 to specified version, applying changes
    delta_versions = _versions(home,
                               reverse=True,
                               from_version=current_version,
                               to_version=version)[1:]

    # apply adds, deletes, and replaces
    for delta in delta_versions:
        delta_dir = j(home, delta, 'delta')

        # delete deleted files
        delete_file = j(delta_dir, 'delete.txt')
        if os.path.isfile(delete_file):
            with open(delete_file) as f:
                deletes = f.read().split()
            for delete in deletes:
                os.remove(j(export_full, unquote(delete)))

        # add added files
        add_dir = j(delta_dir, 'add')
        if os.path.isdir(add_dir):
            for filename in os.listdir(add_dir):
                src = j(add_dir, filename)
                dest = j(export_full, filename)
                if os.path.isdir(src):
                    _copy_tree(src, dest)
                else:
                    # _copy_tree only handles directories; a regular file
                    # sitting directly under ``add`` is copied on its own
                    shutil.copy2(src, dest)

    logging.info('exported version %s', version)
def status(home):
    """Print the current status of the Dflat.

    Reports the home directory, the current version and the working
    (latest) version.  When the two versions differ, the working version's
    manifest is refreshed and the added, modified and deleted files are
    listed.  Returns the delta dictionary, or ``None`` when the versions
    are the same.
    """
    _print("dflat home: %s" % home)
    current_version = _current_version(home)
    _print("current version: %s" % current_version)
    latest_version = _latest_version(home)
    _print("working version: %s" % latest_version)

    if current_version == latest_version:
        _print("no changes")
        return None

    _update_manifest(j(home, latest_version))
    delta = _delta(home, current_version, latest_version)
    for dtype in ('added', 'modified', 'deleted'):
        _print_delta_files(delta, dtype)
    return delta
def _update_manifest(version_dir, is_delta=False):
    """Update the manifest for a specific version of the Dflat.

    Walks ``<version_dir>/full`` (or ``<version_dir>/delta`` when
    ``is_delta`` is true) and writes one ``<quoted path> md5 <digest>``
    line per file to ``manifest.txt`` (or ``d-manifest.txt``).  Paths are
    relative to the walked directory.  Files named ``manifest.txt`` or
    ``lock.txt`` are left out.

    Returns the path of the manifest file that was written.
    """
    if is_delta:
        container_dir = j(version_dir, 'delta')
        manifest_file = j(version_dir, 'd-manifest.txt')
    else:
        container_dir = j(version_dir, 'full')
        manifest_file = j(version_dir, 'manifest.txt')

    with open(manifest_file, 'w') as manifest:
        for dirpath, _, filenames in os.walk(container_dir):
            # make the path relative to the container directory; relpath
            # avoids building a regex out of an arbitrary filesystem path
            rel_dir = os.path.relpath(dirpath, container_dir)
            if rel_dir == os.curdir:
                rel_dir = ''
            for filename in filenames:
                if filename in ('manifest.txt', 'lock.txt'):
                    continue
                md5 = _md5(j(dirpath, filename))
                entry = quote(j(rel_dir, filename))
                manifest.write("%s md5 %s\n" % (entry, md5))
    return manifest_file
def _current_version(home):
    """Return the current version of the Dflat.

    Reads the version label from ``<home>/current.txt``.  Surrounding
    whitespace (such as a trailing newline left by a text editor) is
    removed so the label can be used as a directory name.  Returns
    ``None`` when the file is missing or empty.
    """
    current_file = os.path.join(home, 'current.txt')
    if not os.path.isfile(current_file):
        return None
    with open(current_file, 'r') as f:
        version = f.read().strip()
    return version or None
def _anvl(name, value):
    """Format a name/value pair as one ANVL line (``name: value`` + newline)."""
    template = "%s: %s\n"
    return template % (name, value)
def _get_lock(home, caller):
    """Obtain a LockIt lock.

    Creates ``<home>/lock.txt`` containing a ``Lock:`` line with an
    RFC 3339 timestamp and an agent name derived from ``caller.__name__``.
    The file is created with ``O_EXCL`` so that checking for an existing
    lock and taking the lock happen in a single step.

    Raises ``Exception("already locked")`` when the lock file already
    exists.
    """
    # TODO: log this operation?
    import errno

    lockfile = j(home, 'lock.txt')
    try:
        # 0o666 (before umask) matches what the built-in open() would use
        fd = os.open(lockfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o666)
    except OSError as err:
        if err.errno == errno.EEXIST:
            raise Exception("already locked")
        raise
    timestamp = _rfc3339(datetime.datetime.now())
    agent = "dflat-%s" % caller.__name__
    with os.fdopen(fd, 'w') as handle:
        handle.write("Lock: %s %s\n" % (timestamp, agent))
def _release_lock(home):
    """Remove ``<home>/lock.txt`` if it is present; otherwise do nothing."""
    # TODO: log this operation?
    lock_path = os.path.join(home, 'lock.txt')
    if os.path.isfile(lock_path):
        os.remove(lock_path)
def _new_version(home):
    """Create the directory skeleton for the next full version.

    Makes ``<version>/full`` (tagged as a Dnatural directory via Namaste),
    ``<version>/full/producer`` and an empty ``<version>/manifest.txt``.
    Returns the new version label.
    """
    version = _next_version(home)
    version_dir = j(home, version)
    full_dir = j(version_dir, 'full')

    os.mkdir(version_dir)
    os.mkdir(full_dir)
    namaste.dirtype(full_dir, 'dnatural_%s' % DNATURAL_VERSION,
                    verbose=False)
    os.mkdir(j(full_dir, 'producer'))
    # create (or truncate) the manifest without writing anything to it
    with open(j(version_dir, 'manifest.txt'), 'w'):
        pass
    return version
def _next_version(home):
    """Return the name of the version following the current version.

    Returns ``'v001'`` when the Dflat has no current version yet;
    otherwise the current version number plus one, zero-padded to at
    least three digits.
    """
    version = _current_version(home)
    if version is None:
        return 'v001'
    return 'v%03i' % (_version_number(version) + 1)
def _latest_version(home):
    """Return the highest-numbered version label, or None if there is none."""
    versions = _versions(home)
    return versions[-1] if versions else None
def _versions(home, reverse=False, from_version=None, to_version=None):
    """Return an array of all versions in the Dflat.

    Versions are the entries of ``home`` named ``v`` followed by digits.
    ``from_version`` is an inclusive upper bound and ``to_version`` an
    inclusive lower bound on the version number.  The result is sorted by
    version number, ascending by default or descending when ``reverse`` is
    true.
    """
    versions = [x for x in os.listdir(home) if re.match(r'^v\d+$', x)]
    if from_version:
        upper = _version_number(from_version)
        versions = [x for x in versions if _version_number(x) <= upper]
    if to_version:
        lower = _version_number(to_version)
        versions = [x for x in versions if _version_number(x) >= lower]
    # one stable sort in the requested direction
    versions.sort(key=_version_number, reverse=bool(reverse))
    return versions
def _version_number(version_dir):
    """Return the integer after the one-character prefix of a version name."""
    digits = version_dir[1:]
    return int(digits)
def _md5(filename):
    """Helper method to checksum files for a Dflat manifest.

    Reads the file in binary mode in 4 KiB chunks and returns the MD5
    digest as a hexadecimal string.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF)
        for chunk in iter(lambda: f.read(0x1000), b''):
            digest.update(chunk)
    return digest.hexdigest()
def _delta(home, old_version, new_version):
    """
    Compare the manifests of two versions.

    Returns a dictionary with three lists of file names: ``modified``
    (in both manifests with different checksums), ``deleted`` (only in the
    old manifest) and ``added`` (only in the new manifest).
    """
    old = _manifest_dict(home, old_version)
    new = _manifest_dict(home, new_version)

    modified = [name for name in new
                if name in old and new[name] != old[name]]
    added = [name for name in new if name not in old]
    deleted = [name for name in old if name not in new]

    return {'modified': modified, 'deleted': deleted, 'added': added}
def _print_delta_files(delta, dtype):
    """Print one category (``dtype``) of a delta, if it has any files.

    The category's list is sorted in place, then printed under a
    ``<dtype>:`` heading with each file name URL-unquoted.
    """
    files = delta[dtype]
    files.sort()  # in place: the caller's list ends up sorted too
    if not files:
        return
    _print("%s:" % dtype)
    for name in files:
        _print(" %s" % unquote(name))
def _has_changes(delta):
    """Return True when at least one list in the delta is non-empty."""
    return any(len(files) > 0 for files in delta.values())
def _manifest_dict(home, version):
    """Parse a Checkm manifest into a dictionary.

    Reads ``<home>/<version>/manifest.txt`` and maps each URL-unquoted
    file name (first column) to its checksum (third column).  Lines
    starting with ``#`` are comments.  Blank lines and lines with fewer
    than three columns are skipped.
    """
    manifest_dict = {}
    with open(os.path.join(home, version, 'manifest.txt')) as f:
        for line in f:
            if line.startswith('#'):
                continue
            cols = line.split()
            if len(cols) < 3:
                # blank or truncated line: nothing usable to record
                continue
            manifest_dict[unquote(cols[0])] = cols[2]
    return manifest_dict
def _dflat_home(directory):
    """
    Return the absolute path of the Dflat containing the given directory,
    if any.

    Starting at ``directory`` and moving up one parent at a time, returns
    the first directory that contains an entry named ``dflat-info.txt``.
    Returns ``None`` once the filesystem root has been checked without a
    match.
    """
    directory = os.path.abspath(directory)
    while True:
        if 'dflat-info.txt' in os.listdir(directory):
            return directory
        parent = os.path.dirname(directory)
        if parent == directory:
            # dirname() of a root is the root itself, whatever its
            # spelling, so this stops on any platform
            return None
        directory = parent
def _option_parser():
    """Build the command-line parser; its usage text lists the commands."""
    usage_lines = [
        'usage: %prog <command> [args]',
        'commands:',
        'init initialize current working directory as a dflat',
        'checkout check out a new version of the dflat for modification',
        'commit commit new version as the current version of the object',
        'status report uncommitted changes to the dflat in the current directory',
        'export export the current version of the dflat into a new directory',
    ]
    return optparse.OptionParser(usage='\n'.join(usage_lines))
def _set_current(home, version):
    """Record ``version`` as the current version in ``<home>/current.txt``."""
    current_path = os.path.join(home, 'current.txt')
    with open(current_path, 'w') as handle:
        handle.write(version)
def _configure_logger(filename):
    """Set up root logging to ``filename`` at INFO level.

    Timestamps are formatted as ``YYYY-MM-DDTHH:MM:SS`` followed by the
    local UTC offset returned by ``_timezone()``.
    """
    settings = {
        'filename': filename,
        'level': logging.INFO,
        'format': '%(asctime)s %(levelname)-8s %(message)s',
        'datefmt': '%Y-%m-%dT%H:%M:%S' + _timezone(),
    }
    logging.basicConfig(**settings)
def _timezone():
    """Return the timezone.

    The result is the local UTC offset formatted as ``+HH:MM`` or
    ``-HH:MM``.  The daylight-saving offset is used only while DST is in
    effect for the current local time.
    """
    # time.daylight only says that a DST zone is defined; tm_isdst tells
    # whether DST applies right now
    if time.daylight and time.localtime().tm_isdst > 0:
        utcoffset = -time.altzone
    else:
        utcoffset = -time.timezone
    # split the magnitude so that negative offsets with a minutes part
    # (e.g. -03:30) are not rounded down to the next hour
    sign = '-' if utcoffset < 0 else '+'
    hours, remainder = divmod(abs(utcoffset), 3600)
    return '%s%02d:%02d' % (sign, hours, remainder // 60)
def _rfc3339(dt):
    """Format a datetime as ``YYYY-MM-DDTHH:MM:SS`` plus the local UTC offset."""
    local_stamp = dt.strftime('%Y-%m-%dT%H:%M:%S')
    return local_stamp + _timezone()
def _print(msg):
    """Print ``msg`` unless the module-level ``_QUIET`` flag is set."""
    if _QUIET:
        return
    print(msg)
def _copy_tree(src_dir, dest_dir):
    """
    Recursively copy the contents of ``src_dir`` into ``dest_dir``.

    Unlike shutil.copytree, the destination (and any sub-directory of it)
    may already exist; existing files are overwritten.
    """
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)
    for entry in os.listdir(src_dir):
        source = os.path.join(src_dir, entry)
        target = os.path.join(dest_dir, entry)
        if not os.path.isdir(source):
            shutil.copy2(source, target)  # copy2 preserves permissions
            continue
        if not os.path.exists(target):
            os.mkdir(target)
            # a freshly made directory needs its permissions copied by hand
            shutil.copystat(source, target)
        _copy_tree(source, target)