public
Description: Python backup classes to backup files to key/value store, such as Amazon S3
Homepage: http://www.1729.com/software/keevalbak/index.html
Clone URL: git://github.com/pdorrell/keevalbak.git
keevalbak / keevalbak / BackupOperations.py
27d6c3a9 » Philip Dorrell 2008-06-01 init 1 # Copyright (c) 2008 Philip Dorrell, http://www.1729.com/
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a copy
4 # of this software and associated documentation files (the "Software"), to deal
5 # in the Software without restriction, including without limitation the rights
6 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 # copies of the Software, and to permit persons to whom the Software is
8 # furnished to do so, subject to the following conditions:
9 #
10 # The above copyright notice and this permission notice shall be included in
11 # all copies or substantial portions of the Software.
12 #
13 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 # THE SOFTWARE.
20
21 import yaml
22 import hashlib
23 import os
24 import time
25 import datetime
26 import shutil
27 import CompareDirectories
28 import re
29 from sets import Set
30
def readFileBytes(filename):
    """Read the named file and return its whole contents as a byte string.

    The file is opened in binary mode.  The context manager guarantees the
    handle is closed even if the read raises.
    """
    with open(filename, "rb") as f:
        return f.read()
37
def writeFileBytes(filename, bytes):
    """Write a byte string as the new contents of the named file.

    The file is opened in binary mode, so any existing contents are
    replaced.  The context manager guarantees the handle is closed (and the
    data flushed) even if the write raises.
    """
    with open(filename, "wb") as f:
        f.write(bytes)
43
# Version number of the backup format handled by this module.  It is written
# under a "/version" key for each backup and compared against the stored
# value by checkVersion().
BackupsVersion = 2
b2451d80 » Philip Dorrell 2008-06-29 make deleteMapValues multi-... 45
class PathSummary(object):
    """Information about a file or directory specified as a relative path within some base directory.

    Note: all paths are '/' separated, whether or not we are in Microsoft Windows.
    """

    def __init__(self, relativePath):
        self.relativePath = relativePath

    def fullPath(self, basePath):
        """Return the full path given the path of the base directory
        (plain concatenation of the base path and the relative path)."""
        return basePath + self.relativePath

    @staticmethod
    def fromYamlData(data):
        """Convert YAML data into a FileSummary or DirSummary (inverse of the toYamlData methods).

        The "type" entry of the data selects the summary class.
        Raises ValueError if the type is neither "file" nor "dir".
        """
        pathType = data["type"]
        if pathType == "file":
            return FileSummary.fromYamlData(data)
        elif pathType == "dir":
            return DirSummary.fromYamlData(data)
        else:
            # A real exception class is required here: raising a plain string
            # is itself an error on Python 2.6+ and hides the message.
            raise ValueError("Unknown path type: %s" % pathType)
66
class FileSummary(PathSummary):
    """Summary of one file: its relative path within some (unspecified) base
    directory, together with a SHA1 hash of the file's contents."""

    def __init__(self, relativePath, hash):
        super(FileSummary, self).__init__(relativePath)
        self.hash = hash
        self.isFile = True
        self.isDir = False

    def __unicode__(self):
        description = (self.relativePath, self.hash)
        return u"FILE: %r : %s" % description

    def __repr__(self):
        return self.__unicode__()

    def toYamlData(self):
        """Return a plain dict (type, path, hash) suitable for storing as YAML"""
        yamlData = {"type": "file"}
        yamlData["path"] = self.relativePath
        yamlData["hash"] = self.hash
        return yamlData

    @staticmethod
    def fromYamlData(data):
        """Rebuild a FileSummary from the dict produced by toYamlData"""
        relativePath = data["path"]
        contentHash = data["hash"]
        return FileSummary(relativePath, contentHash)
27d6c3a9 » Philip Dorrell 2008-06-01 init 92
class DirSummary(PathSummary):
    """Summary of one directory, specified as a relative path within some
    (unspecified) base directory"""

    def __init__(self, relativePath):
        super(DirSummary, self).__init__(relativePath)
        self.isFile = False
        self.isDir = True

    def __unicode__(self):
        return u"DIR: %r" % self.relativePath

    def __repr__(self):
        return self.__unicode__()

    def toYamlData(self):
        """Return a plain dict (type, path) suitable for storing as YAML"""
        yamlData = {"type": "dir"}
        yamlData["path"] = self.relativePath
        return yamlData

    @staticmethod
    def fromYamlData(data):
        """Rebuild a DirSummary from the dict produced by toYamlData"""
        relativePath = data["path"]
        return DirSummary(relativePath)
84b538f0 » Philip Dorrell 2008-06-06 implement verifyIncremental... 116
def sha1Digest(content):
    """Return the SHA1 digest of a byte string, as a hexadecimal string"""
    digest = hashlib.sha1()
    digest.update(content)
    return digest.hexdigest()
27d6c3a9 » Philip Dorrell 2008-06-01 init 119
class DirectoryInfo:
    """Summaries of every directory and file found beneath a base directory.

    A directory's own summary is always added before the summaries of the
    sub-directories and files it contains.
    """

    def __init__(self, path):
        """Construct from the path of the base directory; the whole tree is scanned immediately"""
        self.path = unicode(path)
        self.pathSummaries = []
        self.summarizeSubDir(u"")

    def createDirSummary(self, relativePath):
        """Build the path summary for a sub-directory"""
        return DirSummary(relativePath)

    def createFileSummary(self, relativePath):
        """Build the path summary for a file, hashing the file's full contents"""
        contentHash = sha1Digest(readFileBytes(self.path + relativePath))
        return FileSummary(relativePath, contentHash)

    def addSummary(self, pathSummary):
        """Log a path summary and append it to the list"""
        print(u"%r" % pathSummary)
        self.pathSummaries.append(pathSummary)

    def getPathSummariesYamlData(self):
        """Return the path summaries as a list of YAML-ready data"""
        yamlData = []
        for summary in self.pathSummaries:
            yamlData.append(summary.toYamlData())
        return yamlData

    def summarizeSubDir(self, relativePath):
        """Recursively summarize the sub-directory with the given relative path,
        appending a summary for each file and sub-directory found inside it.
        Anything that is neither a file nor a directory is only logged."""
        dirPath = self.path + relativePath
        for childName in os.listdir(dirPath):
            childRelativePath = relativePath + "/" + childName
            childPath = self.path + childRelativePath
            if os.path.isfile(childPath):
                self.addSummary(self.createFileSummary(childRelativePath))
                continue
            if os.path.isdir(childPath):
                # the directory itself is recorded before its contents
                self.addSummary(self.createDirSummary(childRelativePath))
                self.summarizeSubDir(childRelativePath)
                continue
            print("UNKNOWN OBJECT %r in %r" % (childName, dirPath))
27d6c3a9 » Philip Dorrell 2008-06-01 init 163
class HashVerificationRecords(object):
    """Records of verified hashes of backed up files (i.e. verified by actually reading
    the file content out of the backup map and recalculating the hash).

    The records for each backup are kept in the backup map as YAML under the
    key formed from the backup's datetime plus "/verifiedFileHashes.yaml".

    Note (from the original author): this class is not yet used, and nothing
    is yet writing the verification records into the backup map.
    """

    def __init__(self, backupMap):
        self.backupMap = backupMap
        # datetime -> {filePath: contentHash}, loaded lazily per backup
        self.datetimeFileHashesMap = {}
        # datetimes whose records changed and need writing back.
        # Uses the builtin set rather than the deprecated sets.Set.
        self.datetimeUpdated = set()

    def getFileHashesMap(self, datetime):
        """Return the {filePath: hash} map for the backup with the given datetime.

        The map is loaded from the backup map on first use (or starts empty if
        no record exists there) and is cached for subsequent calls.
        """
        if datetime in self.datetimeFileHashesMap:
            fileHashesMap = self.datetimeFileHashesMap[datetime]
        else:
            fileHashesRecordFilename = datetime + "/verifiedFileHashes.yaml"
            if fileHashesRecordFilename in self.backupMap:
                fileHashesMap = yaml.safe_load(self.backupMap[fileHashesRecordFilename])
            else:
                fileHashesMap = {}
            self.datetimeFileHashesMap[datetime] = fileHashesMap
        return fileHashesMap

    def markVerified(self, datetime, filePath, contentHash):
        """Record a verified hash for a file in a backup, and flag that backup's
        records as needing to be written back by updateRecords."""
        fileHashesMap = self.getFileHashesMap(datetime)
        fileHashesMap[filePath] = contentHash
        self.datetimeUpdated.add(datetime)

    def getWrittenFileHash(self, datetime, filePath):
        """Get the hash of a backed up file, either from an existing hash verification record,
        or, read the file contents from the backup map and calculate the hash
        (which is then recorded as verified)."""
        fileHashesMap = self.getFileHashesMap(datetime)
        if filePath in fileHashesMap:
            return fileHashesMap[filePath]
        else:
            content = self.backupMap[datetime + "/files" + filePath]
            contentHash = sha1Digest(content)
            self.markVerified(datetime, filePath, contentHash)
            return contentHash

    def updateRecords(self):
        """Update any newly verified hashes back into the backup map."""
        print("Verified hashes were updated for %r" % self.datetimeUpdated)
        for datetime in self.datetimeUpdated:
            fileHashesRecordFilename = datetime + "/verifiedFileHashes.yaml"
            print("Updating verification records for %s = %s" % (datetime,
                                                                 self.datetimeFileHashesMap[datetime]))
            self.backupMap[fileHashesRecordFilename] = yaml.safe_dump(self.datetimeFileHashesMap[datetime])
27d6c3a9 » Philip Dorrell 2008-06-01 init 212
class BackupRecord:
    """Record of one backup: its type ('full' or 'incremental'), its date/time,
    and whether it ran to completion."""

    def __init__(self, type, datetime, completed):
        """Construct from the backup type, the date time and the completed flag"""
        self.completed = completed
        self.datetime = datetime
        self.type = type

    @staticmethod
    def fromYamlData(data):
        """Build a backup record from YAML data (inverse of toYamlData)"""
        # Records written by earlier versions of keevalbak have no "completed"
        # entry because they were only recorded once complete, hence the default.
        completed = data.get("completed", True)
        return BackupRecord(data["type"], data["datetime"], completed)

    def toYamlData(self):
        """Return the record as a plain dict to be stored in YAML"""
        return dict(type=self.type, datetime=self.datetime, completed=self.completed)

    def isFull(self):
        """True if this record is for a full backup"""
        return self.type == "full"

    def __str__(self):
        status = "complete" if self.completed else "INCOMPLETE"
        return "[Backup: %s %s %s]" % (self.type, self.datetime, status)

    def __repr__(self):
        return self.__str__()
239
class InvalidBackupsVersion(Exception):
    """Raised when a backup's recorded version differs from BackupsVersion.
    The offending version is available as the 'version' attribute."""

    def __init__(self, backupRecord, version):
        message = ("Invalid backup for backup record %s version %d (this version = %d)"
                   % (backupRecord, version, BackupsVersion))
        Exception.__init__(self, message)
        self.version = version
245
def getBackupsVersion(backupMap, backupRecord):
    """Return the integer version recorded for a backup in the backup map.

    The version is read from the key formed by the record's datetime plus
    "/version"; a backup with no such key is taken to be version 1.
    """
    versionKey = backupRecord.datetime + "/version"
    if versionKey not in backupMap:
        return 1
    return int(backupMap[versionKey])
252
def checkVersion(backupMap, backupRecord):
    """Raise InvalidBackupsVersion unless the backup's recorded version
    equals BackupsVersion."""
    foundVersion = getBackupsVersion(backupMap, backupRecord)
    if foundVersion == BackupsVersion:
        return
    raise InvalidBackupsVersion(backupRecord, foundVersion)
257
class WrittenRecords:
    """Map from SHA1 content hash to the backup map location where content with
    that hash was written (within the context of a particular set of backups,
    i.e. a full backup and the incrementals that follow it)"""

    def __init__(self):
        self.written = {}

    def recordHashWritten(self, hash, key):
        """Note (and log) that content with the given hash was written to the given key"""
        print(" record hash %s written to %r" % (hash, key))
        self.written[hash] = key

    def isWritten(self, hash):
        """Is there a recorded location for content with this hash value?"""
        return hash in self.written

    def locationWritten(self, hash):
        """Return the recorded location for content with this hash value
        (KeyError if none was recorded)"""
        return self.written[hash]

    def recordBackup(self, backupMap, backupRecord):
        """Record hash and location for every entry in a backup's written path list.
        The backup's version is checked first. The location recorded for each
        entry is the backup's datetime followed by the entry's path."""  # todo: slow
        checkVersion(backupMap, backupRecord)
        backupDatetime = backupRecord.datetime
        writtenPathListYaml = backupMap[backupDatetime + "/writtenPathList"]
        for fileData in yaml.safe_load(writtenPathListYaml):
            self.recordHashWritten(fileData["hash"], backupDatetime + fileData["path"])

    def recordPreviousBackups(self, backupMap, backupRecords):
        """Record the written hashes of the backups from the last full backup onwards
        (or of all the backups if there is no full backup). Records are processed
        newest first, stopping after the first full backup encountered."""
        for backupRecord in reversed(backupRecords):
            print("Recording backup %r ..." % backupRecord)
            self.recordBackup(backupMap, backupRecord)
            if backupRecord.type == "full":
                break
299
class BaseFileHash(object):
    """A file described by its (basic) name and content hash, plus a
    description of where the information came from (used in difference messages)"""

    def __init__(self, name, hash, description):
        self.description = description
        self.hash = hash
        self.name = name

    def isDir(self):
        """A file hash is never a directory"""
        return False

    def printIndented(self, indent):
        """Print the file's name and hash, prefixed by the given indent string"""
        print("%sFile %r: %s" % (indent, self.name, self.hash))

    def compareToOtherFileHash (self, otherFileHash, indent, log, logDiff):
        """Report via logDiff if the other file hash has a different hash value.
        (indent and log are accepted but not used here.)"""
        if self.hash == otherFileHash.hash:
            return
        details = (self.name, self.hash, self.description,
                   otherFileHash.hash, otherFileHash.description)
        logDiff("File %r has hash %s in %r but hash %s in %r" % details)
84b538f0 » Philip Dorrell 2008-06-06 implement verifyIncremental... 318
# Matches a path starting with '/': group 1 is the first component and
# group 2 is the remainder (itself starting with '/'), if there is one.
pathRegex = re.compile("[/]([^/]*)([/].*)?")

def analysePath(path):
    """Split a '/'-separated path starting with '/' into (first part, remainder),
    e.g. '/x/y' gives ('x', '/y') and '/x' gives ('x', None)."""
    return pathRegex.match(path).groups()
328
class BaseDirHash(object):
    """A directory described by its immediate children (sub-directories and
    files), held both as an ordered list and as a map keyed by child name"""

    def __init__(self, name, description):
        self.name = name
        self.description = description
        self.children = []
        self.childrenMap = {}

    def isDir(self):
        """A directory hash is always a directory"""
        return True

    def addChild(self, childHash):
        """Add a child (directory or file) to both the list and the name map"""
        self.children.append(childHash)
        self.childrenMap[childHash.name] = childHash

    def hasChildNamed(self, childName):
        """Is there an immediate child with this name?"""
        return childName in self.childrenMap

    def printIndented(self, indent = ""):
        """Print this directory and, one level further indented, all its children"""
        print("%sDir %r" % (indent, self.name))
        childIndent = " " + indent
        for child in self.children:
            child.printIndented(indent = childIndent)

    def addFileSummary(self, path, hash):
        """Add a file given its path relative to this directory, creating any
        intermediate sub-directories that do not already exist"""
        rootPath, remainderPath = analysePath(path)
        if remainderPath is not None:
            # the file lives further down: descend into (or create) the sub-directory
            self.getOrCreateChildDirHash(rootPath).addFileSummary(remainderPath, hash)
            return
        self.addChild(BaseFileHash(rootPath, hash, self.description))

    def getOrCreateChildDirHash(self, name):
        """Return the child with the given name, adding a new sub-directory
        of that name first if there is no such child"""
        if name not in self.childrenMap:
            self.addChild(BaseDirHash(name, self.description))
        return self.childrenMap[name]

    def addDirSummary(self, path):
        """Add a sub-directory given its path relative to this directory, creating
        any intermediate sub-directories that do not already exist"""
        rootPath, remainderPath = analysePath(path)
        if remainderPath is not None:
            self.getOrCreateChildDirHash(rootPath).addDirSummary(remainderPath)
            return
        self.addChild(BaseDirHash(rootPath, self.description))

    def compareToOtherDirHash(self, otherDirHash, indent, log, logDiff):
        """Compare this directory with another, recursively, reporting each
        difference through logDiff. log is called with (indent, message) for
        each directory compared; indent is incremented by 1 per level."""
        log(indent, "comparing directory %r" % self.name)
        descriptions = (self.description, otherDirHash.description)
        # First pass: everything in this directory, checked against the other
        for mine in self.children:
            childName = mine.name
            theirs = otherDirHash.childrenMap.get(childName, None)
            if mine.isDir():
                if theirs is None:
                    logDiff("%r is a directory in %r but does not exist in %r" %
                            ((childName,) + descriptions))
                elif theirs.isDir():
                    mine.compareToOtherDirHash(theirs, indent+1, log, logDiff)
                else:
                    logDiff("%r is a directory in %r but a file in %r" %
                            ((childName,) + descriptions))
            else:
                if theirs is None:
                    logDiff("%r is a file in %r but does not exist in %r" %
                            ((childName,) + descriptions))
                elif theirs.isDir():
                    logDiff("%r is a file in %r but a directory in %r" %
                            ((childName,) + descriptions))
                else:
                    mine.compareToOtherFileHash(theirs, indent+1, log, logDiff)
        # Second pass: anything in the other directory that is missing here
        for theirs in otherDirHash.children:
            if self.hasChildNamed(theirs.name):
                continue
            if theirs.isDir():
                logDiff("%r does not exist in %r but is a directory in %r" %
                        ((theirs.name,) + descriptions))
            else:
                logDiff("%r does not exist in %r but is a file in %r" %
                        ((theirs.name,) + descriptions))
27d6c3a9 » Philip Dorrell 2008-06-01 init 419
class FileHash(BaseFileHash):
    """File hash computed from an actual file in the file system: the file
    called name inside directory dir is read and its contents hashed"""

    def __init__(self, dir, name, description):
        contentHash = sha1Digest(readFileBytes(dir + "/" + name))
        super(FileHash, self).__init__(name, contentHash, description)
27d6c3a9 » Philip Dorrell 2008-06-01 init 427
class DirHash(BaseDirHash):
    """Directory hash built from an actual directory in the file system,
    recursively including everything the directory contains"""

    def __init__(self, dir, name, description):
        super(DirHash, self).__init__(name, description)
        # An empty name means dir itself is the directory to scan
        if name:
            fullPath = unicode(dir + "/" + name)
        else:
            fullPath = unicode(dir)
        for childName in os.listdir(fullPath):
            # anything that is not a regular file is treated as a sub-directory
            if os.path.isfile(fullPath + "/" + childName):
                childHash = FileHash(fullPath, childName, self.description)
            else:
                childHash = DirHash(fullPath, childName, self.description)
            self.addChild(childHash)
27d6c3a9 » Philip Dorrell 2008-06-01 init 440
class ContentKey(object):
    """Parameters of the key used to look up a file's contents from a
    particular backup within a backup map."""

    def __init__(self, datetime, filePath):
        """Construct from the backup's datetime and the file's path.
        Note that filePath is expected to start with a '/'"""
        self.filePath = filePath
        self.datetime = datetime

    def fileKey(self):
        """Return the actual backup map key.
        Note: the "/files" infix leaves room for other meta-data to be stored under the datetime."""
        return "".join([self.datetime, "/files", self.filePath])

    def __str__(self):
        return "[%s:%r]" % (self.datetime, self.filePath)

    def __repr__(self):
        return self.__str__()
1fc8bbfb » Philip Dorrell 2008-06-19 add BackupRecordUpdater 458
class BackupRecordUpdater:
    """Records the current state of a backup in progress, by delegating to
    the 'backups' object it was constructed with"""

    def __init__(self, backups, backupRecords, currentBackupRecord, backupKeyBase,
                 directoryInfo, recordTrigger = 1000000):
        self.backups = backups
        self.backupRecords = backupRecords
        self.currentBackupRecord = currentBackupRecord
        self.backupKeyBase = backupKeyBase
        self.directoryInfo = directoryInfo
        self.recordTrigger = recordTrigger
        # Byte counters start at zero; nothing in this class updates them
        self.bytesWritten = 0
        self.unrecordedBytes = 0
        # File summaries accumulated for recordWrittenFileSummaries
        self.writtenFileSummaries = []

    def recordVersion(self):
        """Write the current BackupsVersion under this backup's "/version" key"""
        versionKey = self.backupKeyBase + "/version"
        self.backups.backupMap[versionKey] = str(BackupsVersion)

    def recordPathSummaries(self):
        """Record the directory info's path summaries for this backup"""
        self.backups.recordPathSummaries(self.backupKeyBase, self.directoryInfo)

    def recordWrittenFileSummaries(self):
        """Record the written file summaries accumulated so far for this backup"""
        self.backups.recordWrittenFileSummaries(self.backupKeyBase, self.writtenFileSummaries)

    def saveBackupRecords(self):
        """Save the list of backup records"""
        self.backups.saveBackupRecords(self.backupRecords)

    def checkpoint(self):
        """Intermediate save: only the written file summaries are recorded"""
        self.recordWrittenFileSummaries()

    def initialRecord(self):
        """Record everything needed at the start of a backup: version, path
        summaries, written file summaries and the backup records (in that order)"""
        for step in (self.recordVersion, self.recordPathSummaries,
                     self.recordWrittenFileSummaries, self.saveBackupRecords):
            step()

    def recordCompleted(self):
        """Mark the current backup record as completed and save the final state"""
        self.currentBackupRecord.completed = True
        for step in (self.recordWrittenFileSummaries, self.saveBackupRecords):
            step()
1fc8bbfb » Philip Dorrell 2008-06-19 add BackupRecordUpdater 498
b147c7f4 » Philip Dorrell 2008-07-03 ThreadedTaskRunner inherits... 499 from ThreadedTaskRunner import ThreadedTaskRunner, TaskRunner
3fd43cb5 » Philip Dorrell 2008-06-29 using ThreadedTaskRunner af... 500
d973ff37 » Philip Dorrell 2008-07-15 record written files separa... 501 #taskRunner = TaskRunner(checkpointFreq = 30)
3fd43cb5 » Philip Dorrell 2008-06-29 using ThreadedTaskRunner af... 502
# Module-wide task runner used by deleteMapValues, doBackup and restoreDirectory.
# NOTE(review): checkpointFreq and numThreads are interpreted by ThreadedTaskRunner
# (not visible here) -- presumably tasks-per-checkpoint and worker-thread count; confirm there.
taskRunner = ThreadedTaskRunner (checkpointFreq = 500, numThreads = 30)
b2451d80 » Philip Dorrell 2008-06-29 make deleteMapValues multi-... 504
class DeleteBackupMapValueTask:
    """Task that deletes one key from a backup map, in the shape expected by the task runner
    (getThreadLocals / doUnsynchronized / doSynchronized)."""

    def __init__(self, backupMap, key):
        self.key = key
        self.backupMap = backupMap

    def getThreadLocals(self):
        """Return the per-thread values for this task: a clone of the backup map, keyed "backupMap"."""
        threadLocals = {}
        threadLocals["backupMap"] = self.backupMap.clone()
        return threadLocals

    def doUnsynchronized(self):
        """Log the key and delete it from the backup map."""
        print(" delete %r ..." % self.key)
        del self.backupMap[self.key]

    def doSynchronized(self):
        """Deleting needs no synchronized follow-up work."""
        return None
519
def deleteMapValues(backupMap, dryRun):
    """Delete every key of backupMap via the module task runner.
    When dryRun is true, only log the keys that would be deleted."""
    prefix = "DRYRUN: " if dryRun else ""
    print("%sDeleting keys from map %s" % (prefix, backupMap))
    if dryRun:
        for key in backupMap:
            print(" delete %r ..." % key)
    else:
        # Collect all tasks first, so the map is not modified while it is being iterated
        deleteTasks = [DeleteBackupMapValueTask(backupMap, key) for key in backupMap]
        taskRunner.runTasks (deleteTasks)
    print("finished.")
532
27d6c3a9 » Philip Dorrell 2008-06-01 init 533 class IncrementalBackups:
44f6addd » Philip Dorrell 2008-06-04 pydocs, missing license and... 534 """A set of dated full or incremental backups within a given backup map.
535 This object does _not_ (currently) record _where_ the file contents came from.
536 """
def __init__(self, backupMap, recordTrigger = 10000000):
    """Create a set of backups stored in backupMap (a key/value map).
    recordTrigger is kept and handed to the BackupRecordUpdater created by doBackup."""
    self.recordTrigger = recordTrigger
    self.backupMap = backupMap
27d6c3a9 » Philip Dorrell 2008-06-01 init 540
def getDateTimeString(self):
    """Return the current local time formatted like "2008-Jun-01.13-45-09";
    doBackup uses this string as the key base of a new dated backup."""
    dateTimeFormat = "%Y-%b-%d.%H-%M-%S"
    return time.strftime(dateTimeFormat)
544
def getBackupRecords(self):
    """Return the list of BackupRecord objects stored (as YAML) under the
    "backupRecords" key of the backup map, or an empty list if that key is absent."""
    if "backupRecords" not in self.backupMap:
        return []
    recordsYamlData = yaml.safe_load(self.backupMap["backupRecords"])
    return [BackupRecord.fromYamlData(recordData) for recordData in recordsYamlData]
bce0bbdd » Philip Dorrell 2008-06-03 implement listBackups and p... 552
def saveBackupRecords(self, backupRecords):
    """Serialise the given backup records to YAML and store the result under
    the "backupRecords" key of the backup map."""
    recordsYamlData = []
    for backupRecord in backupRecords:
        recordsYamlData.append(backupRecord.toYamlData())
    self.backupMap["backupRecords"] = yaml.safe_dump(recordsYamlData)
    print("new backup records = %r" % backupRecords)
557
def getBackupGroups(self):
    """Return the backup records split into groups (a list of lists).

    A new group starts at every full backup, and also at the very first record
    even if it is not full; every other record joins the group before it.
    """
    groups = []
    for position, backupRecord in enumerate(self.getBackupRecords()):
        startsNewGroup = backupRecord.isFull() or position == 0
        if startsNewGroup:
            groups.append([backupRecord])
        else:
            # A previous group always exists here, because position 0 always starts one
            groups[-1].append(backupRecord)
    return groups
571
def listBackups(self):
    """Print one line per backup: type, datetime and completion state.
    The first backup of each group is flagged with a leading "*"."""
    for recordGroup in self.getBackupGroups():
        for position, backupRecord in enumerate(recordGroup):
            marker = "*" if position == 0 else " "
            status = "complete" if backupRecord.completed else "INCOMPLETE"
            print("%s%s: %s %s" % (marker, backupRecord.type, backupRecord.datetime, status))
bce0bbdd » Philip Dorrell 2008-06-03 implement listBackups and p... 583
def pruneBackup(self, backupRecord, dryRun):
    """Delete (or, with dryRun, only list) all keys in the sub-map of the
    backup map selected by the record's datetime."""
    print(" prune backup %r" % backupRecord)
    datedSubMap = self.backupMap.subMap(backupRecord.datetime)
    deleteMapValues(datedSubMap, dryRun)
589
def pruneBackupGroup(self, recordGroup, dryRun):
    """Prune each backup of one backup group in turn, passing dryRun through."""
    groupDescription = "%r" % recordGroup
    print("Backup group to prune: " + groupDescription)
    for backupRecord in recordGroup:
        self.pruneBackup(backupRecord, dryRun)
595
def pruneBackups(self, keep = 1, dryRun = True):
    """Prune the oldest backup groups, keeping the `keep` most recent ones.

    keep must be at least 1, otherwise an Exception is raised. With dryRun the
    groups are only listed; the saved backup records are updated only when
    dryRun is false.
    """
    dryRunSuffix = ", DRY RUN" if dryRun else ""
    print("Pruning backups, keep %d%s" % (keep, dryRunSuffix))
    if keep < 1:
        raise Exception ("Number of full backups to keep must be at least 1")
    recordGroups = self.getBackupGroups()
    groupCount = len(recordGroups)
    if keep >= groupCount:
        print("Only %d full backups, and %d specified to keep, so none will be pruned" % (groupCount, keep))
        return
    numToPrune = groupCount - keep
    for recordGroup in recordGroups[:numToPrune]:
        self.pruneBackupGroup(recordGroup, dryRun = dryRun)
    if dryRun:
        return
    # Flatten the surviving groups back into a single list of records
    remainingRecords = [record for group in recordGroups[numToPrune:] for record in group]
    self.saveBackupRecords(remainingRecords)
7d6fee15 » Philip Dorrell 2008-06-09 recordPathSummaries method 616
def recordPathSummaries(self, backupKeyBase, directoryInfo):
    """Store the YAML dump of directoryInfo's path summaries under "<backupKeyBase>/pathList"."""
    pathListKey = "%s/pathList" % (backupKeyBase,)
    print("Record path summaries to %s ..." % pathListKey)
    pathSummariesYamlData = directoryInfo.getPathSummariesYamlData()
    self.backupMap[pathListKey] = yaml.safe_dump(pathSummariesYamlData)
d973ff37 » Philip Dorrell 2008-07-15 record written files separa... 621
def recordWrittenFileSummaries(self, backupKeyBase, writtenFileSummaries):
    """Store the YAML dump of the given written file summaries under "<backupKeyBase>/writtenPathList"."""
    writtenPathListKey = "%s/writtenPathList" % (backupKeyBase,)
    print("Record written file summaries to %s ..." % writtenPathListKey)
    summariesYamlData = []
    for writtenSummary in writtenFileSummaries:
        summariesYamlData.append(writtenSummary.toYamlData())
    self.backupMap[writtenPathListKey] = yaml.safe_dump(summariesYamlData)
27d6c3a9 » Philip Dorrell 2008-06-01 init 627
class BackupFileTask:
    """Task that writes the content of one source file into the backup map, in the
    shape expected by the task runner (getThreadLocals / doUnsynchronized / doSynchronized)."""

    def __init__(self, backupMap, backupFilesKeyBase, pathSummary, fileName, writtenRecords,
                 writtenFileSummaries):
        self.backupMap = backupMap
        self.backupFilesKeyBase = backupFilesKeyBase
        self.pathSummary = pathSummary
        self.fileName = fileName
        # Both of these are shared with the caller and are only updated in doSynchronized
        self.writtenRecords = writtenRecords
        self.writtenFileSummaries = writtenFileSummaries

    def getThreadLocals(self):
        """Return the per-thread values for this task: a clone of the backup map, keyed "backupMap"."""
        threadLocals = {}
        threadLocals["backupMap"] = self.backupMap.clone()
        return threadLocals

    def doUnsynchronized(self):
        """Read the whole source file and store it under "<backupFilesKeyBase><relativePath>"."""
        fileBytes = readFileBytes(self.fileName)
        self.fileContentKey = self.backupFilesKeyBase + self.pathSummary.relativePath
        print("Writing %r ..." % self.fileContentKey)
        self.backupMap[self.fileContentKey] = fileBytes

    def doSynchronized(self):
        """Register the file as written: append its summary to the shared list and
        record its hash against the key set by doUnsynchronized."""
        summary = self.pathSummary
        self.writtenFileSummaries.append (summary)
        self.writtenRecords.recordHashWritten (summary.hash, self.fileContentKey)
59aa8ae0 » Philip Dorrell 2008-06-28 checkpointing 650
def doBackup(self, directoryInfo, full = True):
    """Create a new dated backup of a source directory (full or incremental).

    'Incremental' is based on the hashes of file contents already recorded as written by
    the previous backups (see WrittenRecords.recordPreviousBackups). It is not based on
    any comparison of files done on the source computer. A file whose content hash is
    already recorded as written is not written again; only a log message is printed.

    directoryInfo: describes the source directory; its path, pathSummaries and
        path-summary YAML data are used.
    full: True for a full backup, False for an incremental one. An incremental
        backup requested when no backup records exist yet is done (and recorded)
        as a full backup.
    """
    dateTimeString = self.getDateTimeString()
    backupKeyBase = dateTimeString
    backupFilesKeyBase = backupKeyBase + "/files"
    print("retrieving existing backup records ...")
    backupRecords = self.getBackupRecords()
    print("backup records = %r" % backupRecords)
    # This has to be decided BEFORE the new record is appended: afterwards the list is
    # never empty, so the fallback could never trigger and the first backup would be
    # recorded as "incremental".
    if not full and len(backupRecords) == 0:
        full = True
        print("No previous records, so backup will be FULL anyway")
    currentBackupRecord = BackupRecord(full and "full" or "incremental", dateTimeString, completed = False)
    backupRecords.append(currentBackupRecord)
    backupRecordUpdater = BackupRecordUpdater (self, backupRecords, currentBackupRecord,
                                               backupKeyBase, directoryInfo, recordTrigger = self.recordTrigger)
    backupRecordUpdater.initialRecord()
    writtenRecords = WrittenRecords()
    if not full:
        writtenRecords.recordPreviousBackups (self.backupMap, backupRecords)
    backupFileTasks = []
    for pathSummary in directoryInfo.pathSummaries:
        if pathSummary.isDir:
            continue
        if writtenRecords.isWritten(pathSummary.hash):
            print("Content of %r already written to %r" % (pathSummary,
                                                           writtenRecords.locationWritten (pathSummary.hash)))
            continue
        fileName = pathSummary.fullPath(directoryInfo.path)
        backupFileTask = IncrementalBackups.BackupFileTask(self.backupMap, backupFilesKeyBase,
                                                           pathSummary, fileName, writtenRecords,
                                                           backupRecordUpdater.writtenFileSummaries)
        backupFileTasks.append (backupFileTask)
    # The updater is passed as the checkpoint task for the run
    taskRunner.runTasks (backupFileTasks, checkpointTask = backupRecordUpdater)
    backupRecordUpdater.recordCompleted()
27d6c3a9 » Philip Dorrell 2008-06-01 init 691
def doFullBackup(self, directoryInfo):
    """Back up the source directory described by directoryInfo as a full backup."""
    wantFull = True
    self.doBackup (directoryInfo, full = wantFull)
695
def doIncrementalBackup(self, directoryInfo):
    """Back up the source directory described by directoryInfo as an incremental backup."""
    wantFull = False
    self.doBackup (directoryInfo, full = wantFull)
699
def getBackupRecordForDateTime(self, backupRecords, dateTimeString):
    """Return the index (not the record itself) of the first record in backupRecords
    whose datetime equals dateTimeString.

    Raises Exception if there is no such record.
    """
    for index, backupRecord in enumerate(backupRecords):
        if backupRecord.datetime == dateTimeString:
            return index
    # A bare string cannot be raised (TypeError on Python >= 2.6), so use a real exception
    raise Exception("No backup record found for date-time %r" % dateTimeString)
705
def getRestoreRecords(self, backupRecords, dateTimeString):
    """Return the consecutive backup records needed to restore one backup.

    The result ends with the target backup and starts at the nearest record of
    type "full" at or before it. If no full backup precedes the target, it
    starts at the first record, which is the same grouping getBackupGroups uses.

    dateTimeString: datetime of the backup to restore, or None for the last
        record. A datetime that matches no record makes
        getBackupRecordForDateTime raise.
    Returns an empty list when backupRecords is empty and dateTimeString is None.
    """
    if dateTimeString is None:
        restorePos = len(backupRecords)-1
    else:
        restorePos = self.getBackupRecordForDateTime (backupRecords, dateTimeString)
    pos = restorePos
    # Stop at index 0 at the latest: a negative start would make the slice below
    # count from the end of the list and return the wrong records.
    while pos > 0 and backupRecords[pos].type != "full":
        pos -= 1
    return backupRecords[max(pos, 0):(restorePos+1)]
27d6c3a9 » Philip Dorrell 2008-06-01 init 716
def getPathSummaryDataList(self, backupRecord):
    """Load the YAML data stored under "<datetime>/pathList" for the given backup
    record, i.e. the path summaries recorded for that dated backup."""
    backupKeyBase = backupRecord.datetime
    print("getPathSummaryDataList for %r ..." % backupRecord)
    pathListKey = backupKeyBase + "/pathList"
    return yaml.safe_load(self.backupMap[pathListKey])
725
def getWrittenFileSummaryDataList(self, backupRecord):
    """Load the YAML data stored under "<datetime>/writtenPathList" for the given
    backup record, i.e. the summaries of the files recorded as written by that dated backup."""
    backupKeyBase = backupRecord.datetime
    print("getWrittenFileSummaryDataList for %r ..." % backupRecord)
    writtenPathListKey = backupKeyBase + "/writtenPathList"
    return yaml.safe_load(self.backupMap[writtenPathListKey])
d6dc2e0e » Philip Dorrell 2008-07-15 use writtenPathList when av... 735
def getHashContentKeyMap(self, restoreRecords, writtenFileSummaryLists):
    """Build a dict from content hash to the ContentKey (backup datetime plus relative
    path) under which that content was written.

    restoreRecords and writtenFileSummaryLists are paired up position by position.
    When the same hash occurs more than once, the last occurrence wins.
    """
    contentKeysByHash = {}
    for restoreRecord, writtenSummaries in zip(restoreRecords, writtenFileSummaryLists):
        contentKeysByHash.update(
            (summary.hash, ContentKey(restoreRecord.datetime, summary.relativePath))
            for summary in writtenSummaries)
    return contentKeysByHash
745
class RestoreFileTask:
    """Task that restores one file from the backup map to a local path, in the shape
    expected by the task runner (getThreadLocals / doUnsynchronized / doSynchronized)."""

    def __init__(self, backupMap, contentKey, fullPath, updateVerificationRecords, verificationRecords, overwrite):
        self.backupMap = backupMap
        self.contentKey = contentKey
        self.fullPath = fullPath
        self.overwrite = overwrite
        # verificationRecords is only used when updateVerificationRecords is true
        self.updateVerificationRecords = updateVerificationRecords
        self.verificationRecords = verificationRecords

    def getThreadLocals(self):
        """Return the per-thread values for this task: a clone of the backup map, keyed "backupMap"."""
        threadLocals = {}
        threadLocals["backupMap"] = self.backupMap.clone()
        return threadLocals

    def doUnsynchronized(self):
        """Fetch the content from the backup map and write it to fullPath, removing an
        existing file first when overwrite is set. When verification records are being
        updated, also keep the SHA-1 digest of the content for doSynchronized."""
        content = self.backupMap[self.contentKey.fileKey()]
        targetExists = os.path.exists(self.fullPath)
        if targetExists and self.overwrite:
            os.remove (self.fullPath)
        writeFileBytes(self.fullPath, content)
        if self.updateVerificationRecords:
            self.contentHash = sha1Digest(content)
        print("Restored FILE %r" % self.fullPath)

    def doSynchronized(self):
        """Mark the restored content as verified, if verification records are being updated."""
        if not self.updateVerificationRecords:
            return
        restoredKey = self.contentKey
        self.verificationRecords.markVerified (restoredKey.datetime,
                                               restoredKey.filePath, self.contentHash)
        print("Mark verified FILE %r" % self.fullPath)
772
def restoreDirectory(self, restoreDir, pathSummaryList, hashContentKeyMap, overwrite,
                     updateVerificationRecords = False):
    """Restore a directory tree below restoreDir.

    restoreDir: target directory; each path summary is resolved against it.
    pathSummaryList: path summaries of the directories and files to restore.
    hashContentKeyMap: maps a file content hash to the content key holding that content.
    overwrite: passed to each RestoreFileTask (an existing file is removed before writing).
    updateVerificationRecords: when true, restored contents are marked verified in a
        HashVerificationRecords object, which is updated once all files are restored.

    Directories are created straight away; files are restored as a batch by the
    module task runner. A file whose hash has no entry in hashContentKeyMap is
    reported with a WARNING and skipped.
    """
    print("Restoring directory %r ..." % restoreDir)
    # Always bind the name: it is passed to every RestoreFileTask, even when
    # verification is off (previously that case failed with an unbound local).
    verificationRecords = None
    if updateVerificationRecords:
        verificationRecords = HashVerificationRecords(self.backupMap)
    restoreFileTasks = []
    for pathSummary in pathSummaryList:
        fullPath = pathSummary.fullPath (restoreDir)
        if pathSummary.isDir:
            if not os.path.isdir(fullPath):
                os.makedirs(fullPath)
            print("Restored DIR %r" % fullPath)
        elif pathSummary.isFile:
            if pathSummary.hash not in hashContentKeyMap:
                print("WARNING: No written content found for %r (hash %s)" % (pathSummary.relativePath,
                                                                              pathSummary.hash))
                # Nothing to restore from; carry on with the remaining paths instead
                # of failing on the lookup below
                continue
            contentKey = hashContentKeyMap[pathSummary.hash]
            restoreFileTasks.append (IncrementalBackups.RestoreFileTask (self.backupMap, contentKey,
                                                                         fullPath, updateVerificationRecords,
                                                                         verificationRecords, overwrite))
        else:
            print("WARNING: Unknown path type %r" % pathSummary)
    taskRunner.runTasks (restoreFileTasks)
    if updateVerificationRecords:
        verificationRecords.updateRecords()
799
def getRestoreDetails(self, dateTimeString):
    """Collect what is needed to restore the backup with the given datetime
    (or the last recorded backup when dateTimeString is None).

    Returns a tuple (pathSummaryListToRestore, hashContentKeyMap, backupToRestore):
    the path summaries of the target backup, the map from content hash to content
    key built from the written-file summaries of all restore records, and the
    target backup record itself.

    Raises Exception if there are no backup records at all. checkVersion is
    called for every restore record before any summaries are loaded.
    """
    backupRecords = self.getBackupRecords()
    print("backupRecords = %r" % backupRecords)
    if len(backupRecords) == 0:
        # A bare string cannot be raised (TypeError on Python >= 2.6), so use a real exception
        raise Exception("No backup records found")
    print("Get restore records for %s" % (dateTimeString or "(most recent backup)"))
    restoreRecords = self.getRestoreRecords(backupRecords, dateTimeString)
    print("restoreRecords = %r" % restoreRecords)
    for restoreRecord in restoreRecords:
        print("checkVersion for %r ..." % restoreRecord)
        checkVersion(self.backupMap, restoreRecord)
    writtenFileSummaryDataLists = [self.getWrittenFileSummaryDataList(record) for record in restoreRecords]
    print("parsing writtenFileSummaryDataLists from YAML data ...")
    writtenFileSummaryLists = [[PathSummary.fromYamlData(pathSummaryData) for pathSummaryData in pathSummaryDataList]
                               for pathSummaryDataList in writtenFileSummaryDataLists]
    print("calculating hashContentKeyMap ...")
    hashContentKeyMap = self.getHashContentKeyMap(restoreRecords, writtenFileSummaryLists)
    print("hashContentKeyMap = %r" % hashContentKeyMap)
    # The last restore record is the backup actually being restored
    backupToRestore = restoreRecords[-1]
    print("Target backup for restore: %r" % backupToRestore)
    pathSummaryListToRestore = [PathSummary.fromYamlData (pathSummaryData) for pathSummaryData
                                in self.getPathSummaryDataList(backupToRestore)]
    return pathSummaryListToRestore, hashContentKeyMap, backupToRestore
84b538f0 » Philip Dorrell 2008-06-06 implement verifyIncremental... 823
7ad6bad4 » Philip Dorrell 2008-06-09 restore details for specifi... 824 def getRestoredDirHash(self, dateTimeString = None):
825 pathSummaryList, hashContentKeyMap, backupToRestore = self.getRestoreDetails(dateTimeString)
84b538f0 » Philip Dorrell 2008-06-06 implement verifyIncremental... 826 verificationRecords = HashVerificationRecords(self.backupMap)
827 restoredDirHash = BaseDirHash(None, "backed up files")
828 for pathSummary in pathSummaryList:
829 if pathSummary.isDir:
830 restoredDirHash.addDirSummary(pathSummary.relativePath)
5a35cb97 » Philip Dorrell 2008-06-25 fix logging errors 831 print " DIR %r" % pathSummary.relativePath
84b538f0 » Philip Dorrell 2008-06-06 implement verifyIncremental... 832 elif pathSummary.isFile:
833 contentKey = hashContentKeyMap[pathSummary.hash]
834 # We could compare pathSummary.hash and fileHash,
835 # but the verified fileHash is what matters (to compare to local file)
836 fileHash = verificationRecords.getWrittenFileHash(contentKey.d