public
Description: Python backup classes to backup files to key/value store, such as Amazon S3
Homepage: http://www.1729.com/software/keevalbak/index.html
Clone URL: git://github.com/pdorrell/keevalbak.git
keevalbak / keevalbak / BackupOperations.py
27d6c3a9 » Philip Dorrell 2008-06-01 init 1 # Copyright (c) 2008 Philip Dorrell, http://www.1729.com/
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a copy
4 # of this software and associated documentation files (the "Software"), to deal
5 # in the Software without restriction, including without limitation the rights
6 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 # copies of the Software, and to permit persons to whom the Software is
8 # furnished to do so, subject to the following conditions:
9 #
10 # The above copyright notice and this permission notice shall be included in
11 # all copies or substantial portions of the Software.
12 #
13 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 # THE SOFTWARE.
20
21 import yaml
22 import hashlib
23 import os
24 import time
25 import datetime
26 import shutil
27 import CompareDirectories
28 import re
29 from sets import Set
30
def readFileBytes(filename):
    """Read the named file and return its contents as a byte string.

    The file is opened in binary mode, so no newline translation is done.
    The handle is closed even if the read itself raises.
    """
    f = open(filename, "rb")
    try:
        return f.read()
    finally:
        # try/finally rather than 'with' keeps this usable on old Python 2
        f.close()
37
def writeFileBytes(filename, bytes):
    """Write a byte string as the new contents of the named file.

    Any existing file of that name is overwritten. The file is opened in
    binary mode and the handle is closed even if the write raises.
    """
    f = open(filename, "wb")
    try:
        f.write(bytes)
    finally:
        # try/finally rather than 'with' keeps this usable on old Python 2
        f.close()
43
b2451d80 » Philip Dorrell 2008-06-29 make deleteMapValues multi-... 44
45
class PathSummary(object):
    """Information about a file or directory specified as a relative path within some base directory.
    Note: all paths are '/' separated, whether or not we are in Microsoft Windows."""
    def __init__(self, relativePath):
        self.relativePath = relativePath

    def fullPath(self, basePath):
        """Return the full path given the path of the base directory
        (plain concatenation: no separator is inserted)."""
        return basePath + self.relativePath

    @staticmethod
    def fromYamlData(data):
        """Convert YAML data into FileSummary or DirSummary (inverse of toYamlData methods).

        Dispatches on data["type"], which must be "file" or "dir".
        Raises ValueError for any other type."""
        pathType = data["type"]
        if pathType == "file":
            return FileSummary.fromYamlData(data)
        elif pathType == "dir":
            return DirSummary.fromYamlData(data)
        else:
            # A bare string is not a valid exception object, so raise a real one.
            raise ValueError("Unknown path type: %s" % pathType)
66
class FileSummary(PathSummary):
    """Summary of a single file, identified by its relative path within some
    (unspecified) base directory, together with a hash of the file's contents
    and a flag saying whether the contents were written."""
    def __init__(self, relativePath, hash, written = False):
        super(FileSummary, self).__init__(relativePath)
        self.hash = hash
        self.written = written
        self.isFile = True
        self.isDir = False

    def __unicode__(self):
        if self.written:
            writtenMarker = " W"
        else:
            writtenMarker = ""
        return u"FILE: %r : %s%s" % (self.relativePath, self.hash, writtenMarker)

    def __repr__(self):
        return self.__unicode__()

    def toYamlData(self):
        """Return a plain dict describing this file, suitable for dumping as YAML"""
        yamlData = {"type": "file"}
        yamlData["path"] = self.relativePath
        yamlData["hash"] = self.hash
        yamlData["written"] = self.written
        return yamlData

    @staticmethod
    def fromYamlData(data):
        """Rebuild a FileSummary from the dict produced by toYamlData"""
        path = data["path"]
        contentHash = data["hash"]
        return FileSummary(path, contentHash, written = data["written"])
95
class DirSummary(PathSummary):
    """Summary of a single directory, identified by its relative path within
    some (unspecified) base directory"""
    def __init__(self, relativePath):
        super(DirSummary, self).__init__(relativePath)
        self.isFile = False
        self.isDir = True

    def __unicode__(self):
        return u"DIR: %r" % (self.relativePath)

    def __repr__(self):
        return self.__unicode__()

    def toYamlData(self):
        """Return a plain dict describing this directory, suitable for dumping as YAML"""
        yamlData = {"type": "dir"}
        yamlData["path"] = self.relativePath
        return yamlData

    @staticmethod
    def fromYamlData(data):
        """Rebuild a DirSummary from the dict produced by toYamlData"""
        path = data["path"]
        return DirSummary(path)
84b538f0 » Philip Dorrell 2008-06-06 implement verifyIncremental... 119
def sha1Digest(content):
    """Return the SHA1 digest of a byte string as a hexadecimal string"""
    digester = hashlib.sha1()
    digester.update(content)
    return digester.hexdigest()
27d6c3a9 » Philip Dorrell 2008-06-01 init 122
class DirectoryInfo:
    """Summaries of every directory and file found beneath a base directory.
    Each directory's summary is added before the summaries of anything it contains.
    """
    def __init__(self, path):
        """Scan the base directory given by path (the scan happens immediately)"""
        self.pathSummaries = []
        self.path = unicode(path)
        self.summarizeSubDir(u"")

    def createDirSummary(self, relativePath):
        """Build the summary for a sub-directory"""
        return DirSummary(relativePath)

    def createFileSummary(self, relativePath):
        """Build the summary for a file, reading the whole file to hash its contents"""
        return FileSummary(relativePath, sha1Digest(readFileBytes(self.path + relativePath)))

    def addSummary(self, pathSummary):
        """Log a path summary and append it to the list"""
        print(u"%r" % pathSummary)
        self.pathSummaries.append(pathSummary)

    def getPathSummariesYamlData(self):
        """Return the path summaries as a list of YAML-ready dicts"""
        yamlData = []
        for summary in self.pathSummaries:
            yamlData.append(summary.toYamlData())
        return yamlData

    def summarizeSubDir(self, relativePath):
        """Recursively walk the sub-directory with the given relative path, adding a
        summary for each file and sub-directory found. Anything that is neither a
        file nor a directory is only logged."""
        dirPath = self.path + relativePath
        for childName in os.listdir(dirPath):
            childRelativePath = relativePath + "/" + childName
            childPath = self.path + childRelativePath
            if os.path.isfile(childPath):
                self.addSummary(self.createFileSummary(childRelativePath))
                continue
            if os.path.isdir(childPath):
                # the directory itself first, then its contents
                self.addSummary(self.createDirSummary(childRelativePath))
                self.summarizeSubDir(childRelativePath)
                continue
            print("UNKNOWN OBJECT %r in %r" % (childName, dirPath))
27d6c3a9 » Philip Dorrell 2008-06-01 init 166
class HashVerificationRecords(object):
    """Records of verified hashes of backed up files (i.e. verified by actually reading
    the file content out of the backup map and recalculating the hash).
    Note that this class is not yet used, and nothing is yet writing the verification records
    into the backup map."""

    def __init__(self, backupMap):
        self.backupMap = backupMap
        # backup datetime string -> {file path: verified content hash}
        self.datetimeFileHashesMap = {}
        # datetimes whose hash maps have changed since they were loaded
        # (builtin set: the 'sets' module is deprecated)
        self.datetimeUpdated = set()

    def _recordKey(self, datetime):
        """Backup map key under which the verified hashes for a backup are stored"""
        return datetime + "/verifiedFileHashes.yaml"

    def getFileHashesMap(self, datetime):
        """Return the {file path: hash} map for a backup, loading it from the
        backup map on first use (an absent or empty record gives an empty map)."""
        if datetime not in self.datetimeFileHashesMap:
            recordKey = self._recordKey(datetime)
            if recordKey in self.backupMap:
                # safe_load gives None for an empty document; treat that as "no records"
                fileHashesMap = yaml.safe_load(self.backupMap[recordKey]) or {}
            else:
                fileHashesMap = {}
            self.datetimeFileHashesMap[datetime] = fileHashesMap
        return self.datetimeFileHashesMap[datetime]

    def markVerified(self, datetime, filePath, contentHash):
        """Record a verified hash for a file, and note that this backup's
        records need writing back."""
        fileHashesMap = self.getFileHashesMap(datetime)
        fileHashesMap[filePath] = contentHash
        self.datetimeUpdated.add (datetime)

    def getWrittenFileHash(self, datetime, filePath):
        """Get the hash of a backed up file, either from an existing hash verification record,
        or, read the file contents from the backup map and calculate the hash."""
        fileHashesMap = self.getFileHashesMap(datetime)
        if filePath in fileHashesMap:
            return fileHashesMap[filePath]
        content = self.backupMap[datetime + "/files" + filePath]
        contentHash = sha1Digest(content)
        self.markVerified(datetime, filePath, contentHash)
        return contentHash

    def updateRecords(self):
        """Update any newly verified hashes back into the backup map."""
        print("Verified hashes were updated for %r" % self.datetimeUpdated)
        for datetime in self.datetimeUpdated:
            fileHashesMap = self.datetimeFileHashesMap[datetime]
            print("Updating verification records for %s = %s" % (datetime, fileHashesMap))
            self.backupMap[self._recordKey(datetime)] = yaml.safe_dump (fileHashesMap)
27d6c3a9 » Philip Dorrell 2008-06-01 init 215
class BackupRecord:
    """Describes one backup that was made: its type ('full' or 'incremental'),
    its date/time, and whether it ran to completion."""
    def __init__(self, type, datetime, completed):
        """Construct from the backup type, the date time and the completed flag"""
        self.completed = completed
        self.datetime = datetime
        self.type = type

    @staticmethod
    def fromYamlData(data):
        """Rebuild a backup record from the dict produced by toYamlData"""
        # A missing "completed" entry means True: the previous version of
        # keevalbak only recorded a backup once it was complete.
        completed = data.get("completed", True)
        return BackupRecord(data["type"], data["datetime"], completed)

    def toYamlData(self):
        """Return a plain dict describing this record, to be stored as YAML"""
        return {
            "type": self.type,
            "datetime": self.datetime,
            "completed": self.completed,
        }

    def isFull(self):
        """True if this was a full (rather than incremental) backup"""
        return self.type == "full"

    def __str__(self):
        if self.completed:
            status = "complete"
        else:
            status = "INCOMPLETE"
        return "[Backup: %s %s %s]" % (self.type, self.datetime, status)

    def __repr__(self):
        return self.__str__()
242
class WrittenRecords:
    """Remembers, for each content hash, the backup map key that contents with
    that hash were written to (within the context of a particular set of
    backups, i.e. a full backup and the incrementals following it)."""
    def __init__(self):
        # content hash -> key the contents were written to
        self.written = {}

    def recordHashWritten(self, hash, key):
        """Note (and log) that contents with the given hash were written to the given key"""
        print(" record hash %s written to %r" % (hash, key))
        self.written[hash] = key

    def isWritten(self, hash):
        """Is there a record of contents with this hash having been written?"""
        return hash in self.written

    def locationWritten(self, hash):
        """Return the key recorded for this hash (KeyError if there is none)"""
        return self.written[hash]

    def recordBackup(self, backupMap, backupRecord):
        """Read the path list of one backup from the backup map and, for every file
        entry flagged as written, record its hash against its key."""
        pathListKey = backupRecord.datetime + "/pathList"
        for pathData in yaml.safe_load (backupMap[pathListKey]):
            isWrittenFile = pathData["type"] == "file" and pathData["written"]
            if not isWrittenFile:
                continue
            self.recordHashWritten (pathData["hash"], backupRecord.datetime + pathData["path"])

    def recordPreviousBackups(self, backupMap, backupRecords):
        """Record the written hashes of each backup, working backwards from the most
        recent one and stopping after the first full backup reached (or after the
        earliest backup, if there is no full backup)."""
        for backupRecord in reversed(backupRecords):
            print("Recording backup %r ..." % backupRecord)
            self.recordBackup(backupMap, backupRecord)
            if backupRecord.type == "full":
                break
283
class BaseFileHash(object):
    """Description of a file: its (basic) name and hash, plus a description
    of where the information came from (used in difference messages)."""
    def __init__(self, name, hash, description):
        self.name = name
        self.hash = hash
        self.description = description

    def isDir(self):
        """A file is never a directory"""
        return False

    def printIndented(self, indent):
        """Print a one-line description, prefixed by the indent string"""
        print("%sFile %r: %s" % (indent, self.name, self.hash))

    def compareToOtherFileHash (self, otherFileHash, indent, log, logDiff):
        """Report a difference via logDiff(message) if the other file's hash differs.

        indent and log are accepted for symmetry with directory comparison
        but are not used here."""
        if self.hash != otherFileHash.hash:
            # logDiff is the callback passed in (there is no self.logDiff), and
            # the format arguments must be a single tuple.
            logDiff ("File %r has hash %s in %r but hash %s in %r" %
                     (self.name, self.hash, self.description,
                      otherFileHash.hash, otherFileHash.description))
302
# A leading '/', the first path component, then optionally the rest (starting with '/')
pathRegex = re.compile("[/]([^/]*)([/].*)?")

def analysePath(path):
    """Analyse a path starting with '/' and with '/' separators into 1st part and remainder
    e.g. '/x/y' into 'x' and '/y' and '/x' into 'x' and None.

    Raises ValueError if the path does not start with '/' (including the empty path)."""
    pathMatch = pathRegex.match(path)
    if pathMatch is None:
        # Without this check the failure would be an AttributeError on None
        raise ValueError("Path does not start with '/': %r" % (path,))
    return (pathMatch.group(1), pathMatch.group(2))
312
class BaseDirHash(object):
    """Description of a directory as an ordered list, and a by-name map, of its
    immediate children (sub-directories and files)"""
    def __init__(self, name, description):
        self.name = name
        self.description = description
        self.children = []
        self.childrenMap = {}

    def isDir(self):
        """A directory hash always describes a directory"""
        return True

    def addChild(self, childHash):
        """Add a child (directory or file), registering it under its name"""
        self.childrenMap[childHash.name] = childHash
        self.children.append (childHash)

    def hasChildNamed(self, childName):
        """Is there an immediate child with this name?"""
        return childName in self.childrenMap

    def printIndented(self, indent = ""):
        """Print this directory and, further indented, everything beneath it"""
        print("%sDir %r" % (indent, self.name))
        for child in self.children:
            child.printIndented(indent = " " + indent)

    def addFileSummary(self, path, hash):
        """Add a file given its path relative to this directory, creating any
        intermediate sub-directories that are not already there"""
        firstPart, restOfPath = analysePath(path)
        if restOfPath is not None:
            self.getOrCreateChildDirHash(firstPart).addFileSummary (restOfPath, hash)
        else:
            self.addChild (BaseFileHash(firstPart, hash, self.description))

    def getOrCreateChildDirHash(self, name):
        """Return the child registered under this name, first adding a new
        (empty) sub-directory if there is none"""
        if name not in self.childrenMap:
            self.addChild(BaseDirHash(name, self.description))
        return self.childrenMap[name]

    def addDirSummary(self, path):
        """Add a sub-directory given its path relative to this directory, creating
        any intermediate sub-directories that are not already there"""
        firstPart, restOfPath = analysePath(path)
        if restOfPath is not None:
            self.getOrCreateChildDirHash(firstPart).addDirSummary (restOfPath)
        else:
            self.addChild (BaseDirHash(firstPart, self.description))

    def compareToOtherDirHash(self, otherDirHash, indent, log, logDiff):
        """Recursively compare with another directory hash. Progress is reported
        through log(indent, message) and each difference found through
        logDiff(message); indent is a number, increased by one per level."""
        log (indent, "comparing directory %r" % self.name)
        # First pass: everything in this directory, matched by name against the other
        for ownChild in self.children:
            childName = ownChild.name
            otherChild = otherDirHash.childrenMap.get(childName)
            if ownChild.isDir():
                if otherChild is None:
                    logDiff("%r is a directory in %r but does not exist in %r" %
                            (childName, self.description, otherDirHash.description))
                elif otherChild.isDir():
                    ownChild.compareToOtherDirHash (otherChild, indent+1, log, logDiff)
                else:
                    logDiff ("%r is a directory in %r but a file in %r" %
                             (childName, self.description, otherDirHash.description))
            elif otherChild is None:
                logDiff("%r is a file in %r but does not exist in %r" %
                        (childName, self.description, otherDirHash.description))
            elif otherChild.isDir():
                logDiff("%r is a file in %r but a directory in %r" %
                        (childName, self.description, otherDirHash.description))
            else:
                ownChild.compareToOtherFileHash (otherChild, indent+1, log, logDiff)
        # Second pass: anything that only the other directory has
        for otherChild in otherDirHash.children:
            if self.hasChildNamed (otherChild.name):
                continue
            if otherChild.isDir():
                logDiff("%r does not exist in %r but is a directory in %r" %
                        (otherChild.name, self.description, otherDirHash.description))
            else:
                logDiff("%r does not exist in %r but is a file in %r" %
                        (otherChild.name, self.description, otherDirHash.description))
27d6c3a9 » Philip Dorrell 2008-06-01 init 403
class FileHash(BaseFileHash):
    """File name and hash obtained by reading an actual file from the file
    system and hashing its contents"""
    def __init__(self, dir, name, description):
        """Read the file called name inside directory dir and hash what it contains"""
        contentHash = sha1Digest(readFileBytes (dir + "/" + name))
        super(FileHash, self).__init__(name, contentHash, description)
27d6c3a9 » Philip Dorrell 2008-06-01 init 411
class DirHash(BaseDirHash):
    """Directory description obtained by listing an actual directory in the
    file system, recursively describing everything found inside it"""
    def __init__(self, dir, name, description):
        """Describe the directory called name inside dir (or dir itself if name is empty)"""
        super(DirHash, self).__init__(name, description)
        if name:
            fullPath = unicode (dir + "/" + name)
        else:
            fullPath = unicode (dir)
        for childName in os.listdir(fullPath):
            # anything that is not a regular file is treated as a directory
            if os.path.isfile(fullPath + "/" + childName):
                child = FileHash(fullPath, childName, self.description)
            else:
                child = DirHash(fullPath, childName, self.description)
            self.addChild (child)
27d6c3a9 » Philip Dorrell 2008-06-01 init 424
class ContentKey(object):
    def __init__(self, datetime, filePath):
        """Holds the two parts of the key under which a file's contents are stored
        for a particular backup in a backup map: the backup's datetime string and
        the file path. Note that filePath is expected to start with a '/'"""
        self.filePath = filePath
        self.datetime = datetime

    def fileKey(self):
        """Return the actual backup map key.
        Note: the "/files" infix leaves room for other meta-data to be stored under the datetime."""
        return "".join((self.datetime, "/files", self.filePath))

    def __str__(self):
        parts = (self.datetime, self.filePath)
        return "[%s:%r]" % parts

    def __repr__(self):
        return self.__str__()
1fc8bbfb » Philip Dorrell 2008-06-19 add BackupRecordUpdater 442
class BackupRecordUpdater:
    """Records the current state of a backup in progress, checkpointing the
    path summaries whenever enough new bytes have been written"""
    def __init__(self, backups, backupRecords, currentBackupRecord, backupKeyBase,
                 directoryInfo, recordTrigger = 1000000):
        self.backups = backups
        self.backupRecords = backupRecords
        self.currentBackupRecord = currentBackupRecord
        self.backupKeyBase = backupKeyBase
        self.directoryInfo = directoryInfo
        # number of unrecorded bytes at which a checkpoint is taken
        self.recordTrigger = recordTrigger
        self.bytesWritten = 0
        self.unrecordedBytes = 0

    def checkpoint(self):
        """Have the backups object record the path summaries as they stand now"""
        self.backups.recordPathSummaries (self.backupKeyBase, self.directoryInfo)

    def record(self):
        """Checkpoint, then save the list of backup records"""
        self.checkpoint()
        self.backups.saveBackupRecords(self.backupRecords)

    def recordContentWrittenSize(self, contentWrittenSize):
        """Account for newly written bytes, checkpointing (and resetting the
        unrecorded count) once the unrecorded total reaches the trigger"""
        self.bytesWritten += contentWrittenSize
        print(" wrote %d bytes, total now %d ..." % (contentWrittenSize, self.bytesWritten))
        self.unrecordedBytes += contentWrittenSize
        if self.unrecordedBytes < self.recordTrigger:
            return
        print(" unrecordedBytes = %d, recording backup state ..." % self.unrecordedBytes)
        self.checkpoint()
        self.unrecordedBytes = 0

    def recordCompleted(self):
        """Flag the current backup record as completed and record everything"""
        self.currentBackupRecord.completed = True
        self.record()
475
class TaskRunner:
    """Simple task runner: runs both parts of every task in the calling thread"""
    def __init__(self, checkpointFreq = None):
        # stored, but not consulted by runTasks
        self.checkpointFreq = checkpointFreq

    def runTasks(self, tasks, checkpointTask = None):
        """Run the unsynchronized part of every task, in order, and then the
        synchronized part of every task, in order. checkpointTask is not used."""
        for pendingTask in tasks:
            pendingTask.doUnsynchronized()
        for pendingTask in tasks:
            pendingTask.doSynchronized()
3fd43cb5 » Philip Dorrell 2008-06-29 using ThreadedTaskRunner af... 486
# Alternative runner that does everything in the calling thread:
#taskRunner = TaskRunner(checkpointFreq = 20)

from ThreadedTaskRunner import ThreadedTaskRunner

# Module-wide task runner (used by deleteMapValues).
# NOTE(review): clonedAttributes presumably names task attributes that are
# copied per thread - confirm against ThreadedTaskRunner.
taskRunner = ThreadedTaskRunner (
    numThreads = 30,
    clonedAttributes = ["backupMap"])
492
class DeleteBackupMapValueTask:
    """Task that deletes a single key from a backup map"""
    def __init__(self, backupMap, key):
        self.key = key
        self.backupMap = backupMap

    def doUnsynchronized(self):
        """Log the key and delete it from the backup map"""
        print(" delete %r ..." % self.key)
        del self.backupMap[self.key]

    def doSynchronized(self):
        """Nothing to do in the synchronized part"""
        return None
504
def deleteMapValues(backupMap, dryRun):
    """Delete every key from a map; if dryRun is True, only log what would be deleted"""
    if dryRun:
        prefix = "DRYRUN: "
    else:
        prefix = ""
    print("%sDeleting keys from map %s" % (prefix, backupMap))
    if dryRun:
        for key in backupMap:
            print(" delete %r ..." % key)
    else:
        # collect one task per key first, then hand them all to the task runner
        deleteTasks = [DeleteBackupMapValueTask(backupMap, key) for key in backupMap]
        taskRunner.runTasks (deleteTasks)
    print("finished.")
517
class IncrementalBackups:
    """A set of dated full or incremental backups within a given backup map.
    This object does _not_ (currently) record _where_ the file contents came from.
    """
    def __init__(self, backupMap, recordTrigger = 10000000):
        """backupMap is the key/value store holding the backups; recordTrigger
        is passed on to the BackupRecordUpdater created by doBackup."""
        self.backupMap = backupMap
        self.recordTrigger = recordTrigger

    def getDateTimeString(self):
        """Get a date time string to use for a new dated backup"""
        return time.strftime("%Y-%b-%d.%H-%M-%S")

    def getBackupRecords(self):
        """Retrieve the BackupRecord objects describing any existing backups"""
        if "backupRecords" in self.backupMap:
            backupsListYamlData = yaml.safe_load(self.backupMap["backupRecords"])
        else:
            backupsListYamlData = []
        return [BackupRecord.fromYamlData(record) for record in backupsListYamlData]

    def saveBackupRecords(self, backupRecords):
        """Store the given backup records (as YAML) under the "backupRecords" key"""
        backupRecordsYamlData = [record.toYamlData() for record in backupRecords]
        self.backupMap["backupRecords"] = yaml.safe_dump(backupRecordsYamlData)
        print("new backup records = %r" % backupRecords)

    def getBackupGroups(self):
        """Get backup groups, i.e. backup records grouped into lists of incremental backups with a preceding
        full backup. The very first record always starts a group, even if it is not a full backup."""
        backupGroups = []
        records = self.getBackupRecords()
        currentBackupGroup = []
        for i, record in enumerate(records):
            if record.isFull() or i == 0:
                currentBackupGroup = [record]
                backupGroups.append (currentBackupGroup)
            else:
                currentBackupGroup.append(record)
        return backupGroups

    def listBackups(self):
        """Print out list of all backups (the first backup of each group is marked with "*")"""
        recordGroups = self.getBackupGroups()
        for recordGroup in recordGroups:
            for i, record in enumerate(recordGroup):
                if i == 0:
                    indent = "*"
                else:
                    indent = " "
                status = "complete" if record.completed else "INCOMPLETE"
                print("%s%s: %s %s" % (indent, record.type, record.datetime, status))

    def pruneBackup(self, backupRecord, dryRun):
        """Prune the backup indicated by the backup record (with dry-run option)"""
        print(" prune backup %r" % backupRecord)
        backupSubMap = self.backupMap.subMap(backupRecord.datetime)
        deleteMapValues(backupSubMap, dryRun)

    def pruneBackupGroup(self, recordGroup, dryRun):
        """Prune all backups in a backup group (with dry-run option)"""
        print("Backup group to prune: %r" % recordGroup)
        for record in recordGroup:
            self.pruneBackup(record, dryRun)

    def pruneBackups(self, keep = 1, dryRun = True):
        """Prune previous backup groups, keeping only specified number of most
        recent backup groups (but at least one).
        Raises Exception if keep is less than 1."""
        print("Pruning backups, keep %d%s" % (keep, ", DRY RUN" if dryRun else ""))
        if keep < 1:
            raise Exception ("Number of full backups to keep must be at least 1")
        recordGroups = self.getBackupGroups()
        if keep >= len(recordGroups):
            print("Only %d full backups, and %d specified to keep, so none will be pruned" % (len(recordGroups), keep))
        else:
            numToPrune = len(recordGroups) - keep
            groupsToPrune = recordGroups[:numToPrune]
            for recordGroup in groupsToPrune:
                self.pruneBackupGroup(recordGroup, dryRun = dryRun)
            if not dryRun:
                # Only the records of the groups that were kept remain listed.
                remainingGroups = recordGroups[numToPrune:]
                remainingRecords = []
                for group in remainingGroups:
                    remainingRecords += group
                self.saveBackupRecords(remainingRecords)

    def recordPathSummaries(self, backupKeyBase, directoryInfo):
        """Store the path summaries of directoryInfo (as YAML) under <backupKeyBase>/pathList"""
        self.backupMap[backupKeyBase + "/pathList"] = yaml.safe_dump(directoryInfo.getPathSummariesYamlData())

    class BackupFileTask:
        """Task (for a task runner) which writes the content of one source file to the backup map"""
        def __init__(self, backupMap, backupFilesKeyBase, pathSummary, fileName, writtenRecords):
            self.backupMap = backupMap
            self.backupFilesKeyBase = backupFilesKeyBase
            self.pathSummary = pathSummary
            self.fileName = fileName
            self.writtenRecords = writtenRecords

        def doUnsynchronized(self):
            """Read the file and write its content to the backup map"""
            content = readFileBytes(self.fileName)
            self.fileContentKey = self.backupFilesKeyBase + self.pathSummary.relativePath
            print("Writing %r ..." % self.fileContentKey)
            self.pathSummary.written = True
            self.backupMap[self.fileContentKey] = content

        def doSynchronized(self):
            """Record (in the shared written records) where content with this hash was written"""
            self.writtenRecords.recordHashWritten (self.pathSummary.hash, self.fileContentKey)

    def doBackup(self, directoryInfo, full = True):
        """Create a new backup of a source directory (full or incremental).
        Note: 'incremental' is based on comparing the hashes of file contents already marked as
        written to previous backups in the same backup group. It is not based on any comparison
        of files done on the source computer. If a given file contents has already been written,
        then the relevant file is written as a pointer to the previous file with the same contents
        (which may or may not be the same file in the same place on the source computer).
        If an incremental backup is requested but there are no previous backup records,
        a full backup is done (and recorded as such) instead.
        """
        dateTimeString = self.getDateTimeString()
        backupKeyBase = dateTimeString
        backupFilesKeyBase = backupKeyBase + "/files"
        print("retrieving existing backup records ...")
        backupRecords = self.getBackupRecords()
        print("backup records = %r" % backupRecords)
        # This check must come before the new record is appended; afterwards the
        # list can never be empty and the backup would be recorded as incremental.
        if not full and len(backupRecords) == 0:
            full = True
            print("No previous records, so backup will be FULL anyway")
        currentBackupRecord = BackupRecord("full" if full else "incremental", dateTimeString, completed = False)
        backupRecords.append(currentBackupRecord)
        backupRecordUpdater = BackupRecordUpdater (self, backupRecords, currentBackupRecord,
                                                   backupKeyBase, directoryInfo, recordTrigger = self.recordTrigger)
        backupRecordUpdater.record()
        writtenRecords = WrittenRecords()
        if not full:
            writtenRecords.recordPreviousBackups (self.backupMap, backupRecords)
        backupFileTasks = []
        for pathSummary in directoryInfo.pathSummaries:
            if not pathSummary.isDir:
                fileName = pathSummary.fullPath(directoryInfo.path)
                if not writtenRecords.isWritten(pathSummary.hash):
                    backupFileTask = IncrementalBackups.BackupFileTask(self.backupMap, backupFilesKeyBase,
                                                                       pathSummary, fileName, writtenRecords)
                    backupFileTasks.append (backupFileTask)
                else:
                    print("Content of %r already written to %r" % (pathSummary,
                                                                   writtenRecords.locationWritten (pathSummary.hash)))
        taskRunner.runTasks (backupFileTasks, checkpointTask = backupRecordUpdater)
        backupRecordUpdater.recordCompleted()

    def doFullBackup(self, directoryInfo):
        """Do a full backup of a source directory"""
        self.doBackup (directoryInfo, full = True)

    def doIncrementalBackup(self, directoryInfo):
        """Do an incremental backup of a source directory"""
        self.doBackup (directoryInfo, full = False)

    def getBackupRecordForDateTime(self, backupRecords, dateTimeString):
        """Return the index (within backupRecords) of the record with the given date-time string.
        Raises Exception if there is no such record."""
        for index, backupRecord in enumerate(backupRecords):
            if backupRecord.datetime == dateTimeString:
                return index
        raise Exception("No backup record found for date-time %r" % dateTimeString)

    def getRestoreRecords(self, backupRecords, dateTimeString):
        """Return the records needed to restore the backup with the given date-time
        string (or the most recent backup, if dateTimeString is None): the records
        from the start of its backup group up to and including that backup."""
        if dateTimeString is None:
            restorePos = len(backupRecords)-1
        else:
            restorePos = self.getBackupRecordForDateTime (backupRecords, dateTimeString)
        pos = restorePos
        # Stop at position 0 even if that record is not a full backup (it still starts
        # a group, as in getBackupGroups); a negative start would mis-slice below.
        while pos > 0 and backupRecords[pos].type != "full":
            pos -= 1
        return backupRecords[pos:(restorePos+1)]

    def getPathSummaryDataList(self, backupRecord):
        """Get YAML data representing information about files and directories backed up
        in a specified dated backup"""
        dateTimeString = backupRecord.datetime
        backupKeyBase = dateTimeString
        print("getPathSummaryDataList for %r ..." % backupRecord)
        pathSummariesData = yaml.safe_load(self.backupMap[backupKeyBase + "/pathList"])
        return pathSummariesData

    def getHashContentKeyMap(self, restoreRecords, pathSummaryLists):
        """Construct a map from hash keys to the backup keys to which those file contents
        were written (within the given backup group which is being restored from)"""
        hashContentKeyMap = {}
        for restoreRecord, pathSummaryList in zip(restoreRecords, pathSummaryLists):
            for pathSummary in pathSummaryList:
                if pathSummary.isFile and pathSummary.written:
                    hashContentKeyMap[pathSummary.hash] = ContentKey(restoreRecord.datetime, pathSummary.relativePath)
        return hashContentKeyMap

    class RestoreFileTask:
        """Task (for a task runner) which restores one file from the backup map"""
        def __init__(self, backupMap, contentKey, fullPath, updateVerificationRecords, verificationRecords, overwrite):
            self.backupMap = backupMap
            self.contentKey = contentKey
            self.fullPath = fullPath
            self.updateVerificationRecords = updateVerificationRecords
            self.verificationRecords = verificationRecords
            self.overwrite = overwrite

        def doUnsynchronized(self):
            """Fetch the content and write it to the destination file, hashing it
            if verification records are to be updated"""
            content = self.backupMap[self.contentKey.fileKey()]
            if os.path.exists(self.fullPath) and self.overwrite:
                os.remove (self.fullPath)
            writeFileBytes(self.fullPath, content)
            if self.updateVerificationRecords:
                self.contentHash = sha1Digest(content)
            print("Restored FILE %r" % self.fullPath)

        def doSynchronized(self):
            """Mark the restored content as verified (only if verification records are to be updated)"""
            if self.updateVerificationRecords:
                self.verificationRecords.markVerified (self.contentKey.datetime,
                                                       self.contentKey.filePath, self.contentHash)
                print("Mark verified FILE %r" % self.fullPath)

    def restoreDirectory(self, restoreDir, pathSummaryList, hashContentKeyMap, overwrite,
                         updateVerificationRecords = False):
        """Restore a directory using path summaries and hash content key map, with optional overwrite"""
        print("Restoring directory %r ..." % restoreDir)
        # Always bound, so that file tasks can be created when verification
        # records are not being updated (the tasks then never use it).
        verificationRecords = None
        if updateVerificationRecords:
            verificationRecords = HashVerificationRecords(self.backupMap)
        restoreFileTasks = []
        for pathSummary in pathSummaryList:
            fullPath = pathSummary.fullPath (restoreDir)
            if pathSummary.isDir:
                if not os.path.isdir(fullPath):
                    os.makedirs(fullPath)
                print("Restored DIR %r" % fullPath)
            elif pathSummary.isFile:
                if not pathSummary.hash in hashContentKeyMap:
                    # The lookup below then raises KeyError; this message says which file caused it.
                    print("WARNING: No written content found for %r (hash %s)" % (pathSummary.relativePath,
                                                                                   pathSummary.hash))
                contentKey = hashContentKeyMap[pathSummary.hash]
                restoreFileTasks.append (IncrementalBackups.RestoreFileTask (self.backupMap, contentKey,
                                                                             fullPath, updateVerificationRecords,
                                                                             verificationRecords, overwrite))
            else:
                print("WARNING: Unknown path type %r" % pathSummary)
        taskRunner.runTasks (restoreFileTasks)
        if updateVerificationRecords:
            verificationRecords.updateRecords()

    def getRestoreDetails(self, dateTimeString):
        """Gather what is needed to restore the backup with the given date-time string
        (or the most recent backup, if None). Returns a tuple of: the path summaries of
        that backup, the hash content key map for its backup group, and its backup record.
        Raises Exception if there are no backup records."""
        backupRecords = self.getBackupRecords()
        print("backupRecords = %r" % backupRecords)
        if len(backupRecords) == 0:
            raise Exception("No backup records found")
        print("Get restore records for %s" % (dateTimeString or "(most recent backup)"))
        restoreRecords = self.getRestoreRecords(backupRecords, dateTimeString)
        print("restoreRecords = %r" % restoreRecords)
        pathSummaryDataLists = [self.getPathSummaryDataList(record) for record in restoreRecords]
        print("parsing pathSummaryLists from YAML data ...")
        pathSummaryLists = [[PathSummary.fromYamlData(pathSummaryData) for pathSummaryData in pathSummaryDataList]
                            for pathSummaryDataList in pathSummaryDataLists]
        print("calculating hashContentKeyMap ...")
        hashContentKeyMap = self.getHashContentKeyMap(restoreRecords, pathSummaryLists)
        print("hashContentKeyMap = %r" % hashContentKeyMap)
        backupToRestore = restoreRecords[-1]
        print("Target backup for restore: %r" % backupToRestore)
        pathSummaryListToRestore = pathSummaryLists[-1]
        return pathSummaryListToRestore, hashContentKeyMap, backupToRestore

    def getRestoredDirHash(self, dateTimeString = None):
        """Build a BaseDirHash describing the backed up directory tree, using the file
        hashes supplied by the hash verification records"""
        pathSummaryList, hashContentKeyMap, backupToRestore = self.getRestoreDetails(dateTimeString)
        verificationRecords = HashVerificationRecords(self.backupMap)
        restoredDirHash = BaseDirHash(None, "backed up files")
        for pathSummary in pathSummaryList:
            if pathSummary.isDir:
                restoredDirHash.addDirSummary(pathSummary.relativePath)
                print(" DIR %r" % pathSummary.relativePath)
            elif pathSummary.isFile:
                contentKey = hashContentKeyMap[pathSummary.hash]
                # We could compare pathSummary.hash and fileHash,
                # but the verified fileHash is what matters (to compare to local file)
                fileHash = verificationRecords.getWrittenFileHash(contentKey.datetime, contentKey.filePath)
                restoredDirHash.addFileSummary(pathSummary.relativePath, fileHash)
                print(" FILE %r" % pathSummary.relativePath)
            else:
                print("WARNING: Unknown path type %r" % pathSummary)
        verificationRecords.updateRecords()
        return restoredDirHash

    def incrementalVerify(self, sourceDir):
        """Incrementally verify the most recent backup against a source directory, by
        comparing the directory hash of the backed up files with that of the local files"""
        print("Incrementally verifying against directory %r ..." % sourceDir)
        restoredDirHash = self.getRestoredDirHash()
        print("RESTORE DIR HASH:")
        restoredDirHash.printIndented()
        print("")
        print("LOCAL DIR HASH for %r" % sourceDir)
        localDirHash = DirHash(sourceDir, None, sourceDir)
        localDirHash.printIndented()
        errorDiff = CompareDirectories.ErrorDiff()
        localDirHash.compareToOtherDirHash (restoredDirHash, 0, CompareDirectories.printLog, errorDiff)
        errorDiff.logAndCheck (localDirHash.description, restoredDirHash.description)

    def restore(self, restoreDir, dateTimeString = None,
                overwrite = False, updateVerificationRecords = False, allowIncomplete = False):
        """Restore the specified (or otherwise the most recent) backup to a
        destination directory (with optional overwrite).
        Raises Exception if the target is not a directory, if it is not empty and
        overwrite is false, or if the backup is incomplete and allowIncomplete is false."""
        if not os.path.exists(restoreDir):
            os.makedirs(restoreDir)
        if not os.path.isdir(restoreDir):
            raise Exception("Restore target %r is not a directory" % restoreDir)
        if not overwrite and len(os.listdir(restoreDir)) > 0:
            raise Exception("Restore target %r is not empty" % restoreDir)
        pathSummaryListToRestore, hashContentKeyMap, backupToRestore = self.getRestoreDetails(dateTimeString)
        if not allowIncomplete and not backupToRestore.completed:
            raise Exception("Backup dated %s is not complete and allowIncomplete is set to false" % backupToRestore.datetime)
        self.restoreDirectory (restoreDir, pathSummaryListToRestore, hashContentKeyMap,
                               overwrite, updateVerificationRecords)
        print("Restored data to %r" % restoreDir)
27d6c3a9 » Philip Dorrell 2008-06-01 init 828
def listBackups(backupMap):
    """Print out the list of all backups held in a backup map"""
    backups = IncrementalBackups(backupMap)
    backups.listBackups()
832
def pruneBackups(backupMap, keep = 1, dryRun = True):
    """Prune the backups held in a backup map, retaining the given number of
    backup groups (minimum 1); by default only a dry run is done"""
    backups = IncrementalBackups(backupMap)
    backups.pruneBackups(keep = keep, dryRun = dryRun)
836
def doBackup(sourceDirectory, backupMap, testRestoreDir = None, full = False, verify = False,
             doTheBackup = True, verifyIncrementally = False, recordTrigger = 10000000):
    """Do a backup from source directory to backup map, with options 'full' (or incremental)
    and 'verify' (in which case a test restore is done to the test restore directory).
    Also, if 'doTheBackup' is set to false, only do the test restore and verify.
    If 'verifyIncrementally' is set, verification is done by comparing hashes instead
    of doing a test restore. 'recordTrigger' is passed on to IncrementalBackups.
    Raises Exception if 'verify' is chosen without supplying 'testRestoreDir'.
    """
    startTime = datetime.datetime.now()
    print("")
    print("Started %s" % startTime)
    print("")
    if verify and testRestoreDir is None:
        raise Exception("Must supply testRestoreDir argument if verify option is chosen")
    print("Backing up %r ..." % sourceDirectory)
    backups = IncrementalBackups(backupMap, recordTrigger)
    srcDirInfo = DirectoryInfo(sourceDirectory)
    if doTheBackup:
        backups.doBackup (srcDirInfo, full = full)
        backupFinishedTime = datetime.datetime.now()
        backupTimeTaken = backupFinishedTime - startTime
        backupFinishedMessage = "Backup finished %s (started %s, took %s)" % (backupFinishedTime,
                                                                              startTime, backupTimeTaken)
        print("")
        print(backupFinishedMessage)
    restoreStartTime = datetime.datetime.now()
    if verify:
        print("")
        print("Verifying ...")
        if verifyIncrementally:
            print(" incrementally ...")
            backups.incrementalVerify (sourceDirectory)
        else:
            print(" fully ...")
            # Start from an empty restore directory; it may not exist yet on a
            # first run (restore() creates it in that case).
            if os.path.exists(testRestoreDir):
                shutil.rmtree(testRestoreDir)
            backups.restore(testRestoreDir, overwrite = False, updateVerificationRecords = True)
            CompareDirectories.verifyIdentical(testRestoreDir, srcDirInfo.path)
        verifyFinishedTime = datetime.datetime.now()
        print("")
        if doTheBackup:
            # Repeat the backup summary, as the verify output may have scrolled it away.
            print(backupFinishedMessage)
        restoreTimeTaken = verifyFinishedTime - restoreStartTime
        print("Verify finished %s (started %s, took %s)" % (verifyFinishedTime, restoreStartTime, restoreTimeTaken))