This repository is private.
All pages are served over SSL and all pushing and pulling is done over SSH.
No one may fork, clone, or view it unless they are added as a member.
Every repository with this icon is private.
This repository is public.
Anyone may fork, clone, or view it.
Every repository with this icon is public.
Philip Dorrell (author)
Sun Jul 27 02:42:09 -0700 2008
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 1 | # Copyright (c) 2008 Philip Dorrell, http://www.1729.com/ | |
| 2 | # | ||||
| 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| 4 | # of this software and associated documentation files (the "Software"), to deal | ||||
| 5 | # in the Software without restriction, including without limitation the rights | ||||
| 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| 7 | # copies of the Software, and to permit persons to whom the Software is | ||||
| 8 | # furnished to do so, subject to the following conditions: | ||||
| 9 | # | ||||
| 10 | # The above copyright notice and this permission notice shall be included in | ||||
| 11 | # all copies or substantial portions of the Software. | ||||
| 12 | # | ||||
| 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||||
| 19 | # THE SOFTWARE. | ||||
| 20 | |||||
| 21 | import yaml | ||||
| 22 | import hashlib | ||||
| 23 | import os | ||||
| 24 | import time | ||||
| 25 | import datetime | ||||
| 26 | import shutil | ||||
| 27 | import CompareDirectories | ||||
| 28 | import re | ||||
| 29 | from sets import Set | ||||
| 30 | |||||
def readFileBytes(filename):
    """Read named file and return contents as a byte string.

    The file is opened in binary mode and is closed again even if the
    read itself raises.
    """
    f = open(filename, "rb")
    try:
        return f.read()
    finally:
        f.close()
| 37 | |||||
def writeFileBytes(filename, bytes):
    """Write byte string as new contents of named file.

    The file is opened in binary mode (replacing any previous contents) and
    is closed again even if the write itself raises.
    """
    f = open(filename, "wb")
    try:
        f.write(bytes)
    finally:
        f.close()
| 43 | |||||
# Format version written alongside each new backup (see recordVersion) and
# required of existing backups by checkVersion. A backup with no recorded
# version is treated as version 1 (see getBackupsVersion).
BackupsVersion = 2
| b2451d80 » | Philip Dorrell | 2008-06-29 | 45 | ||
class PathSummary(object):
    """Information about a file or directory specified as a relative path within some base directory.

    Note: all paths are '/' separated, whether or not we are in Microsoft Windows.
    """

    def __init__(self, relativePath):
        """Construct from the relative path ('/' separated)."""
        self.relativePath = relativePath

    def fullPath(self, basePath):
        """Return the full path given the path of the base directory.

        The result is plain concatenation of basePath and the relative path.
        """
        return basePath + self.relativePath

    @staticmethod
    def fromYamlData(data):
        """Convert YAML data into FileSummary or DirSummary (inverse of toYamlData methods).

        data["type"] selects the summary class: "file" or "dir".
        Raises ValueError for any other type.
        """
        pathType = data["type"]
        if pathType == "file":
            return FileSummary.fromYamlData(data)
        elif pathType == "dir":
            return DirSummary.fromYamlData(data)
        else:
            # A real exception object: string exceptions are not supported
            # by Python 2.6 and later.
            raise ValueError("Unknown path type: %s" % pathType)
| 66 | |||||
class FileSummary(PathSummary):
    """A file identified by its relative path within some (unspecified) base
    directory, together with a SHA1 hash of the file's contents."""

    def __init__(self, relativePath, hash):
        super(FileSummary, self).__init__(relativePath)
        self.hash = hash
        # Flags that let callers tell files and directories apart.
        self.isFile = True
        self.isDir = False

    def __unicode__(self):
        details = (self.relativePath, self.hash)
        return u"FILE: %r : %s" % details

    def __repr__(self):
        return self.__unicode__()

    def toYamlData(self):
        """Return the dictionary that represents this file summary in YAML."""
        return dict(type="file", path=self.relativePath, hash=self.hash)

    @staticmethod
    def fromYamlData(data):
        """Rebuild a FileSummary from the dictionary made by toYamlData."""
        relativePath = data["path"]
        contentHash = data["hash"]
        return FileSummary(relativePath, contentHash)
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 92 | ||
class DirSummary(PathSummary):
    """A directory identified by its relative path within some (unspecified)
    base directory."""

    def __init__(self, relativePath):
        super(DirSummary, self).__init__(relativePath)
        # Flags that let callers tell files and directories apart.
        self.isFile = False
        self.isDir = True

    def __unicode__(self):
        return u"DIR: %r" % self.relativePath

    def __repr__(self):
        return self.__unicode__()

    def toYamlData(self):
        """Return the dictionary that represents this directory summary in YAML."""
        return dict(type="dir", path=self.relativePath)

    @staticmethod
    def fromYamlData(data):
        """Rebuild a DirSummary from the dictionary made by toYamlData."""
        relativePath = data["path"]
        return DirSummary(relativePath)
| 84b538f0 » | Philip Dorrell | 2008-06-06 | 116 | ||
def sha1Digest(content):
    """Return the SHA1 hash of a byte string as a hexadecimal string."""
    hasher = hashlib.sha1()
    hasher.update(content)
    return hasher.hexdigest()
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 119 | ||
class DirectoryInfo:
    """Path summaries for every directory and file found below a base directory.

    Each directory's summary is added before the summaries of anything
    contained within it.
    """

    def __init__(self, path):
        """Walk the base directory at 'path' and collect all path summaries."""
        self.path = unicode(path)
        self.pathSummaries = []
        self.summarizeSubDir(u"")

    def createDirSummary(self, relativePath):
        """Build the summary for a sub-directory given its relative path."""
        return DirSummary(relativePath)

    def createFileSummary(self, relativePath):
        """Build the summary for a file: reads the file and hashes its contents."""
        digest = sha1Digest(readFileBytes(self.path + relativePath))
        return FileSummary(relativePath, digest)

    def addSummary(self, pathSummary):
        """Print a path summary and append it to the list."""
        print(u"%r" % pathSummary)
        self.pathSummaries.append(pathSummary)

    def getPathSummariesYamlData(self):
        """Return the YAML data of every path summary, in list order."""
        yamlData = []
        for summary in self.pathSummaries:
            yamlData.append(summary.toYamlData())
        return yamlData

    def summarizeSubDir(self, relativePath):
        """Recursively summarize the sub-directory with the given relative path,
        adding a summary for every file and sub-directory found inside it."""
        dirPath = self.path + relativePath
        for childName in os.listdir(dirPath):
            childRelativePath = relativePath + "/" + childName
            childPath = self.path + childRelativePath
            if os.path.isfile(childPath):
                self.addSummary(self.createFileSummary(childRelativePath))
                continue
            if os.path.isdir(childPath):
                # Record the directory itself first, then descend into it.
                self.addSummary(self.createDirSummary(childRelativePath))
                self.summarizeSubDir(childRelativePath)
                continue
            # Neither a regular file nor a directory: report it and move on.
            print("UNKNOWN OBJECT %r in %r" % (childName, dirPath))
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 163 | ||
class HashVerificationRecords(object):
    """Records of verified hashes of backed up files (i.e. verified by actually reading
    the file content out of the backup map and recalculating the hash).

    Verified hashes are kept per backup date/time, in the backup map under the
    key '<datetime>/verifiedFileHashes.yaml'.

    Note that this class is not yet used, and nothing is yet writing the verification records
    into the backup map.
    """

    def __init__(self, backupMap):
        self.backupMap = backupMap
        # backup datetime -> {file path: verified content hash}
        self.datetimeFileHashesMap = {}
        # Backup datetimes whose records changed since they were loaded.
        # Builtin set replaces the deprecated sets.Set.
        self.datetimeUpdated = set()

    def getFileHashesMap(self, datetime):
        """Return the {file path: hash} map for a backup datetime.

        The map is loaded from the backup map on first use (an empty map if
        no record exists there) and cached afterwards.
        """
        if datetime in self.datetimeFileHashesMap:
            return self.datetimeFileHashesMap[datetime]
        fileHashesRecordFilename = datetime + "/verifiedFileHashes.yaml"
        if fileHashesRecordFilename in self.backupMap:
            fileHashesMap = yaml.safe_load(self.backupMap[fileHashesRecordFilename])
        else:
            fileHashesMap = {}
        self.datetimeFileHashesMap[datetime] = fileHashesMap
        return fileHashesMap

    def markVerified(self, datetime, filePath, contentHash):
        """Record a verified hash for a file and mark that backup datetime as updated."""
        fileHashesMap = self.getFileHashesMap(datetime)
        fileHashesMap[filePath] = contentHash
        self.datetimeUpdated.add(datetime)

    def getWrittenFileHash(self, datetime, filePath):
        """Get the hash of a backed up file, either from an existing hash verification record,
        or, read the file contents from the backup map and calculate the hash."""
        fileHashesMap = self.getFileHashesMap(datetime)
        if filePath in fileHashesMap:
            return fileHashesMap[filePath]
        content = self.backupMap[datetime + "/files" + filePath]
        contentHash = sha1Digest(content)
        self.markVerified(datetime, filePath, contentHash)
        return contentHash

    def updateRecords(self):
        """Update any newly verified hashes back into the backup map."""
        print("Verified hashes were updated for %r" % self.datetimeUpdated)
        for datetime in self.datetimeUpdated:
            fileHashesRecordFilename = datetime + "/verifiedFileHashes.yaml"
            print("Updating verification records for %s = %s" % (datetime,
                  self.datetimeFileHashesMap[datetime]))
            self.backupMap[fileHashesRecordFilename] = yaml.safe_dump(self.datetimeFileHashesMap[datetime])
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 212 | ||
class BackupRecord:
    """Record of one backup: its type ('full' or 'incremental'), its date/time
    and whether it ran to completion."""

    def __init__(self, type, datetime, completed):
        """Construct from the backup type, the date time and the completed flag."""
        self.completed = completed
        self.datetime = datetime
        self.type = type

    @staticmethod
    def fromYamlData(data):
        """Rebuild a backup record from the dictionary made by toYamlData."""
        # A missing "completed" entry means True: the previous version of
        # keevalback only recorded a backup once it was complete.
        completed = data.get("completed", True)
        return BackupRecord(data["type"], data["datetime"], completed)

    def toYamlData(self):
        """Return the dictionary that represents this record in YAML."""
        return dict(type=self.type, datetime=self.datetime, completed=self.completed)

    def isFull(self):
        """True if this is a full (rather than incremental) backup."""
        return "full" == self.type

    def __str__(self):
        if self.completed:
            status = "complete"
        else:
            status = "INCOMPLETE"
        return "[Backup: %s %s %s]" % (self.type, self.datetime, status)

    def __repr__(self):
        return self.__str__()
| 239 | |||||
class InvalidBackupsVersion(Exception):
    """Raised for a backup whose recorded version differs from BackupsVersion.

    The offending version number is available as the 'version' attribute.
    """

    def __init__(self, backupRecord, version):
        message = ("Invalid backup for backup record %s version %d (this version = %d)" %
                   (backupRecord, version, BackupsVersion))
        Exception.__init__(self, message)
        self.version = version
| 245 | |||||
def getBackupsVersion(backupMap, backupRecord):
    """Return the version recorded for a backup, as an integer.

    The version is stored in the backup map under '<datetime>/version';
    a backup with no such entry is taken to be version 1.
    """
    versionKey = backupRecord.datetime + "/version"
    if versionKey not in backupMap:
        return 1
    return int(backupMap[versionKey])
| 252 | |||||
def checkVersion(backupMap, backupRecord):
    """Raise InvalidBackupsVersion unless the backup's recorded version
    equals the current BackupsVersion."""
    recordedVersion = getBackupsVersion(backupMap, backupRecord)
    if recordedVersion == BackupsVersion:
        return
    raise InvalidBackupsVersion(backupRecord, recordedVersion)
| 257 | |||||
class WrittenRecords:
    """Map from the SHA1 hash of some file contents to the backup map key those
    contents were written to (within the context of a particular set of
    backups, i.e. a full and following incrementals)."""

    def __init__(self):
        # content hash -> backup map key
        self.written = {}

    def recordHashWritten(self, hash, key):
        """Remember that contents with this hash were written to this key."""
        print(" record hash %s written to %r" % (hash, key))
        self.written[hash] = key

    def isWritten(self, hash):
        """True if contents with this hash have been recorded as written."""
        return hash in self.written

    def locationWritten(self, hash):
        """Return the key that contents with this hash were written to."""
        return self.written[hash]

    def recordBackup(self, backupMap, backupRecord):
        """Record hash and key for every file listed in the backup's
        '<datetime>/writtenPathList' entry (after checking the backup's version)."""  # todo: slow
        checkVersion(backupMap, backupRecord)
        writtenPathListYaml = backupMap[backupRecord.datetime + "/writtenPathList"]
        for fileData in yaml.safe_load(writtenPathListYaml):
            key = backupRecord.datetime + fileData["path"]
            self.recordHashWritten(fileData["hash"], key)

    def recordPreviousBackups(self, backupMap, backupRecords):
        """Record the written files of previous backups, working backwards from
        the most recent record and stopping after the first full backup reached
        (all records are processed if there is no full backup)."""
        for backupRecord in reversed(backupRecords):
            print("Recording backup %r ..." % backupRecord)
            self.recordBackup(backupMap, backupRecord)
            if backupRecord.type == "full":
                break
| 299 | |||||
class BaseFileHash(object):
    """A file described by its (basic) name and content hash, plus a
    description that is used in difference messages."""

    def __init__(self, name, hash, description):
        self.description = description
        self.hash = hash
        self.name = name

    def isDir(self):
        """A file hash is never a directory."""
        return False

    def printIndented(self, indent):
        """Print name and hash, prefixed by the given indent string."""
        print("%sFile %r: %s" % (indent, self.name, self.hash))

    def compareToOtherFileHash(self, otherFileHash, indent, log, logDiff):
        """Call logDiff with a message if the other file's hash differs from this one's."""
        if self.hash == otherFileHash.hash:
            return
        details = (self.name, self.hash, self.description,
                   otherFileHash.hash, otherFileHash.description)
        logDiff("File %r has hash %s in %r but hash %s in %r" % details)
| 84b538f0 » | Philip Dorrell | 2008-06-06 | 318 | ||
# A leading '/', the first path component, then optionally the remainder
# (which itself starts with '/'). DOTALL makes '.' match newlines too, so a
# remainder containing a newline is not silently truncated.
pathRegex = re.compile(r"[/]([^/]*)([/].*)?", re.DOTALL)

def analysePath(path):
    """Analyse a path starting with '/' and with '/' separators into 1st part and remainder
    e.g. '/x/y' into 'x' and '/y' and '/x' into 'x' and None.

    Returns the tuple (rootPath, remainderPath).
    Raises ValueError if the path does not start with '/'.
    """
    pathMatch = pathRegex.match(path)
    if pathMatch is None:
        raise ValueError("Path does not start with '/': %r" % (path,))
    return (pathMatch.group(1), pathMatch.group(2))
| 328 | |||||
class BaseDirHash(object):
    """Description of a directory as a map of immediate sub-directories
    and immediately contained files.

    Children are kept both in insertion order ('children') and by name
    ('childrenMap'). The description is passed on to every child created
    here and is used in difference messages.
    """

    def __init__(self, name, description):
        self.name = name
        self.children = []
        self.childrenMap = {}
        self.description = description

    def isDir(self):
        """A directory hash is always a directory."""
        return True

    def addChild(self, childHash):
        """Add a child, i.e. a directory or file"""
        self.children.append(childHash)
        self.childrenMap[childHash.name] = childHash

    def hasChildNamed(self, childName):
        """True if there is an immediate child with this name."""
        return childName in self.childrenMap

    def printIndented(self, indent=""):
        """Print this directory and, recursively, all of its children."""
        print("%sDir %r" % (indent, self.name))
        childIndent = " " + indent
        for child in self.children:
            child.printIndented(indent=childIndent)

    def addFileSummary(self, path, hash):
        """Add a file given its full path name relative to this directory
        (necessarily constructing the intermediate sub-directories if they
        are not already there)"""
        rootPath, remainderPath = analysePath(path)
        if remainderPath is None:
            self.addChild(BaseFileHash(rootPath, hash, self.description))
        else:
            childDirHash = self.getOrCreateChildDirHash(rootPath)
            childDirHash.addFileSummary(remainderPath, hash)

    def getOrCreateChildDirHash(self, name):
        """Return DirHash for an immediate sub-directory, creating it if necessary"""
        if name in self.childrenMap:
            return self.childrenMap[name]
        childDirHash = BaseDirHash(name, self.description)
        self.addChild(childDirHash)
        return childDirHash

    def addDirSummary(self, path):
        """Add a sub-directory given its full path name relative to this directory
        (necessarily constructing the intermediate sub-directories if they
        are not already there)"""
        rootPath, remainderPath = analysePath(path)
        # Reuse an existing child of that name. It may already have been
        # created implicitly as an intermediate directory, and adding a second
        # one would duplicate it in 'children' and drop its contents from
        # 'childrenMap'.
        childDirHash = self.getOrCreateChildDirHash(rootPath)
        if remainderPath is not None:
            childDirHash.addDirSummary(remainderPath)

    def compareToOtherDirHash(self, otherDirHash, indent, log, logDiff):
        """Recursively compare this directory with another one.

        log is called as log(indent, message) for each directory compared;
        logDiff is called with a message for each difference found. The
        indent is increased by 1 for each level of recursion.
        """
        log(indent, "comparing directory %r" % self.name)
        # Everything we have: is it the same kind of thing on the other side?
        for child1 in self.children:
            name1 = child1.name
            child2 = otherDirHash.childrenMap.get(name1)
            if child1.isDir():
                if child2 is None:
                    logDiff("%r is a directory in %r but does not exist in %r" %
                            (name1, self.description, otherDirHash.description))
                elif not child2.isDir():
                    logDiff("%r is a directory in %r but a file in %r" %
                            (name1, self.description, otherDirHash.description))
                else:
                    child1.compareToOtherDirHash(child2, indent+1, log, logDiff)
            else:
                if child2 is None:
                    logDiff("%r is a file in %r but does not exist in %r" %
                            (name1, self.description, otherDirHash.description))
                elif child2.isDir():
                    logDiff("%r is a file in %r but a directory in %r" %
                            (name1, self.description, otherDirHash.description))
                else:
                    child1.compareToOtherFileHash(child2, indent+1, log, logDiff)
        # Anything only the other side has.
        for child2 in otherDirHash.children:
            if not self.hasChildNamed(child2.name):
                if child2.isDir():
                    logDiff("%r does not exist in %r but is a directory in %r" %
                            (child2.name, self.description, otherDirHash.description))
                else:
                    logDiff("%r does not exist in %r but is a file in %r" %
                            (child2.name, self.description, otherDirHash.description))
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 419 | ||
class FileHash(BaseFileHash):
    """File hash whose hash value is calculated from the contents of the
    actual file called 'name' in the actual file-system directory 'dir'."""

    def __init__(self, dir, name, description):
        contentHash = sha1Digest(readFileBytes(dir + "/" + name))
        super(FileHash, self).__init__(name, contentHash, description)
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 427 | ||
class DirHash(BaseDirHash):
    """Information about files within a directory with a relative path name
    based on actual contents of actual directory in actual file-system base directory.

    Construction reads the directory 'name' inside 'dir' (or 'dir' itself if
    'name' is empty) and recursively hashes everything found in it.
    """

    def __init__(self, dir, name, description):
        super(DirHash, self).__init__(name, description)
        if name:
            fullPath = unicode(dir + "/" + name)
        else:
            fullPath = unicode(dir)
        for childName in os.listdir(fullPath):
            childPath = fullPath + "/" + childName
            if os.path.isfile(childPath):
                self.addChild(FileHash(fullPath, childName, self.description))
            elif os.path.isdir(childPath):
                self.addChild(DirHash(fullPath, childName, self.description))
            else:
                # Neither a regular file nor a directory (e.g. a broken
                # symlink): report and skip it, as DirectoryInfo does, rather
                # than failing in os.listdir.
                print("UNKNOWN OBJECT %r in %r" % (childName, fullPath))
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 440 | ||
class ContentKey(object):
    def __init__(self, datetime, filePath):
        """Hold the backup date/time and file path that together identify the
        contents of one file from a particular backup within a backup map.
        Note that filePath is expected to start with a '/'"""
        self.filePath = filePath
        self.datetime = datetime

    def fileKey(self):
        """Return the backup map key itself.
        Note: the "/files" infix leaves room for other meta-data to be stored under the datetime."""
        parts = (self.datetime, "/files", self.filePath)
        return parts[0] + parts[1] + parts[2]

    def __str__(self):
        return "[%s:%r]" % (self.datetime, self.filePath)

    def __repr__(self):
        return self.__str__()
| 1fc8bbfb » | Philip Dorrell | 2008-06-19 | 458 | ||
class BackupRecordUpdater:
    """Records the current state of a backup that is in progress, by
    delegating to the 'backups' object it was constructed with."""

    def __init__(self, backups, backupRecords, currentBackupRecord, backupKeyBase,
                 directoryInfo, recordTrigger = 1000000):
        # Collaborators and the identity of the backup being recorded.
        self.backups = backups
        self.backupRecords = backupRecords
        self.currentBackupRecord = currentBackupRecord
        self.backupKeyBase = backupKeyBase
        self.directoryInfo = directoryInfo
        # Progress state.
        self.recordTrigger = recordTrigger
        self.bytesWritten = 0
        self.unrecordedBytes = 0
        self.writtenFileSummaries = []

    def recordVersion(self):
        """Store the current BackupsVersion under '<backupKeyBase>/version'."""
        versionKey = self.backupKeyBase + "/version"
        self.backups.backupMap[versionKey] = str(BackupsVersion)

    def recordPathSummaries(self):
        """Have 'backups' record the path summaries of the directory info."""
        self.backups.recordPathSummaries(self.backupKeyBase, self.directoryInfo)

    def recordWrittenFileSummaries(self):
        """Have 'backups' record the file summaries written so far."""
        self.backups.recordWrittenFileSummaries(self.backupKeyBase, self.writtenFileSummaries)

    def saveBackupRecords(self):
        """Have 'backups' save the list of backup records."""
        self.backups.saveBackupRecords(self.backupRecords)

    def checkpoint(self):
        """Record progress part-way through the backup."""
        self.recordWrittenFileSummaries()

    def initialRecord(self):
        """Record everything needed at the start of the backup."""
        self.recordVersion()
        self.recordPathSummaries()
        self.recordWrittenFileSummaries()
        self.saveBackupRecords()

    def recordCompleted(self):
        """Mark the current backup record as completed and record the final state."""
        self.currentBackupRecord.completed = True
        self.recordWrittenFileSummaries()
        self.saveBackupRecords()
| 1fc8bbfb » | Philip Dorrell | 2008-06-19 | 498 | ||
| b147c7f4 » | Philip Dorrell | 2008-07-03 | 499 | from ThreadedTaskRunner import ThreadedTaskRunner, TaskRunner | |
| 3fd43cb5 » | Philip Dorrell | 2008-06-29 | 500 | ||
# Alternative runner configuration using TaskRunner, kept for reference:
#taskRunner = TaskRunner(checkpointFreq = 30)

# Module-wide task runner used by deleteMapValues, doBackup and restoreDirectory.
taskRunner = ThreadedTaskRunner (checkpointFreq = 500, numThreads = 30)
| b2451d80 » | Philip Dorrell | 2008-06-29 | 504 | ||
class DeleteBackupMapValueTask:
    """Task that removes a single key from a backup map."""

    def __init__(self, backupMap, key):
        """Remember which key to delete from which map."""
        self.key = key
        self.backupMap = backupMap

    def getThreadLocals(self):
        """Return a dict holding a clone of the backup map under "backupMap"."""
        clonedMap = self.backupMap.clone()
        return {"backupMap": clonedMap}

    def doUnsynchronized(self):
        """Announce the deletion, then delete the key from the map."""
        print(" delete %r ..." % self.key)
        del self.backupMap[self.key]

    def doSynchronized(self):
        """Nothing needs to happen in the synchronized phase."""
        return None
| 519 | |||||
def deleteMapValues(backupMap, dryRun):
    """Delete every key of backupMap; when dryRun is true only list the keys.

    Real deletions are wrapped in DeleteBackupMapValueTask objects and handed
    to the module-level taskRunner in a single batch.
    """
    prefix = "DRYRUN: " if dryRun else ""
    print("%sDeleting keys from map %s" % (prefix, backupMap))
    if dryRun:
        for key in backupMap:
            print(" delete %r ..." % key)
    else:
        # Collect all tasks first so the map is not modified while it is iterated.
        pendingDeletes = [DeleteBackupMapValueTask(backupMap, key) for key in backupMap]
        taskRunner.runTasks (pendingDeletes)
    print("finished.")
| 532 | |||||
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 533 | class IncrementalBackups: | |
| 44f6addd » | Philip Dorrell | 2008-06-04 | 534 | """A set of dated full or incremental backups within a given backup map. | |
| 535 | This object does _not_ (currently) record _where_ the file contents came from. | ||||
| 536 | """ | ||||
def __init__(self, backupMap, recordTrigger = 10000000):
    """Wrap a backup map.

    backupMap     -- the map in which all dated backups are stored
    recordTrigger -- value passed on to the BackupRecordUpdater created by doBackup
    """
    self.recordTrigger = recordTrigger
    self.backupMap = backupMap
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 540 | ||
def getDateTimeString(self):
    """Return the current time formatted as the name of a new dated backup,
    e.g. 2008-Jun-01.12-30-59."""
    stampFormat = "%Y-%b-%d.%H-%M-%S"
    return time.strftime(stampFormat)
| 544 | |||||
def getBackupRecords(self):
    """Return the BackupRecord objects for the backups already in the map.

    The records are stored as YAML under the "backupRecords" key; when that
    key is absent the result is an empty list.
    """
    if "backupRecords" not in self.backupMap:
        return []
    storedRecords = yaml.safe_load(self.backupMap["backupRecords"])
    return [BackupRecord.fromYamlData(item) for item in storedRecords]
| bce0bbdd » | Philip Dorrell | 2008-06-03 | 552 | ||
def saveBackupRecords(self, backupRecords):
    """Serialize the given backup records to YAML, store them under the
    "backupRecords" key, and print the new list."""
    serialized = yaml.safe_dump([record.toYamlData() for record in backupRecords])
    self.backupMap["backupRecords"] = serialized
    print("new backup records = %r" % backupRecords)
| 557 | |||||
def getBackupGroups(self):
    """Return the backup records partitioned into groups.

    A new group starts at every full backup, and also at the very first
    record whatever its type; all other records join the latest group.
    """
    groups = []
    for record in self.getBackupRecords():
        if record.isFull() or not groups:
            groups.append([record])
        else:
            groups[-1].append(record)
    return groups
| 571 | |||||
def listBackups(self):
    """Print one line per backup, marking the first backup of each group with '*'."""
    for group in self.getBackupGroups():
        for position, record in enumerate(group):
            marker = "*" if position == 0 else " "
            status = "complete" if record.completed else "INCOMPLETE"
            print("%s%s: %s %s" % (marker, record.type, record.datetime, status))
| bce0bbdd » | Philip Dorrell | 2008-06-03 | 583 | ||
def pruneBackup(self, backupRecord, dryRun):
    """Delete (or, on a dry run, just list) every key stored under the
    sub-map named by the record's datetime."""
    print(" prune backup %r" % backupRecord)
    deleteMapValues(self.backupMap.subMap(backupRecord.datetime), dryRun)
| 589 | |||||
def pruneBackupGroup(self, recordGroup, dryRun):
    """Prune each backup of one backup group in turn (honouring dryRun)."""
    print("Backup group to prune: %r" % recordGroup)
    for backupRecord in recordGroup:
        self.pruneBackup(backupRecord, dryRun)
| 595 | |||||
def pruneBackups(self, keep = 1, dryRun = True):
    """Prune the oldest backup groups so that only the `keep` most recent remain.

    keep   -- number of backup groups to retain; must be at least 1
    dryRun -- when true, report what would be pruned without deleting
              anything or rewriting the backup records

    Raises Exception when keep is less than 1.
    """
    suffix = ", DRY RUN" if dryRun else ""
    print("Pruning backups, keep %d%s" % (keep, suffix))
    if keep < 1:
        raise Exception ("Number of full backups to keep must be at least 1")
    groups = self.getBackupGroups()
    groupCount = len(groups)
    if keep >= groupCount:
        print("Only %d full backups, and %d specified to keep, so none will be pruned" % (groupCount, keep))
        return
    cutoff = groupCount - keep
    for doomedGroup in groups[:cutoff]:
        self.pruneBackupGroup(doomedGroup, dryRun = dryRun)
    if dryRun:
        return
    # Flatten the surviving groups back into one list of records and persist it.
    survivors = []
    for group in groups[cutoff:]:
        survivors.extend(group)
    self.saveBackupRecords(survivors)
| 7d6fee15 » | Philip Dorrell | 2008-06-09 | 616 | ||
def recordPathSummaries(self, backupKeyBase, directoryInfo):
    """Store the directory's path summaries as YAML under "<backupKeyBase>/pathList"."""
    targetKey = backupKeyBase + "/pathList"
    print("Record path summaries to %s ..." % targetKey)
    summariesYaml = yaml.safe_dump(directoryInfo.getPathSummariesYamlData())
    self.backupMap[targetKey] = summariesYaml
| d973ff37 » | Philip Dorrell | 2008-07-15 | 621 | ||
def recordWrittenFileSummaries(self, backupKeyBase, writtenFileSummaries):
    """Store the summaries of the written files as YAML under
    "<backupKeyBase>/writtenPathList"."""
    targetKey = backupKeyBase + "/writtenPathList"
    print("Record written file summaries to %s ..." % targetKey)
    summariesData = [fileSummary.toYamlData() for fileSummary in writtenFileSummaries]
    self.backupMap[targetKey] = yaml.safe_dump(summariesData)
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 627 | ||
class BackupFileTask:
    """Task that stores the contents of one source file in the backup map.

    doUnsynchronized reads the file and writes it to the map; doSynchronized
    then updates the shared bookkeeping objects.
    """

    def __init__(self, backupMap, backupFilesKeyBase, pathSummary, fileName, writtenRecords,
                 writtenFileSummaries):
        """Remember the destination map and key base, the file to read, and the
        bookkeeping objects to update once the content is written."""
        self.fileName = fileName
        self.pathSummary = pathSummary
        self.backupMap = backupMap
        self.backupFilesKeyBase = backupFilesKeyBase
        self.writtenFileSummaries = writtenFileSummaries
        self.writtenRecords = writtenRecords

    def getThreadLocals(self):
        """Return a dict holding a clone of the backup map under "backupMap"."""
        clonedMap = self.backupMap.clone()
        return {"backupMap": clonedMap}

    def doUnsynchronized(self):
        """Read the source file and store its bytes under the file content key."""
        fileBytes = readFileBytes(self.fileName)
        # The key is kept on the task because doSynchronized reports it later.
        self.fileContentKey = self.backupFilesKeyBase + self.pathSummary.relativePath
        print("Writing %r ..." % self.fileContentKey)
        self.backupMap[self.fileContentKey] = fileBytes

    def doSynchronized(self):
        """Note the file as written and register where its hash was stored."""
        summary = self.pathSummary
        self.writtenFileSummaries.append (summary)
        self.writtenRecords.recordHashWritten (summary.hash, self.fileContentKey)
| 59aa8ae0 » | Philip Dorrell | 2008-06-28 | 650 | ||
def doBackup(self, directoryInfo, full = True):
    """Create a new backup of a source directory (full or incremental).

    directoryInfo -- description of the source directory; supplies
                     pathSummaries and the base path
    full          -- True for a full backup, False for an incremental one.
                     An incremental backup requested when no previous
                     backup records exist is performed and recorded as full.

    Note: 'incremental' is based on comparing the hashes of file contents
    already marked as written to previous backups in the same backup group.
    It is not based on any comparison of files done on the source computer.
    If a given file's contents have already been written, the file is not
    written again; a restore finds the contents via the matching hash (which
    may or may not belong to the same file in the same place on the source
    computer).
    """
    dateTimeString = self.getDateTimeString()
    backupKeyBase = dateTimeString
    backupFilesKeyBase = backupKeyBase + "/files"
    print("retrieving existing backup records ...")
    backupRecords = self.getBackupRecords()
    print("backup records = %r" % backupRecords)
    # Decide between full and incremental BEFORE the new record is appended:
    # once it is in the list the list can never be empty, so a later check
    # for "no previous records" would never fire.
    if not full and len(backupRecords) == 0:
        full = True
        print("No previous records, so backup will be FULL anyway")
    currentBackupRecord = BackupRecord("full" if full else "incremental", dateTimeString,
                                       completed = False)
    backupRecords.append(currentBackupRecord)
    backupRecordUpdater = BackupRecordUpdater (self, backupRecords, currentBackupRecord,
                                               backupKeyBase, directoryInfo,
                                               recordTrigger = self.recordTrigger)
    backupRecordUpdater.initialRecord()
    writtenRecords = WrittenRecords()
    if not full:
        writtenRecords.recordPreviousBackups (self.backupMap, backupRecords)
    backupFileTasks = []
    for pathSummary in directoryInfo.pathSummaries:
        if not pathSummary.isDir:
            fileName = pathSummary.fullPath(directoryInfo.path)
            if not writtenRecords.isWritten(pathSummary.hash):
                backupFileTask = IncrementalBackups.BackupFileTask(self.backupMap, backupFilesKeyBase,
                                                                   pathSummary, fileName, writtenRecords,
                                                                   backupRecordUpdater.writtenFileSummaries)
                backupFileTasks.append (backupFileTask)
            else:
                print("Content of %r already written to %r" % (pathSummary,
                                                                 writtenRecords.locationWritten (pathSummary.hash)))
    # The updater doubles as the checkpoint task passed to the runner.
    taskRunner.runTasks (backupFileTasks, checkpointTask = backupRecordUpdater)
    backupRecordUpdater.recordCompleted()
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 691 | ||
def doFullBackup(self, directoryInfo):
    """Back up the whole source directory as a new full backup."""
    wantFull = True
    self.doBackup (directoryInfo, full = wantFull)
| 695 | |||||
def doIncrementalBackup(self, directoryInfo):
    """Back up the source directory as a new incremental backup."""
    wantFull = False
    self.doBackup (directoryInfo, full = wantFull)
| 699 | |||||
def getBackupRecordForDateTime(self, backupRecords, dateTimeString):
    """Return the index within backupRecords of the record whose datetime
    equals dateTimeString.

    Note that, despite the name, the position is returned, not the record.

    Raises Exception when no record has that date-time.
    """
    for index, backupRecord in enumerate(backupRecords):
        if backupRecord.datetime == dateTimeString:
            return index
    # A real exception object is required: raising a bare string is rejected
    # with a TypeError on Python 2.6 and later, which hides the message.
    raise Exception("No backup record found for date-time %r" % dateTimeString)
| 705 | |||||
def getRestoreRecords(self, backupRecords, dateTimeString):
    """Return the records needed to restore one backup.

    backupRecords  -- all backup records, oldest first
    dateTimeString -- date-time of the backup to restore, or None for the
                      most recent backup

    The result runs from the nearest preceding full backup (or from the very
    first record when no full backup precedes the target) up to and including
    the target backup. An empty record list yields an empty result.
    """
    if dateTimeString is None:
        restorePos = len(backupRecords)-1
    else:
        restorePos = self.getBackupRecordForDateTime (backupRecords, dateTimeString)
    pos = restorePos
    # Stop at index 0 at the latest: letting pos reach -1 would turn the
    # slice below into [-1:...] and silently drop the earlier records.
    while pos > 0 and backupRecords[pos].type != "full":
        pos -= 1
    return backupRecords[pos:(restorePos+1)]
| 27d6c3a9 » | Philip Dorrell | 2008-06-01 | 716 | ||
def getPathSummaryDataList(self, backupRecord):
    """Load the YAML data stored under "<datetime>/pathList" for the given
    dated backup, i.e. the information about its files and directories."""
    print("getPathSummaryDataList for %r ..." % backupRecord)
    pathListKey = backupRecord.datetime + "/pathList"
    return yaml.safe_load(self.backupMap[pathListKey])
| 725 | |||||
def getWrittenFileSummaryDataList(self, backupRecord):
    """Load the YAML data stored under "<datetime>/writtenPathList" for the
    given dated backup, i.e. the summaries of the files written by it."""
    print("getWrittenFileSummaryDataList for %r ..." % backupRecord)
    writtenPathListKey = backupRecord.datetime + "/writtenPathList"
    return yaml.safe_load(self.backupMap[writtenPathListKey])
| d6dc2e0e » | Philip Dorrell | 2008-07-15 | 735 | ||
def getHashContentKeyMap(self, restoreRecords, writtenFileSummaryLists):
    """Map each content hash to the ContentKey saying where it was written.

    restoreRecords and writtenFileSummaryLists are walked in parallel; when
    the same hash occurs more than once the later entry replaces the earlier.
    """
    hashKeyPairs = ((fileSummary.hash, ContentKey(record.datetime, fileSummary.relativePath))
                    for record, fileSummaries in zip(restoreRecords, writtenFileSummaryLists)
                    for fileSummary in fileSummaries)
    return dict(hashKeyPairs)
| 745 | |||||
class RestoreFileTask:
    """Task that restores one file from the backup map to a local path and,
    when requested, records the hash of the restored content as verified."""

    def __init__(self, backupMap, contentKey, fullPath, updateVerificationRecords, verificationRecords, overwrite):
        """Remember the source key, the destination path, and the
        verification and overwrite options."""
        self.backupMap = backupMap
        self.contentKey = contentKey
        self.fullPath = fullPath
        self.overwrite = overwrite
        self.verificationRecords = verificationRecords
        self.updateVerificationRecords = updateVerificationRecords

    def getThreadLocals(self):
        """Return a dict holding a clone of the backup map under "backupMap"."""
        clonedMap = self.backupMap.clone()
        return {"backupMap": clonedMap}

    def doUnsynchronized(self):
        """Fetch the content from the map and write it to the destination path."""
        restoredBytes = self.backupMap[self.contentKey.fileKey()]
        destination = self.fullPath
        # An existing file is removed first only when overwriting was requested.
        if os.path.exists(destination) and self.overwrite:
            os.remove (destination)
        writeFileBytes(destination, restoredBytes)
        if self.updateVerificationRecords:
            # Kept on the task so that doSynchronized can record it.
            self.contentHash = sha1Digest(restoredBytes)
        print("Restored FILE %r" % self.fullPath)

    def doSynchronized(self):
        """Record the restored content's hash as verified, if requested."""
        if not self.updateVerificationRecords:
            return
        key = self.contentKey
        self.verificationRecords.markVerified (key.datetime, key.filePath, self.contentHash)
        print("Mark verified FILE %r" % self.fullPath)
| 772 | |||||
def restoreDirectory(self, restoreDir, pathSummaryList, hashContentKeyMap, overwrite,
                     updateVerificationRecords = False):
    """Restore a directory from path summaries and a hash-to-content-key map.

    restoreDir                -- local directory to restore into
    pathSummaryList           -- summaries of the directories and files to restore
    hashContentKeyMap         -- maps a content hash to the ContentKey of its stored bytes
    overwrite                 -- passed to each RestoreFileTask; existing files are
                                 removed before writing only when this is true
    updateVerificationRecords -- when true, the hash of every restored file is
                                 recorded as verified and the records are saved

    Directories are created immediately; files are restored as a batch of
    tasks through the module-level taskRunner. A file whose content hash is
    not in hashContentKeyMap is reported with a warning and skipped.
    """
    print("Restoring directory %r ..." % restoreDir)
    # Always bind the name: it is handed to every RestoreFileTask even when
    # verification is off, so leaving it unset would raise on the first file.
    verificationRecords = None
    if updateVerificationRecords:
        verificationRecords = HashVerificationRecords(self.backupMap)
    restoreFileTasks = []
    for pathSummary in pathSummaryList:
        fullPath = pathSummary.fullPath (restoreDir)
        if pathSummary.isDir:
            if not os.path.isdir(fullPath):
                os.makedirs(fullPath)
            print("Restored DIR %r" % fullPath)
        elif pathSummary.isFile:
            if pathSummary.hash not in hashContentKeyMap:
                print("WARNING: No written content found for %r (hash %s)" % (pathSummary.relativePath,
                                                                                pathSummary.hash))
                # Nothing to restore from; move on instead of failing on the lookup.
                continue
            contentKey = hashContentKeyMap[pathSummary.hash]
            restoreFileTasks.append (IncrementalBackups.RestoreFileTask (self.backupMap, contentKey,
                                                                         fullPath, updateVerificationRecords,
                                                                         verificationRecords, overwrite))
        else:
            print("WARNING: Unknown path type %r" % pathSummary)
    taskRunner.runTasks (restoreFileTasks)
    if updateVerificationRecords:
        verificationRecords.updateRecords()
| 799 | |||||
def getRestoreDetails(self, dateTimeString):
    """Gather what is needed to restore the backup made at dateTimeString
    (or the most recent backup when dateTimeString is None).

    Returns a tuple (pathSummaryListToRestore, hashContentKeyMap, backupToRestore):
    the path summaries of the target backup, the map from content hash to
    ContentKey built from the restore records, and the target backup's record.

    Raises Exception when there are no backup records at all.
    """
    backupRecords = self.getBackupRecords()
    print("backupRecords = %r" % backupRecords)
    if len(backupRecords) == 0:
        # A real exception object is required: raising a bare string is
        # rejected with a TypeError on Python 2.6 and later.
        raise Exception("No backup records found")
    print("Get restore records for %s" % (dateTimeString or "(most recent backup)"))
    restoreRecords = self.getRestoreRecords(backupRecords, dateTimeString)
    print("restoreRecords = %r" % restoreRecords)
    for restoreRecord in restoreRecords:
        print("checkVersion for %r ..." % restoreRecord)
        checkVersion(self.backupMap, restoreRecord)
    writtenFileSummaryDataLists = [self.getWrittenFileSummaryDataList(record) for record in restoreRecords]
    print("parsing writtenFileSummaryDataLists from YAML data ...")
    writtenFileSummaryLists = [[PathSummary.fromYamlData(pathSummaryData) for pathSummaryData in pathSummaryDataList]
                               for pathSummaryDataList in writtenFileSummaryDataLists]
    print("calculating hashContentKeyMap ...")
    hashContentKeyMap = self.getHashContentKeyMap(restoreRecords, writtenFileSummaryLists)
    print("hashContentKeyMap = %r" % hashContentKeyMap)
    # The last restore record is the backup actually being restored.
    backupToRestore = restoreRecords[-1]
    print("Target backup for restore: %r" % backupToRestore)
    pathSummaryListToRestore = [PathSummary.fromYamlData (pathSummaryData) for pathSummaryData
                                in self.getPathSummaryDataList(backupToRestore)]
    return pathSummaryListToRestore, hashContentKeyMap, backupToRestore
| 84b538f0 » | Philip Dorrell | 2008-06-06 | 823 | ||
| 7ad6bad4 » | Philip Dorrell | 2008-06-09 | 824 | def getRestoredDirHash(self, dateTimeString = None): | |
| 825 | pathSummaryList, hashContentKeyMap, backupToRestore = self.getRestoreDetails(dateTimeString) | ||||
| 84b538f0 » | Philip Dorrell | 2008-06-06 | 826 | verificationRecords = HashVerificationRecords(self.backupMap) | |
| 827 | restoredDirHash = BaseDirHash(None, "backed up files") | ||||
| 828 | for pathSummary in pathSummaryList: | ||||
| 829 | if pathSummary.isDir: | ||||
| 830 | restoredDirHash.addDirSummary(pathSummary.relativePath) | ||||
| 5a35cb97 » | Philip Dorrell | 2008-06-25 | 831 | print " DIR %r" % pathSummary.relativePath | |
| 84b538f0 » | Philip Dorrell | 2008-06-06 | 832 | elif pathSummary.isFile: | |
| 833 | contentKey = hashContentKeyMap[pathSummary.hash] | ||||
| 834 | # We could compare pathSummary.hash and fileHash, | ||||
| 835 | # but the verified fileHash is what matters (to compare to local file) | ||||
| 836 | fileHash = verificationRecords.getWrittenFileHash(contentKey.d | ||||







