Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
609 lines (538 sloc) 23.4 KB
#!/usr/bin/env python
__version__ = "0.6"
import base64
import codecs
import io
import locale
import os
import re
import shutil
import stat
import sys
import time
from datetime import datetime
from io import IOBase
from optparse import OptionParser
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, parse
from xml.dom.minidom import Node
try:
import pyexiv2
except ImportError:
pyexiv2 = None
# To allow Unicode characters to be displayed
# (see http://wiki.python.org/moin/PrintFails).
# Only needed on Python 2, where the standard streams accept byte strings.
# On Python 3 they are already text streams, and wrapping them in an
# encoding writer would make every write fail.
if sys.version_info[0] < 3:
    _stream_writer = codecs.getwriter(locale.getpreferredencoding())
    sys.stdout = _stream_writer(sys.stdout)
    sys.stderr = _stream_writer(sys.stderr)
class iPhotoLibraryError(Exception):
    """Raised when the iPhoto library cannot be read or exported."""
# Some AlbumData.xml files contain null bytes. Strip them so the SAX parser
# doesn't fail with an Invalid Token error.
class RemoveNullsStream(IOBase):
    """
    Read-only byte stream over a file that drops every NUL byte.
    """

    def __init__(self, filename):
        # Binary mode: the XML parser does its own decoding, and stripping
        # bytes works the same way on Python 2 and Python 3.
        self.file = open(filename, 'rb')

    def read(self, bufsize=2**20):
        """
        Return up to bufsize bytes with NUL bytes removed.

        An empty result is only returned at end of file; a chunk that
        consisted solely of NUL bytes is skipped instead of being reported
        as an (apparent) end of file.
        """
        while True:
            chunk = self.file.read(bufsize)
            if not chunk:
                return chunk
            cleaned = chunk.replace(b"\0", b"")
            if cleaned:
                return cleaned

    def close(self):
        """Close the underlying file (safe to call more than once)."""
        # self.file is missing when open() failed in __init__.
        underlying = getattr(self, "file", None)
        if underlying is not None:
            underlying.close()
        IOBase.close(self)
class iPhotoLibrary(object):
    """
    In-memory view of an iPhoto library's AlbumData.xml, plus the logic to
    copy its images into dated/named folders and (optionally) to import
    folders that exist in the destination but not in the library.
    """

    # Library format this code understands. A different major version is
    # rejected; a higher minor version only produces a warning.
    major_version = 2
    minor_version = 0
    # Keys kept from each image record (and its nested dicts) when parsing.
    interesting_image_keys = [
        'ImagePath', 'Rating', 'Keywords', 'Caption', 'Comment', 'Faces',
        'face key'
    ]
    # Seconds between the Unix epoch and 2001-01-01 00:00:00 UTC; library
    # timestamps are offsets from that instant.
    apple_epoch = 978307200

    # intern() is a builtin on Python 2 and lives in sys on Python 3.
    _intern = staticmethod(getattr(sys, "intern", None) or intern)

    def __init__(self, albumDir, destDir, use_album=False, use_date=False,
                 use_faces=False, use_metadata=False, deconflict=False,
                 quiet=False, year_dir=False, import_missing=False,
                 import_from_date=None, test=False, date_delimiter="-",
                 ignore_time_delta=False):
        """
        Parse <albumDir>/AlbumData.xml and remember the export options.

        import_from_date, when given, is a "YYYY-MM-DD" string. If
        import_missing is set, the destination directory is scanned for
        candidate folders before the library is parsed.
        """
        self.use_album = use_album
        self.use_date = use_date
        self.use_faces = use_faces
        self.use_metadata = use_metadata
        self.deconflict = deconflict
        self.dest_dir = destDir
        self.output_dirs = set()
        self.output_files = set()
        self.quiet = quiet
        self.albums = []
        self.keywords = {}
        self.faces = {}
        self.images = {}
        self.test = test
        self.year_dir = year_dir
        self.import_missing = import_missing
        self.ignore_time_delta = ignore_time_delta
        self.date_delimiter = date_delimiter
        self.import_albums = []
        if import_from_date:
            self.import_from_date = datetime.strptime(
                import_from_date, "%Y-%m-%d")
        else:
            self.import_from_date = None
        if self.import_missing:
            self.build_import_list()
        albumDataXml = os.path.join(albumDir, "AlbumData.xml")
        albumDataStream = RemoveNullsStream(albumDataXml)
        try:
            self.status("* Parsing iPhoto Library data... ")
            self.parseAlbumData(albumDataStream)
        finally:
            # Release the file even when parsing fails.
            albumDataStream.close()
        self.status("Done.\n")

    def parseAlbumData(self, filename):
        """
        Parse an iPhoto AlbumData.xml file, keeping the interesting
        bits.

        filename is handed to xml.dom.pulldom.parse(), so it may be a path
        or a stream. Fills self.keywords, self.faces, self.albums and
        self.images. Raises iPhotoLibraryError for an unsupported major
        version or a malformed image list.
        """
        doc = parse(filename)
        # Open elements; len(stack) is the nesting depth of the current
        # element (plist=1, top-level dict=2, its keys/values=3).
        stack = []
        last_top_key = None
        last_image_key = None
        if self.use_album:
            album_list_key = "List of Albums"
        else:
            album_list_key = "List of Rolls"
        for event, node in doc:
            if event == START_ELEMENT:
                stack.append(node)
                level = len(stack)
                # expandNode() consumes the element's END_ELEMENT event, so
                # every branch that expands a node pops the stack itself.
                if level == 3:
                    if node.nodeName == 'key':
                        doc.expandNode(node)
                        last_top_key = self.getText(node)
                        stack.pop()
                    elif last_top_key == 'List of Keywords':
                        doc.expandNode(node)
                        self.keywords = self.dePlist(node)
                        stack.pop()
                    elif last_top_key == 'List of Faces':
                        doc.expandNode(node)
                        self.faces = dict(
                            (k, v['name']) for k, v in
                            self.dePlist(node, ['name']).items()
                            if 'name' in v
                        )
                        stack.pop()
                    elif last_top_key == 'Major Version':
                        doc.expandNode(node)
                        major_version = self.dePlist(node)
                        stack.pop()
                        if major_version != self.major_version:
                            raise iPhotoLibraryError(
                                "Sorry, I can't understand version %i iPhoto Libraries." % major_version
                            )
                    elif last_top_key == 'Minor Version':
                        doc.expandNode(node)
                        minor_version = self.dePlist(node)
                        stack.pop()
                        if minor_version > self.minor_version:
                            self.status(
                                "\nI don't recognise iPhoto libraries when the minor version is %i, but let's try anyway.\n" % minor_version,
                                force=True
                            )
                elif level == 4:
                    # process large items individually so we don't
                    # load them all into memory.
                    if last_top_key == album_list_key:
                        doc.expandNode(node)
                        self.albums.append(self.dePlist(node))
                        stack.pop()
                    elif last_top_key == 'Master Image List':
                        doc.expandNode(node)
                        if node.nodeName == 'key':
                            last_image_key = self.getText(node)
                        else:
                            if last_image_key is None:
                                raise iPhotoLibraryError(
                                    "Corrupted Library; image record without an id"
                                )
                            self.images[last_image_key] = self.dePlist(
                                node, self.interesting_image_keys
                            )
                        stack.pop()
            elif event == END_ELEMENT:
                stack.pop()

    def dePlist(self, node, interesting_keys=None):
        """
        Given a DOM node, convert the plist (fragment) it refers to and
        return the corresponding Python data structure.

        If interesting_keys is a list, "dict" keys will be filtered so that
        only those nominated are returned (for ALL descendant dicts). Numeric
        keys aren't filtered.

        Raises iPhotoLibraryError for unparsable numbers/dates and Exception
        for an unknown element type.
        """
        ik = interesting_keys
        dtype = node.nodeName
        if dtype == 'string':
            return self.getText(node)
        elif dtype == 'integer':
            try:
                return int(self.getText(node))
            except (ValueError, TypeError):
                raise iPhotoLibraryError(
                    "Corrupted Library; unexpected value '%s' for integer" %
                    self.getText(node))
        elif dtype == 'real':
            try:
                return float(self.getText(node))
            except (ValueError, TypeError):
                raise iPhotoLibraryError(
                    "Corrupted Library; unexpected value '%s' for real" %
                    self.getText(node))
        elif dtype == 'array':
            return [self.dePlist(child, ik) for child in node.childNodes
                    if child.nodeType == Node.ELEMENT_NODE]
        elif dtype == 'dict':
            d = {}
            last_key = None
            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                # TODO: catch out-of-order keys/values
                if child.nodeName == 'key':
                    last_key = self.getText(child)
                    continue
                # child is a value: keep it only if we're interested
                if interesting_keys \
                        and last_key not in interesting_keys \
                        and not (last_key or "").isdigit():
                    continue
                d[self._intern(str(last_key))] = self.dePlist(child, ik)
            return d
        elif dtype == 'true':
            return True
        elif dtype == 'false':
            return False
        elif dtype == 'data':
            # The text of *this* node is the base64 payload.
            return base64.b64decode(self.getText(node) or "")
        elif dtype == 'date':
            return self.appleDate(self.getText(node))
        else:
            raise Exception("Don't know what a %s is." % dtype)

    @staticmethod
    def getText(element, default=None):
        """
        Return the concatenated node values of element's children.

        Returns default when element is None and None when the element has
        no children.
        """
        if element is None:
            return default
        if len(element.childNodes) == 0:
            return None
        return "".join(n.nodeValue for n in element.childNodes)

    def walk(self, funcs):
        """
        Walk through the events or albums (depending on self.use_album)
        in this library and apply each function in the list funcs to each
        image, calling it as:
          func(imageId, targetFileDir, folderDate)
        where:
         - imageId is the string identifier for the image,
         - targetFileDir is the output directory chosen for the folder, and
         - folderDate is the date of the folder (None for albums).

        Afterwards, if import_missing is set, every destination folder that
        did not match a library folder is imported into iPhoto (unless
        running in test mode).
        """
        if self.use_album:
            targetName = "AlbumName"
            albums = [a for a in self.albums
                      if a.get("Album Type", None) == "Regular"]
        else:
            targetName = "RollName"
            albums = self.albums
        total = len(albums)
        for i, folder in enumerate(albums, 1):
            if self.use_album:
                folderDate = None
            else:
                folderDate = self.appleDate(folder["RollDateAsTimerInterval"])
            images = folder["KeyList"]
            folderName = folder[targetName]

            # As we process albums/events in the iPhoto library, drop that
            # album from the list of import_albums we'll be importing at
            # the end. The list is rebuilt rather than mutated during
            # iteration, which used to skip entries.
            if self.import_albums:
                self.import_albums[:] = [
                    ia for ia in self.import_albums
                    if not self._matches_folder(ia, folderName)
                ]

            if folderDate and self.use_date:
                date = '%(year)d%(delim)s%(month)02d%(delim)s%(day)02d' % {
                    'year': folderDate.year,
                    'month': folderDate.month,
                    'day': folderDate.day,
                    'delim': self.date_delimiter
                }
                if re.match("[A-Z][a-z]{2} [0-9]{1,2}, [0-9]{4}", folderName):
                    # Name is just iPhoto's default date label.
                    outputPath = date
                elif re.match("[0-9]{4}.[0-9]{2}.[0-9]{2} ?.*", folderName):
                    # Name already starts with a date.
                    outputPath = folderName
                else:
                    outputPath = date + " " + folderName
                if self.year_dir:
                    outputPath = os.path.join(str(folderDate.year), outputPath)
            else:
                outputPath = folderName

            # Deconflict output directories
            targetFileDir = os.path.join(self.dest_dir, outputPath)
            if self.deconflict:
                j = 1
                while targetFileDir in self.output_dirs:
                    targetFileDir = os.path.join(
                        self.dest_dir, outputPath + " %02d" % j)
                    j += 1
                self.output_dirs.add(targetFileDir)

            self.status("* Processing %i of %i: %s (%i images)...\n" % (
                i,
                total,
                folderName,
                len(images)
            ))
            for imageId in images:
                for func in funcs:
                    func(imageId, targetFileDir, folderDate)
            self.status("\n")

        if self.import_missing:
            self.status("importing folders:\n")
            for ia in self.import_albums:
                self.status(ia["album_dir"] + "\n")
                if not self.test:
                    self._import_into_iphoto(ia["album_dir"])

    @staticmethod
    def _matches_folder(import_album, folderName):
        """Tell whether any candidate name of import_album equals folderName."""
        for album_name in import_album['album_names']:
            # Directory listings yield byte strings on Python 2; the
            # library names are text.
            if isinstance(album_name, bytes):
                album_name = album_name.decode('utf-8')
            if folderName == album_name:
                return True
        return False

    def _import_into_iphoto(self, album_dir):
        """Ask iPhoto (through osascript) to import the folder album_dir."""
        import subprocess
        # Using the "Auto Import" dir in iPhoto was unpredictable with
        # respect to the resulting event name; AppleScript seems to always
        # result in the event being properly named.
        # The script is passed as a single argument (no shell involved), so
        # only AppleScript string escaping is needed.
        escaped_dir = album_dir.replace('\\', '\\\\').replace('"', '\\"')
        script = 'tell application "iPhoto"\nimport from "%s"\nend tell' \
            % escaped_dir
        try:
            subprocess.call(['osascript', '-e', script])
        except OSError as why:
            self.status(
                "\nCould not run osascript for %s: %s\n" % (album_dir, why),
                force=True
            )

    def copyImage(self, imageId, folderName, folderDate):
        """
        Copy an image from the library into the directory folderName,
        creating the directory when needed. folderDate is accepted for
        the walk() callback signature but not used here.

        If use_metadata is True, also write the image metadata from the library
        to the copy. If use_faces is True, faces will be saved as keywords.

        Progress is reported as "-" (skipped), "+" (copied with metadata)
        or "." (copied). Raises iPhotoLibraryError for an unknown image or
        when the directory cannot be created.
        """
        try:
            image = self.images[imageId]
        except KeyError:
            raise iPhotoLibraryError("Can't find image #%s" % imageId)

        if not os.path.exists(folderName):
            try:
                if not self.test:
                    os.makedirs(folderName)
            except OSError as why:
                raise iPhotoLibraryError(
                    "Can't create %s: %s" % (folderName, why.strerror))
            self.status(" Created %s\n" % folderName)

        mFilePath = image["ImagePath"]
        basename = os.path.basename(mFilePath)

        # Deconflict output filenames
        tFilePath = os.path.join(folderName, basename)
        if self.deconflict:
            j = 1
            while tFilePath in self.output_files:
                tFilePath = os.path.join(folderName, "%02d_" % j + basename)
                j += 1
            self.output_files.add(tFilePath)

        # Skip unchanged files, unless we're writing metadata.
        if not self.use_metadata and os.path.exists(tFilePath):
            mStat = os.stat(mFilePath)
            tStat = os.stat(tFilePath)
            mtime_delta = abs(tStat[stat.ST_MTIME] - mStat[stat.ST_MTIME])
            if not self.ignore_time_delta and mtime_delta <= 10:
                self.status("-")
                return
            if tStat[stat.ST_SIZE] == mStat[stat.ST_SIZE]:
                self.status("-")
                return

        if not self.test:
            shutil.copy2(mFilePath, tFilePath)
        md_written = False
        if self.use_metadata:
            md_written = self.writePhotoMD(imageId, tFilePath)
        if md_written:
            self.status("+")
        else:
            self.status(".")

    def writePhotoMD(self, imageId, filePath=None):
        """
        Write the metadata from the library for imageId to filePath.
        If filePath is None, write it to the photo in the library.
        If use_faces is True, iPhoto face names will be written to
        keywords.

        Returns True when metadata was set, False when there was nothing
        to write or writing failed with an IOError. Raises
        iPhotoLibraryError for an unknown image or when pyexiv2 is missing.
        """
        try:
            image = self.images[imageId]
        except KeyError:
            raise iPhotoLibraryError("Can't find image #%s" % imageId)
        if not filePath:
            filePath = image['ImagePath']

        caption = image.get("Caption", None)
        rating = image.get("Rating", None)
        comment = image.get("Comment", None)
        # Unknown keyword/face ids are ignored rather than aborting.
        keywords = set(self.keywords[k] for k in image.get("Keywords", [])
                       if k in self.keywords)
        if self.use_faces:
            keywords.update(self.faces[f['face key']]
                            for f in image.get("Faces", [])
                            if f['face key'] in self.faces)

        if not (caption or comment or rating or keywords):
            return False
        if pyexiv2 is None:
            raise iPhotoLibraryError(
                "pyexiv2 is required to write metadata")
        try:
            md = pyexiv2.ImageMetadata(filePath)
            md.read()
            if caption:
                md["Iptc.Application2.Headline"] = [caption]
            if rating:
                md["Xmp.xmp.Rating"] = rating
            if comment:
                md["Iptc.Application2.Caption"] = [comment]
            if keywords:
                md["Iptc.Application2.Keywords"] = list(keywords)
            if not self.test:
                md.write(preserve_timestamps=True)
            return True
        except IOError as why:
            reason = str(why)
            if isinstance(reason, bytes):
                # Python 2: keep non-ASCII bytes from breaking the message.
                reason = reason.decode('ascii', 'replace')
            self.status("\nProblem setting metadata (%s) on %s\n" % (
                reason, filePath
            ))
        return False

    def appleDate(self, text):
        """
        Convert a library timestamp (seconds since apple_epoch, as a number
        or numeric string) to a naive UTC datetime.

        Raises iPhotoLibraryError when the value cannot be converted.
        """
        try:
            return datetime.utcfromtimestamp(self.apple_epoch + float(text))
        except (ValueError, TypeError, OverflowError):
            raise iPhotoLibraryError(
                "Corrupted Library; unexpected value '%s' for date" % text)

    def status(self, msg, force=False):
        """Write msg to stdout unless quiet; force overrides quiet."""
        if force or not self.quiet:
            sys.stdout.write(msg)
            sys.stdout.flush()

    def build_import_list(self):
        '''
        We are going to make some assumptions here.
        1. The dest_dir is a directory of albums containing images, optionally the albums can be in year dirs.
        2. Album dirs are assumed to follow one of these naming patterns:
            [0-9]{4}.[0-9]{2}.[0-9]{2} ?.* - Dated folder, unnamed, iPhoto album name could match or
                                             could be iPhoto date format
            .* - Named folder, iPhoto album name

        Walk the dest dir and find all folders and files. For each folder determine the possible iPhoto album names.
        When walking the xml eliminate any folder we find where one of the possible album names matches an
        existing album name.
        '''
        if not self.year_dir:
            self.build_import_album_dirs(self.dest_dir)
            return
        for year_dir in os.listdir(self.dest_dir):
            # if year_dir was specified, then only match on folders inside year folders
            if not re.match("^[0-9]{4}$", year_dir):
                continue
            # if import_from_date was specified, then skip folders where the
            # year_dir is before the import_from_date.year
            if self.import_from_date \
                    and int(year_dir) < self.import_from_date.year:
                continue
            self.build_import_album_dirs(os.path.join(self.dest_dir, year_dir))

    def build_import_album_dirs(self, base_dir):
        """
        Append to self.import_albums one entry per dated entry of base_dir.

        Each entry is a dict with "album_names" (the names the folder could
        have inside iPhoto) and "album_dir" (its absolute path). Entries
        without a leading date, or dated before import_from_date, are
        skipped.
        """
        delim = str(self.date_delimiter)
        # Escape the delimiter for use inside a regex / strptime format.
        date_re = "[0-9]{4}%(d)s[0-9]{2}%(d)s[0-9]{2}" % {
            'd': re.escape(delim)}
        safe_delim = delim.replace("%", "%%")
        date_format = "%Y" + safe_delim + "%m" + safe_delim + "%d"
        dated_and_named = re.compile("(" + date_re + ") ?(.*)")
        dated_only = re.compile("^" + date_re + "$")

        for album_name in os.listdir(base_dir):
            album_names = [album_name]
            folder_date = None

            # Folder pattern: "2011_01_01 New Years Party"
            m = dated_and_named.match(album_name)
            if m:
                folder_date = datetime.strptime(m.group(1), date_format)
                if m.group(2):
                    album_names.append(m.group(2))

            # Folder pattern: "2011_01_01"
            if dated_only.match(album_name):
                folder_date = datetime.strptime(album_name, date_format)
                month, day, year = folder_date.strftime("%b %d %Y").split(" ")
                # iPhoto's default label for an unnamed event, e.g. "Jan 1, 2011"
                album_names.append("%s %d, %s" % (month, int(day), year))

            # Don't import folders that are prior to the specified date
            if not folder_date:
                continue
            if self.import_from_date and folder_date < self.import_from_date:
                continue

            album_dir = os.path.abspath(os.path.join(base_dir, album_name))
            self.import_albums.append(
                {"album_names": album_names, "album_dir": album_dir}
            )
def error(msg):
    """Print msg on stderr, framed by newlines, and exit with status 1."""
    text = "\n%s\n" % msg
    sys.stderr.write(text)
    sys.exit(1)
# Command-line entry point: parse the options, load the library, then copy
# every image (and optionally import missing folders into iPhoto).
if __name__ == '__main__':
    usage = "Usage: %prog [options] <iPhoto Library dir> <destination dir>"
    version = "exportiphoto version %s" % __version__
    default_date_delimiter = "-"
    option_parser = OptionParser(usage=usage, version=version)
    option_parser.set_defaults(
        test=False,
        albums=False,
        metadata=False,
        faces=False,
        quiet=False,
        date=True,
        ignore_time_delta=False
    )

    option_parser.add_option("-a", "--albums",
        action="store_true", dest="albums",
        help="use albums instead of events"
    )
    option_parser.add_option("-q", "--quiet",
        action="store_true", dest="quiet",
        help="use quiet mode"
    )
    option_parser.add_option("-d", "--date",
        action="store_false", dest="date",
        help="stop use date prefix in folder name"
    )
    option_parser.add_option("-x", "--deconflict",
        action="store_true", dest="deconflict",
        help="deconflict export directories of same name"
    )
    option_parser.add_option("-t", "--test",
        action="store_true", dest="test",
        help="don't actually copy files or import folders"
    )
    option_parser.add_option("-y", "--yeardir",
        action="store_true", dest="year_dir",
        help="add year directory to output"
    )
    option_parser.add_option("-e", "--date_delimiter",
        action="store", type="string", dest="date_delimiter",
        help="date delimiter default=%s" % default_date_delimiter
    )
    option_parser.add_option("-i", "--import",
        action="store_true", dest="import_missing",
        help="import missing albums from dest directory"
    )
    option_parser.add_option("-j", "--ignore_time_delta",
        action="store_true", dest="ignore_time_delta",
        help="ignore time delta when determining whether or not to copy a file"
    )
    option_parser.add_option("-z", "--import_from_date",
        action="store", type="string", dest="import_from_date",
        help="only import missing folers if folder date occurs after (YYYY-MM-DD). Uses date in folder name."
    )
    # Metadata options are only offered when pyexiv2 could be imported.
    if pyexiv2:
        option_parser.add_option("-m", "--metadata",
            action="store_true", dest="metadata",
            help="write metadata to images"
        )
        option_parser.add_option("-f", "--faces",
            action="store_true", dest="faces",
            help="store faces as keywords (requires -m)"
        )

    (options, args) = option_parser.parse_args()
    if len(args) != 2:
        option_parser.error(
            "Please specify an iPhoto library and a destination."
        )
    if options.date_delimiter is None:
        options.date_delimiter = default_date_delimiter
    # Reject a malformed date here with a usage error instead of letting it
    # surface later as an unhandled ValueError traceback.
    if options.import_from_date:
        try:
            datetime.strptime(options.import_from_date, "%Y-%m-%d")
        except ValueError:
            option_parser.error(
                "--import_from_date expects a date formatted as YYYY-MM-DD."
            )

    try:
        library = iPhotoLibrary(args[0],  # src
                                args[1],  # dest
                                use_album=options.albums,
                                use_date=options.date,
                                use_faces=options.faces,
                                use_metadata=options.metadata,
                                deconflict=options.deconflict,
                                quiet=options.quiet,
                                year_dir=options.year_dir,
                                import_missing=options.import_missing,
                                import_from_date=options.import_from_date,
                                test=options.test,
                                date_delimiter=options.date_delimiter,
                                ignore_time_delta=options.ignore_time_delta
        )
    except iPhotoLibraryError as why:
        error(why.args[0])
    except KeyboardInterrupt:
        error("Interrupted.")

    try:
        library.walk([library.copyImage])
    except iPhotoLibraryError as why:
        error(why.args[0])
    except KeyboardInterrupt:
        error("Interrupted. Copy may be incomplete.")
Jump to Line
Something went wrong with that request. Please try again.