/
ibooks.py
145 lines (112 loc) · 3.95 KB
/
ibooks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
"""Read cover & metadata from iBooks files"""
import os
import shutil
from tempfile import mkdtemp
import zipfile
import xml.etree.ElementTree as etree
import sys
class NotAnIbooksFile(Exception):
    """Raised when a file cannot be read as an iBooks archive."""
class MissingCover(Exception):
    """Raised when an iBooks archive does not contain a cover image."""
class iBooks(object):
    """iBooks Author file metadata parser and cover extractor.

    The package document (``OPS/ibooks.opf``) is parsed once at construction
    time. The cover image is only extracted when ``cover`` is first read.

    :param str filepath: path to .ibooks file
    :raises NotAnIbooksFile: if the file is not a zip archive, has no
        ``OPS/ibooks.opf`` member, or that member is not well-formed XML
        containing a ``metadata`` element
    """

    # Namespace prefixes used by the ElementTree lookups below.
    NS = {
        'opf': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/',
    }
    # Archive member names of the package document and the cover image.
    OPF_PATH = 'OPS/ibooks.opf'
    IMAGE_PATH = 'OPS/assets/images/cover-image.jpg'

    def __init__(self, filepath):
        self.filepath = filepath
        # Both stay None until the cover is extracted on first access.
        self._tempdir = None
        self._extracted_cover_path = None
        try:
            with zipfile.ZipFile(self.filepath, 'r') as zipreader:
                with zipreader.open(self.OPF_PATH) as opf_file:
                    self.opf = etree.fromstring(opf_file.read())
        except (zipfile.BadZipfile, KeyError, etree.ParseError) as e:
            # BadZipfile: not a zip archive; KeyError: no OPF member;
            # ParseError: the OPF member is not well-formed XML.
            raise NotAnIbooksFile(e)
        self._el_meta = self.opf.find('opf:metadata', self.NS)
        if self._el_meta is None:
            # Without a metadata element none of the properties can work,
            # so fail here instead of with AttributeError on later access.
            raise NotAnIbooksFile(
                'no metadata element in %s' % self.OPF_PATH
            )

    def _meta_text(self, path):
        """Return the text of the first metadata child matching *path*.

        :param str path: ElementTree path; may use the prefixes in ``NS``
        :return: element text, or ``None`` if no such element exists
        """
        element = self._el_meta.find(path, self.NS)
        return None if element is None else element.text

    def is_preview(self):
        """Check if ibooks file is a preview file (it has no version).

        :return bool:
        """
        return self.version is None

    @property
    def title(self):
        """Book title as specified in metadata.

        :return str: book title, or ``None`` if not specified
        """
        return self._meta_text('dc:title')

    @property
    def author(self):
        """Author as specified in metadata.

        :return str: author, or ``None`` if not specified
        """
        return self._meta_text('dc:creator')

    @property
    def language(self):
        """iBook content language as specified in metadata.

        :return str: language code, or ``None`` if not specified
        """
        return self._meta_text('dc:language')

    @property
    def version(self):
        """Return book version.

        NOTE: iBooks Author preview files MUST not contain a version.

        :return str: version string of ibooks file, or ``None`` if absent
        """
        return self._meta_text("opf:meta[@property='ibooks:version']")

    @property
    def cover(self):
        """Extract cover on first access and return full path to temporary
        cover file. Subsequent access to .cover will return cached path.

        WARNING: Temporary cover file will only exist as long as 'self'
        exists. Be sure to keep a reference to 'self' until you have
        processed the cover image.

        :return str: path to cover file
        :raises MissingCover: if the archive has no cover image member
        """
        if not self._extracted_cover_path:
            with zipfile.ZipFile(self.filepath, 'r') as zipreader:
                try:
                    # Look the member up first so that no temporary
                    # directory is created for a book without a cover.
                    zipreader.getinfo(self.IMAGE_PATH)
                except KeyError as e:
                    raise MissingCover(e)
                if not self._tempdir:
                    self._tempdir = mkdtemp(prefix='ibooks')
                self._extracted_cover_path = zipreader.extract(
                    self.IMAGE_PATH, self._tempdir
                )
        return self._extracted_cover_path

    def __del__(self):
        """Try to remove tempdir. We don't care if it fails..."""
        # getattr: the attribute is missing if __init__ never ran.
        tempdir = getattr(self, '_tempdir', None)
        if tempdir:
            shutil.rmtree(tempdir, ignore_errors=True)
def cli():
    """Command line script entry point.

    Prints metadata to console and extracts cover to current working
    directory. The cover is written as the input file name with its
    extension replaced by ``-cover.jpg``.

    Expects the path of the .ibooks file as the first command line argument;
    without it, a usage message is printed and the process exits.

    :return int: 0 on success
    """
    # Only the argument lookup is guarded, so an IndexError raised while
    # opening the book is not misreported as a usage error.
    try:
        source_path = sys.argv[1]
    except IndexError:
        print('Usage:\n ibooks myfile.ibooks')
        sys.exit(0)

    ibook = iBooks(source_path)
    print('Title: %s' % ibook.title)
    print('Author: %s' % ibook.author)
    print('Language: %s' % ibook.language)
    print('Version: %s' % ibook.version)
    print('Preview: %s' % ibook.is_preview())

    # os.getcwdu() only exists on Python 2; Python 3 has just os.getcwd().
    getcwd = getattr(os, 'getcwdu', os.getcwd)
    # Strip the extension instead of str.replace('.ibooks', ...): replace
    # leaves a name without that exact suffix unchanged, which made the copy
    # below overwrite a same-named file in the working directory.
    stem = os.path.splitext(os.path.basename(source_path))[0]
    cover_path = os.path.join(getcwd(), stem + '-cover.jpg')

    shutil.copy(ibook.cover, cover_path)
    print('Extracted cover to %s' % cover_path)
    return 0
# Run the command line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    cli()