-
Notifications
You must be signed in to change notification settings - Fork 20
/
media.py
218 lines (168 loc) · 8.44 KB
/
media.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import urllib
import shutil
import tempfile
import zipfile
from logger import log
import utils
def downloadAndInstallMandarinSounds(notifier, mediamanager, config):
    """Download the Mandarin sound pack and install it into the media directory.

    Failures are reported through the notifier rather than raised: an IOError
    during the download and a zipfile.BadZipfile during installation both end
    the function early after calling notifier.exception().

    notifier     -- object providing info(message) and exception(message)
    mediamanager -- object whose mediadir() returns the media directory path
    config       -- object providing mandarinsoundsurl and candidateFieldNamesByKey
    """
    log.info("Downloading Mandarin sound pack")

    try:
        # Download ZIP, using cache if necessary
        downloader = MediaDownloader(mediamanager.mediadir())
        the_media = downloader.download("Chinese-Lessons.com Mandarin Sounds", config.mandarinsoundsurl,
                                        lambda: notifier.info("Downloading the sounds - this might take a while!"))
    except IOError:
        # The exception object itself is not needed: the notifier reports the active exception
        notifier.exception("Error while downloading the sound pack: are you connected to the internet?")
        return

    try:
        # Install each file from the ZIP into our media folder
        the_media.installpack(mediamanager.mediadir())
    except zipfile.BadZipfile:
        notifier.exception("The downloaded sound pack appeared to be corrupt")
        return

    # Tell the user we are done, using the first candidate audio field name as the example
    exampleAudioField = config.candidateFieldNamesByKey['audio'][0]
    notifier.info("Finished installing Mandarin sounds! These sound files will be used automatically as long as you have "
                  + " the: <b>" + exampleAudioField + "</b> field in your deck, and the text: <b>%(" + exampleAudioField + ")s</b> in your card template")
class MediaDownloader(object):
    """Downloads ZIP archives, caching them in a "downloads" directory inside the media directory."""

    def __init__(self, mediadir):
        """Prepare the download cache directory underneath mediadir."""
        # Where shall we save downloaded files?
        self.__cachedir = os.path.join(mediadir, "downloads")

        # Ensure the cache exists
        log.info("Initialising cache directory at %s", self.__cachedir)
        utils.ensuredirexists(self.__cachedir)

    def download(self, name, zipurl, downloadprompt=None):
        """Return a DownloadedMedia for zipurl, downloading it only if it is not cached.

        name           -- name given to the resulting DownloadedMedia
        zipurl         -- URL of the ZIP archive to fetch
        downloadprompt -- optional zero-argument callable invoked just before a
                          real download starts (it is not called on a cache hit)

        Any exception raised by urllib.urlretrieve propagates to the caller.
        """
        # First check the cache to see if we have the download already
        cachepath = self.urlcachepath(zipurl)
        if os.path.exists(cachepath):
            log.info("Found %s in the cache at %s", zipurl, cachepath)
            return DownloadedMedia(name, cachepath)

        # Can we actually write to the cache?
        if utils.canwriteto(cachepath):
            # We CAN write to the cache - download straight into it
            log.info("Have write access to cache - downloading into it")
            downloadto = cachepath
        else:
            # Use a temporary path instead, the user doesn't have enough permissions.
            # mkstemp creates the file itself, so nobody else can claim the name first;
            # the handle is closed straight away so that urlretrieve can reopen the path.
            log.info("No write access to cache - downloading to temporary location")
            handle, downloadto = tempfile.mkstemp()
            os.close(handle)

        # Actually do the download and return the result
        if downloadprompt is not None:
            downloadprompt()

        succeeded = False
        try:
            urllib.urlretrieve(zipurl, downloadto)
            succeeded = True
        finally:
            if not succeeded:
                # A partial file left at the cache path would be treated as a complete
                # download by the cache check above on every later call
                try:
                    os.remove(downloadto)
                except OSError:
                    # Best effort only: the file may never have been created
                    pass

        return DownloadedMedia(name, downloadto)

    def urlcachepath(self, url):
        """Return the path inside the cache directory that corresponds to url."""
        # What the hell, just use a hash of the URL. Not meant to be human readable anyway.
        return os.path.join(self.__cachedir, utils.md5(url))
class DownloadedMedia(object):
    """A downloaded ZIP archive of media files that can be unpacked into a media directory."""

    def __init__(self, name, zippath):
        """Remember the pack name and the location of its ZIP archive."""
        log.info("Initalizing downloaded media %s located at %s", name, zippath)
        self.name = name
        self.zippath = zippath

    def installpack(self, mediadir):
        """Extract every file in the archive into a pack directory under mediadir.

        The pack directory is whatever utils.mkdirfallback(mediadir, self.name)
        returns. Raises zipfile.BadZipfile if the archive cannot be read.
        """
        # Work out the pack directory we want to extract to
        packpath = utils.mkdirfallback(mediadir, self.name)
        log.info("Extracting downloaded media into %s", packpath)

        # Every extracted file must end up underneath this prefix
        packroot = os.path.abspath(packpath)
        packprefix = os.path.join(packroot, "")

        # Extract the ZIP into a new directory
        thezip = zipfile.ZipFile(self.zippath)
        try:
            for info in thezip.infolist():
                # Directory entries hold no data, and opening them as files would fail.
                # The directories themselves get created on demand for the files in them.
                if info.filename.endswith("/"):
                    continue

                # The archive was downloaded, so do not trust its member names: refuse
                # anything (e.g. "../x" or an absolute path) that escapes the pack directory
                extractfilepath = os.path.abspath(os.path.join(packroot, info.filename))
                if not extractfilepath.startswith(packprefix):
                    log.info("Skipping %s because it would be extracted outside of %s", info.filename, packpath)
                    continue

                # Create the directory
                utils.ensuredirexists(os.path.dirname(extractfilepath))

                # Extract the file. NB: must use binary mode - this is important for Windows!
                extractedfile = open(extractfilepath, 'wb')
                try:
                    extractedfile.write(thezip.read(info.filename))
                finally:
                    extractedfile.close()
        finally:
            thezip.close()
class MediaPack(object):
    """A directory of media files, looked up by lower-cased file name."""

    def __init__(self, packpath, media):
        """Create a pack.

        packpath -- path of the pack directory
        media    -- dictionary mapping file names to file paths
        """
        self.packpath = packpath
        # Normalize capitalisation for ease of lookup
        self.media = dict((name.lower(), filename) for name, filename in media.items())

    @property
    def name(self):
        """The final component of the pack path."""
        return os.path.basename(self.packpath)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "MediaPack(%s, %s)" % (repr(self.packpath), repr(self.media))

    def __eq__(self, other):
        # Anything that is not a MediaPack (including None) is left for Python to
        # resolve, which ends up as "not equal" instead of an AttributeError
        if not isinstance(other, MediaPack):
            return NotImplemented

        return self.name == other.name and self.packpath == other.packpath and self.media == other.media

    def __ne__(self, other):
        return not (self == other)

    def summarize(self, audioextensions):
        """Return the pack name, followed by per-extension file counts if there are any.

        Only the extensions in audioextensions are counted, compared case-insensitively,
        and they are reported in the order given.
        """
        # Split each file name once rather than once per extension
        foundextensions = [os.path.splitext(filename)[1].lower() for filename in self.media.values()]

        # Summarize the counts of files per extension
        formattedcounts = []
        for extension in audioextensions:
            count = foundextensions.count(extension.lower())
            if count > 0:
                formattedcounts.append(str(count) + " " + extension + " " + utils.pluralize("file", count))

        if formattedcounts:
            # Only include brackets around the extension counts in this case
            return self.name + " (" + ", ".join(formattedcounts) + ")"

        return self.name

    def mediafor(self, basename, audioextensions):
        """Return the path of the media file for basename, or None if the pack has none.

        The extensions are tried in the order given, so earlier ones take priority.
        """
        # Check all possible extensions in order of priority
        for extension in audioextensions:
            name = (basename + extension).lower()
            if name in self.media:
                return self.media[name]

        # No suitable media existed!
        return None

    @classmethod
    def frompath(cls, packpath):
        """Build a pack from every entry that os.listdir reports for packpath."""
        media = {}
        for filename in os.listdir(packpath):
            media[filename] = os.path.join(packpath, filename)

        log.info("Discovered %d media files in the pack at %s", len(media), packpath)
        # Construct via cls so that subclasses get instances of their own type
        return cls(packpath, media)
# Used to discover files in the media directory that are not referenced in the media
# database. If this is true, the user has just copied them in - and we consider
# such things "legacy" sounds that should be replaced with a true media pack.
def discoverlegacymedia(mediadircontents, mediaindex):
    """Return the media directory entries that the media index does not reference.

    mediadircontents -- list of names found in the media directory, or None if
                        the directory could not be read
    mediaindex       -- iterable of (orig_path, filename) pairs

    Returns a new list of case-normalized names (the input list is not modified),
    or None when mediadircontents is None.
    """
    # If the media directory was inaccessible for any reason, just give up
    if mediadircontents is None:
        log.info("Couldn't discover legacy media because the media directory was not accessible")
        return None

    # Normalize case from the directory listing so that the removal check works reliably
    unreferenced = [os.path.normcase(entry) for entry in mediadircontents]

    # Strike every indexed file off the listing
    for orig_path, filename in mediaindex:
        try:
            # NB: we can only do this reliably because we normalized the case above
            unreferenced.remove(os.path.normcase(filename))
        except ValueError:
            # Tried to remove the file from the directory listing, but it's not actually
            # in the list. This means that the database entry is actually out of date and
            # we should ignore it
            log.info("Out of date database entry for %s -> %s", orig_path, filename)

    # Return the remaining files
    return unreferenced
"""
# initial work on an importer for the SWAC audio files
# This might be useful: http://polyglotte.tuxfamily.org/autres/anki_swac_en.html
def SWACimport(dir):
swacurl="http://download.shtooka.net/cmn-balm-hsk1_ogg.tar"
# 1 - Download the zip file
# 2 - unzip to media dir
# 3 - open the index file
packdir="C:\Nick\Language\Mandarin Sound Files\Swac" # testdir
tagfile = "index.tags.txt"
fullpath = os.path.join(packdir, tagfile)
# 4 - scan through the index file and put the file names and pinyin into variables
# regex should match the filename and put it into group(1)
# and output pinyin to group(3)
lineregex = re.compile(r"[[](.+)[]].+SWAC_ALPHAIDX=(.+)", re.MULTILINE)
#open file for reading
file = codecs.open(fullpath, "r", encoding='utf-8')
matches = re.search(file)
matches.group(1)
matches.group(3)
file.close()
# 4 - remove tones from pinyin variable
# 5 - rename & move the files to the main directory
os.rename(tagfull, pinyinname)
"""