public
Description: Checks the naming of recorded/downloaded TV Episodes
Clone URL: git://github.com/dbr/checktveps.git
Search Repo:
checktveps / 2checkTvEps.py
100755 381 lines (320 sloc) 11.399 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
#!/usr/bin/env python
#encoding:utf-8
import os,re,sys
 
def colour(text,colour="red"):
    """Wrap text in ANSI escape sequences so terminals show it coloured.

    text   -- the string to colourise
    colour -- one of 'red', 'green' or 'blue' (defaults to 'red')

    Returns the text prefixed with the colour escape code and followed by
    the reset code. Raises ValueError for any other colour name.
    """
    # Hard-wired switch: set to True to get the text back untouched
    nocolour = False
    if nocolour:
        return text
    #end if

    escape = chr(27)
    codes = {
        'red':'[31m',
        'green':'[32m',
        'blue':'[34m',
    }
    if colour in codes:
        reset = escape + '[0m'
        return escape + codes[colour] + text + reset
    #end if
    raise ValueError("Invalid colour")
#end colour
 
def getError(invalid,errorno):
    """Return the entries of invalid whose 'errorno' equals errorno.

    invalid -- list of dicts, each carrying an 'errorno' key
    errorno -- the error code to select

    The result is a new list; the original order is kept.
    """
    return [entry for entry in invalid if entry['errorno'] == errorno]
#end getError
 
 
###################################
# Configs
###################################

# Error-code to error-description mapping
errors = {
    1: 'malformed name',
    2: 'missing episode name',
    3: 'path is incorrect',
}

# Regex configs
regex_config = {}

# Character class for valid episode/show names.
# Example: [a-zA-Z0-9\-'\ ]
# (raw string: the backslashes are regex escapes, not Python escapes)
regex_config['valid_in_names'] = r"[\w\(\).,\[\]'\ \-?]"

# Location to process
loc = "."  # Runs from the current path

###################################
# Name regexes
###################################
# Valid filenames, with episode name
# Should return 4 groups:
# Series name.
# Season number.
# Episode number.
# Episode name.
# Ignore filetype extension.
#
# If there are 3 groups, they are treated as:
# Series name, episode number, episode name. Season number is defaulted to "1"
#
# Show name - [01x01-02] - The Episode Name (Part 1)
# Show name - [01x23] - The Episode Name (Part 1)
# Show name - [01x23] - The Episode Name
# Show name - [01xExtra01] - DVD Extra Feature 1
# Show name - [01xSpecial01] - Special Episode 1
# Show name - [01] - First episode

r_with_ep_name = [
    re.compile(r"^(%(valid_in_names)s+) - \[(\d{2})x(\d{2})\] - (%(valid_in_names)s+)$" % regex_config),
    re.compile(r"^(%(valid_in_names)s+) - \[(\d{2})x(\d{2}-\d{2})\] - (%(valid_in_names)s+)$" % regex_config),
    re.compile(r"^(%(valid_in_names)s+) - \[(\d{2})x(Special\d{1,2})\] - (%(valid_in_names)s+)$" % regex_config),
    # The "Extra" prefix is captured as part of the episode group (like
    # "Special" above) so that Extra01 is not confused with episode 01.
    re.compile(r"^(%(valid_in_names)s+) - \[(\d{2})x(Extra\d{1,2})\] - (%(valid_in_names)s+)$" % regex_config),
    re.compile(r"^(%(valid_in_names)s+) - \[(\d{2})\] - (%(valid_in_names)s+)$" % regex_config),
]

###################################
# Valid filenames, but missing episode name
#
# Show name - [04x01]
# Show name - [04x01-02]
# Show name - [04xSpecial01]
# Show name - [04xExtra01]
r_missing_ep_name = [
    re.compile(r"(%(valid_in_names)s+) - \[(\d{2})x(\d{2})\]" % regex_config),
    re.compile(r"(%(valid_in_names)s+) - \[(\d{2})x(\d{2}-\d{2})\]" % regex_config),
    re.compile(r"(%(valid_in_names)s+) - \[(\d{2})x(Special\d{1,2})\]" % regex_config),
    re.compile(r"(%(valid_in_names)s+) - \[(\d{2})x(Extra\d{1,2})\]" % regex_config),
]

# Valid path names
# NOTE: the "." in "/./" is an unescaped regex wildcard; it is left as-is
# so the set of accepted paths does not change.
r_valid_path = [
    re.compile(r"/./(.+?)/season (\d{1,2})$"),
    re.compile(r"/./(.+?)/season (\d{1,2})/extras$"),
    re.compile(r".+?/Misc"),
]

###################################
# Regex to match valid, but not-to-be-processed files (dot-files, folder.jpg artwork)
###################################
decrappify = [
    re.compile(r"(?=^[.]{1}.*)"),
    re.compile(r"folder.jpg"),
]
 
###################################
# Output-helper to convert array of
# numbers (episode numbers) to human-readable string
###################################
 
def seq_display(x):
    """
    Takes a sequence of episode numbers, returns a more readable
    representation of them, with consecutive numbers collapsed into ranges.

    Each item may be an int, a numeric string ("01") or a dash-joined
    multi-episode string ("01-02", counted as both 1 and 2). Items with no
    numeric reading (e.g. "Special01") are ignored when building ranges.

    Return value:
      * x itself, unchanged, when it contains no numbers at all
      * a single int when only one distinct number is present
      * the items of x joined with ", " when the numbers span more than 999
      * otherwise a string such as '1->3, 5->7, 10'

    >>> seq_display( [1,2,3, 5,6,7, 10, 20,21,22] )
    '1->3, 5->7, 10, 20->22'
    >>> seq_display( ["00", "01-02", "04"] )
    '0->2, 4'
    """
    numbers = set()
    for cur_x in x:
        try:
            # Plain ints and simple numeric strings ("01")
            numbers.add(int(cur_x))
            continue
        except (TypeError, ValueError):
            pass
        #end try

        # Multi-episode strings ("01-02"): take every numeric part,
        # silently skipping anything that is not a number
        for tmp_split in str(cur_x).split("-"):
            try:
                numbers.add(int(tmp_split))
            except ValueError:
                pass
            #end try
        #end for tmp_split
    #end for cur_x

    if not numbers: return x # return original input, no numbers!

    start = min(numbers)
    end = max(numbers)

    if end == start: return start
    if end - start > 999: return ", ".join([str(y) for y in x]) # too long, return list

    # Walk the sorted numbers, collecting (first, last) pairs of consecutive
    # runs. None (not False/0) marks "no run open", so episode 0 is handled.
    runs = []
    run_first = None
    run_last = None
    for number in sorted(numbers):
        if run_first is None:
            run_first = run_last = number
        elif number == run_last + 1:
            run_last = number
        else:
            runs.append((run_first, run_last))
            run_first = run_last = number
    #end for number
    runs.append((run_first, run_last))

    pieces = []
    for first, last in runs:
        if first == last: # start and end are same, its one number
            pieces.append("%d" % (first))
        else:
            pieces.append("%d->%d" % (first, last))
    #end for first, last

    return ", ".join(pieces)
#end seq_display
 
 
###################################
# Classes to abstract show data
###################################
class ShowContainer:
    """Collection of shows, indexed by show name.

    Indexing with a name that has not been seen yet creates a new Show for
    it, so nested assignments such as container[show][season][ep]['name']
    work without any set-up.
    """
    def __init__(self):
        # Maps show name -> Show instance
        self.shows = {}
    #end __init__

    def __getitem__(self, show_name):
        """Return the Show stored under show_name, creating it if missing."""
        # "in" replaces dict.has_key(), which no longer exists in Python 3
        if show_name not in self.shows:
            self.shows[show_name] = Show(show_name)
        return self.shows[show_name]
    #end __getitem__

    def __str__(self):
        """Return str() of every stored show, each followed by a newline."""
        return "".join([str(current_show) + "\n" for current_show in self.shows.values()])
    #end __str__
#end ShowContainer
        
class Show:
    """A single show: its name plus its seasons, indexed by season number.

    Reading a season number that has not been seen yet creates a new
    Season for it.
    """
    def __init__(self, name):
        self.show_name = name
        # Maps season number -> Season instance
        self.seasons = {}
    #end __init__

    def __getitem__(self,season_number):
        """Return the season stored under season_number, creating it if missing."""
        # "in" replaces dict.has_key(), which no longer exists in Python 3
        if season_number not in self.seasons:
            self.seasons[season_number] = Season(season_number)

        return self.seasons[season_number]
    #end __getitem__

    def __setitem__(self,season_number, season):
        """Store season under season_number, replacing any existing entry."""
        # No placeholder Season is created first: it would be overwritten
        # on the very next line anyway.
        self.seasons[season_number] = season
    #end __setitem__

    def __str__(self):
        """Return the show name, then str() of each season, one per line."""
        out = self.show_name + "\n"
        for cur_season in self.seasons.values():
            out += str(cur_season) + "\n"
        return out
    #end __str__
#end Show
 
class Season:
    """A single season: its number plus its episodes, indexed by episode number.

    Reading an episode number that has not been seen yet creates a new
    Episode for it.
    """
    def __init__(self, number):
        self.season_number = number
        # Maps episode number -> Episode instance
        self.episodes = {}
    #end __init__

    def __getitem__(self, episode_number):
        """Return the episode stored under episode_number, creating it if missing."""
        # "in" replaces dict.has_key(), which no longer exists in Python 3
        if episode_number not in self.episodes:
            self.episodes[episode_number] = Episode(number = episode_number)

        return self.episodes[episode_number]
    #end __getitem__

    def __setitem__(self,episode_number, episode):
        """Store episode under episode_number, replacing any existing entry."""
        # The previous code first built a placeholder with Episode(), which
        # raised TypeError (Episode requires a number) for every new key and
        # would have been overwritten immediately anyway.
        self.episodes[episode_number] = episode
    #end __setitem__

    def __str__(self):
        """Return a two-line summary: the season number and its episode numbers."""
        out = "\tSeason %s\n" % (self.season_number)
        out += "\t\t"
        all_ep_nums = list(self.episodes.keys())
        out += "Episodes " + str(seq_display(all_ep_nums))
        return out
    #end __str__
#end Season
 
class Episode:
    """A single episode: its number plus a free-form attribute store.

    Attributes (for example 'name') are read and written with
    dictionary-style indexing on the instance.
    """
    def __init__(self, number):
        # Attribute name -> value
        self.episode = {}
        self.episode_number = number
    #end __init__

    def __getitem__(self,attr):
        """Return the value stored for attr (KeyError if it was never set)."""
        return self.episode[attr]
    #end __getitem__

    def __setitem__(self,attr,name):
        """Store name as the value of attr."""
        self.episode[attr] = name
    #end __setitem__
#end Episode
 
 
###################################
# Find all valid files
###################################
# Collect the path of every file below loc. The loop variables are named
# so that they do not shadow the "file" builtin or the "files" list below.
allfiles = []
for walk_dir, walk_subdirs, walk_names in os.walk(loc):
    for walk_name in walk_names:
        allfiles.append(str(os.path.join(walk_dir, walk_name)))
#end for walk_dir

# Only keep real files, and strip out dotfiles/folder.jpg.
# Building a filtered list (instead of calling files.remove() while looping
# over allfiles) cannot raise ValueError when an entry was already dropped,
# e.g. because it failed the isfile() test or matched two patterns.
files = [
    x for x in allfiles
    if os.path.isfile(x)
    and not any(cur_decrap.match(os.path.basename(x)) for cur_decrap in decrappify)
]

# Prepend loc to every path.
# NOTE(review): paths from os.walk(loc) already start with loc, so with the
# default loc this yields "././..." -- the "/./" prefix that the
# r_valid_path patterns look for. Kept as-is for that reason.
files = [os.path.join(loc,x) for x in files]

# Warn if no files are found, then exit
if not files:
    # Single parenthesised argument: prints the same on Python 2 and 3
    print(colour('No files found','red'))
    sys.exit(1)
 
###################################
# Validate filenames
###################################

# valid:   files whose name matched one of r_with_ep_name
# invalid: one entry per problem found, tagged with an 'errorno'
valid = []
invalid = []

for cur in files:
    cpath,cfile = os.path.split(cur)
    cfile,cext = os.path.splitext(cfile)

    # Fields shared by every record created for this file
    details = {'path':cpath, 'filename':cfile, 'cext':cext}

    # Path check: at least one r_valid_path pattern has to match
    path_ok = False
    for path_checker in r_valid_path:
        if path_checker.findall(cpath):
            path_ok = True
            break
    #end for path_checker
    if not path_ok:
        print("invalid path" + " " + cpath)
        invalid.append(dict(details, errorno=3))
    #end if

    # Name check, first against the patterns that include an episode name
    groups = None
    for name_checker in r_with_ep_name:
        found = name_checker.findall(cfile)
        if found:
            groups = found[0]
            break
    #end for name_checker

    if groups is not None:
        # Valid file name
        valid.append(dict(details, match=groups))
        continue # Found valid episode, skip to the next one
    #end if

    # No full match: either only the episode name is missing (error 2),
    # or the name is malformed altogether (error 1)
    if any(short_checker.findall(cfile) for short_checker in r_missing_ep_name):
        invalid.append(dict(details, errorno=2))
    else:
        invalid.append(dict(details, errorno=1))
    #end if
#end for
 
###################################
# Show invalid names
###################################
if invalid:
    # Single-argument print() calls give the same output on Python 2 and 3
    print("%s : Invalid file-names found" % colour('WARNING', 'red'))

    # One section per error code, in ascending code order
    for errorno,errordescr in sorted(errors.items()):
        # Use a separate name here: assigning the result to "errors" would
        # replace the error-description table with a list.
        matching = getError(invalid,errorno)
        if not matching: continue

        errormsg = "# %s (error code %d)" % (errordescr, errorno)
        print("#"*len(errormsg))
        print(errormsg)
        print("#"*len(errormsg))

        for c in matching:
            print(c['filename'])
 
###################################
# Show valid names
###################################
if valid:
    # Single-argument print() calls give the same output on Python 2 and 3
    print("%s : Valid file-names found:" % colour('INFO','green'))
    allepisodes = ShowContainer()

    for cur in valid:
        match = cur['match']
        if len(match) == 4:
            showname,seasno,epno,title = match
        elif len(match) == 3:
            # No season in the file name: default to season 1
            seasno = 1
            showname,epno,title = match
        else:
            # Unexpected number of groups: skip the entry rather than
            # reuse the values left over from the previous file
            continue

        allepisodes[showname][seasno][epno]['name'] = title
    #end for cur in valid
    print(allepisodes)