This repository has been archived by the owner on Mar 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 14
/
check_m3u8_bitrate.py
69 lines (57 loc) · 2.19 KB
/
check_m3u8_bitrate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# TODO(colin): fix these lint errors (http://pep8.readthedocs.io/en/release-1.7.x/intro.html#error-codes)
# pep8-disable:E302,E501,E701
from api import list_videos
import urllib2
import re
from urlparse import urljoin
import csv
import sys
import os
# Matches the ".m3u8" suffix of a playlist URL so the main loop can swap it
# for the "-low.m3u8" suffix of the low-bitrate rendition.
re_url = re.compile(r"\.m3u8$")

# Captures the numeric value of the "#ZEN-TOTAL-DURATION:<number>" tag (the
# separators may be "-" or "_"). The optional "\r" lets the tag match in
# playlists that use CRLF line endings as well as plain LF; the captured
# group never includes it.
re_total_dur = re.compile(
    r"#ZEN[_-]TOTAL[_-]DURATION:(\d+(?:\.\d+)?)\r?$", re.MULTILINE)

# Matches every playlist line that ends in ".ts" (a media segment name).
# The lookahead accepts a trailing "\r" before the line end without making
# it part of the match, so group(0) is a clean segment name for both LF and
# CRLF playlists.
re_segment_name = re.compile(r"^.*\.ts(?=\r?$)", re.MULTILINE)
class HeadRequest(urllib2.Request):
def get_method(self):
return "HEAD"
# Column layout of the results CSV; also used to validate an existing file.
header_row = ["youtube_id", "duration", "total_bytes", "kbps"]

# The CSV named by the first command-line argument is both the resume state
# (rows written by earlier runs) and the output of this run.
if len(sys.argv) < 2:
    sys.exit("usage: {0} OUTPUT_CSV".format(sys.argv[0]))
csv_path = sys.argv[1]

# Ids already measured, and their rows, so they can be skipped and re-emitted.
existing_youtube_ids = set()
existing_rows = []
if os.path.exists(csv_path):
    # Python 2's csv module expects file objects opened in binary mode;
    # `with` guarantees the handle is closed even if validation fails.
    with open(csv_path, "rb") as existing_file:
        for row_num, row in enumerate(csv.reader(existing_file)):
            if row_num == 0:
                # Explicit raise rather than `assert`: the file is truncated
                # just below, so this guard must survive `python -O`.
                if row != header_row:
                    raise ValueError(
                        "Unexpected CSV header in {0}: {1!r}".format(
                            csv_path, row))
            elif row:
                # Blank lines come back from csv.reader as empty lists;
                # skip them instead of failing on row[0].
                existing_youtube_ids.add(row[0])
                existing_rows.append(row)

# Rewrite the file from scratch: header first, then the rows carried over.
# The handle is kept open deliberately, because the processing loop later in
# the file keeps appending rows through `writer`.
out_file = open(csv_path, "wb")
writer = csv.writer(out_file)
writer.writerow(header_row)
writer.writerows(existing_rows)
def _total_segment_bytes(playlist_url, playlist_doc):
    """Sum the Content-Length of every .ts segment named in a playlist.

    Each segment name found by re_segment_name is resolved against
    playlist_url and probed with a HEAD request.

    Returns the total as an int (0 when the playlist names no segments), or
    None when any segment response lacks a usable Content-Length header.
    urllib2.URLError from the requests propagates to the caller.
    """
    total = 0
    for segment_match in re_segment_name.finditer(playlist_doc):
        segment_url = urljoin(playlist_url, segment_match.group(0))
        response = urllib2.urlopen(HeadRequest(segment_url))
        try:
            content_length = response.info().get("Content-Length")
        finally:
            # Release the connection promptly; many segments are probed.
            response.close()
        try:
            total += int(content_length)
        except (TypeError, ValueError):
            # Header missing (None) or not an integer: a partial sum would
            # under-report the bitrate, so report failure instead.
            return None
    return total


# Measure the "-low" rendition of every video that is not yet in the CSV and
# append one row per video: youtube_id, duration, total_bytes, kbps.
for v in list_videos():
    youtube_id = v["youtube_id"]
    if youtube_id in existing_youtube_ids:
        continue
    if "download_urls" not in v or v["download_urls"] is None:
        continue
    url = v["download_urls"].get("m3u8", None)
    if url is None:
        continue

    # Munge the URL to get the low-kbps stream
    url = re_url.sub("-low.m3u8", url)

    try:
        playlist = urllib2.urlopen(url)
        try:
            doc = playlist.read()
        finally:
            playlist.close()

        duration_match = re_total_dur.search(doc)
        if duration_match is None:
            print >>sys.stderr, "No duration match for {0}".format(url)
            continue
        duration = float(duration_match.group(1))
        if duration == 0:
            # Would otherwise divide by zero when computing kbps below.
            print >>sys.stderr, "Zero duration for {0}".format(url)
            continue

        total_bytes = _total_segment_bytes(url, doc)
        if total_bytes is None:
            print >>sys.stderr, "Bad Content-Length for {0}".format(url)
            continue
        if total_bytes == 0:
            print >>sys.stderr, "No segments for {0}".format(url)
            continue
    except urllib2.URLError:
        print >>sys.stderr, "URLError for {0}".format(url)
        continue

    # bytes / 125 == kilobits (8 bits per byte, 1000 bits per kilobit);
    # dividing by the duration (presumably seconds, given the "kbps" column
    # name) yields the average bitrate.
    kbps = (total_bytes / 125.0) / duration
    writer.writerow([youtube_id, duration, total_bytes, kbps])
    existing_youtube_ids.add(youtube_id)