In [2]:
import re
import os
import sys
import requests
import utils

from collections import defaultdict
from contextlib import closing
from concurrent.futures import ThreadPoolExecutor, as_completed


class BiliDownloader:
    DIRNAME = 'D:/PyTorch/downloads' # absolute path
    COOKIE = 'SESSDATA=3f5681db%2C1602650790%2C8c420*41' # VIP COOKIE is better. Update it once a month 
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
    
    def __init__(self, url, max_workers=4):
        self.url = url
        self.max_workers = max_workers
        
        self.qn2desc = {
            16: '360P',
            32: '480P',
            64: '720P',
            80: '1080P',
            112: '1080P+',
        }
        self._keys = ['title', 'series', 'evaluate', 'cover']
        self.get_basic_info()
        
    def get_basic_info(self):
        headers = {'User-Agent': self.USER_AGENT}
        res = requests.get(url, headers=headers)
        self.html = utils.extract_json(res.text.split('__INITIAL_STATE__=')[1])
        
        mediaInfo = self.html['mediaInfo']
        self.basic_info = {k: mediaInfo[k] for k in self._keys}
        
    def get_bangumi_info(self, page):
        eplist = self.html['epList']
        if isinstance(page, int):
            if page > 0:
                page = [page]
            else:
                page = [i + 1 for i in range(len(eplist))]
        assert max(page) <= len(eplist), 'no such page, check the maximum page'
        assert min(page) > 0, 'no such page, check the minimum page'

        info = defaultdict(list)
        info['page'] = page
            
        for i in page:
            info['aid'].append(eplist[i - 1]['aid'])
            info['cid'].append(eplist[i - 1]['cid'])
            info['title'].append(eplist[i - 1]['titleFormat'] + ' ' + eplist[i - 1]['longTitle'])
        
        return info
    
    def add_play_url(self, info, quality):
        assert quality > 0, 'quality must > 0'
        
        length = len(info['page'])
        info['play_url'] = [[] for _ in range(length)]

        headers = {
            'User-Agent': self.USER_AGENT,
            'Cookie': self.COOKIE
        }
        for i in range(length):
            cid = info['cid'][i]
            aid = info['aid'][i]
            url = 'https://api.bilibili.com/x/player/playurl?cid={}&avid={}'.format(cid, aid)
            html = requests.get(url, headers=headers).json()
            data = html['data']
            if data is None:
                print('\nInvalid Cookie! You need use or update the VIP COOKIE!')
                sys.exit(1)
            
            actual_gear = min(quality, len(data['accept_quality']))
            actual_quality = data['accept_quality'][-actual_gear]
            info['actual_quality'].append(actual_quality)
            
            url = 'https://api.bilibili.com/x/player/playurl?cid={}&avid={}&qn={}'.format(cid, aid, actual_quality)
            html = requests.get(url, headers=headers).json()
            data = html['data']

            for du in data['durl']:
                info['play_url'][i].append(du['url'])
                
    def download_single(self, file_path, play_url):
        if not os.path.exists(file_path):
            headers = {
                'User-Agent': self.USER_AGENT,
                'Referer': self.url
            }
            count = 0
            with closing(requests.get(play_url, headers=headers, stream=True)) as res:
                length = int(res.headers['Content-Length'])
                #print(f'\n    size: {utils.unit_convert(length)}')

                with open(file_path, 'wb') as f:
                    for data in res.iter_content(chunk_size=1024):
                        count += len(data)
                        f.write(data)
                        print('\r    {:5.1f}%'.format((count / length) * 100), end='')

        print('\n    Done: {}\n'.format(file_path))
        return file_path
    
    def postprocess(self, video_dir, file_path):
        file_name = os.path.split(file_path)[1]
        depart = re.search(r'P(\d+)-(\d+)-(\d+)-(.+).flv', file_name)
        if depart is not None:
            page = depart.group(1)
            part = depart.group(2)
            length = depart.group(3)
            titlename = depart.group(4)

            if page not in self.container.keys():
                target = os.path.join(video_dir, '{}.flv'.format(titlename))
                self.container[page] = [target, file_path]
            else:
                self.container[page].append(file_path)
                
                if len(self.container[page]) == int(length) + 1:
                    utils.merge_flv(self.container[page][1:], self.container[page][0]) # merge_flv (list of merge flvs, target file)
                    for f in self.container[page][1:]:
                        pass
                        #os.remove(f)
                    print('\n    Merged successfully:', self.container[page][0])
                    
    def __call__(self, page=0, quality=3, mode='common', info=None):
        assert mode in ['common', 'info', 'download']
        if mode == 'download':
            assert info is not None, 'info should not be None'
        else:
            info = self.get_bangumi_info(page)
            self.add_play_url(info, quality)
            if mode == 'info':
                return dict(info)
        
        title = self.html['mediaInfo']['title']
        sub_title = re.sub('[\/:*?"<>|]', ' ', title)
        qn_desc = self.qn2desc[info['actual_quality'][0]]
        midname = 'bangumi/{}--{}'.format(sub_title, qn_desc)
        
        print('Downloading bangumi:')
        print('{}({})----{}'.format(title, qn_desc, info['page']))
        
        video_dir = os.path.join(self.DIRNAME, midname)
        if not os.path.exists(video_dir):
            os.makedirs(video_dir)
            
        executor = ThreadPoolExecutor(max_workers=self.max_workers)
        tasks = []
        for i, p in enumerate(info['page']):
            sub_title = re.sub('[\/:*?"<>|]', ' ', info['title'][i])
            len_play_url = len(info['play_url'][i])
            
            for j in range(len_play_url):
                play_url = info['play_url'][i][j]

                if len_play_url == 1:
                    file_path = os.path.join(video_dir, '{}.flv'.format(sub_title))
                else:
                    # e.g. https://www.bilibili.com/bangumi/play/ss2539/
                    file_path = os.path.join(video_dir, 
                                             'P{}-{}-{}-{}.flv'.format(p, j + 1, len_play_url, sub_title))
                tasks.append(executor.submit(self.download_single, file_path, play_url))

        self.container = {}
        for future in as_completed(tasks):
            file_path = future.result()
            self.postprocess(video_dir, file_path)
        print('\nAll done!')

In [3]:
url = 'https://www.bilibili.com/bangumi/play/ss6333?from=search&seid=597606927081612871'
page = 1
quality = 5

bili_downloader = BiliDownloader(url)
info = bili_downloader(page, quality, 'info')


Invalid Cookie! You need use or update the VIP COOKIE!


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [7]:
print(info)

{'page': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'aid': [11501084, 11501084, 11501084, 11501084, 11501084, 11501084, 11501084, 11501084, 11501084, 11501084, 11501084, 11501084], 'cid': [19014118, 19014112, 19014119, 19014113, 19014120, 19014114, 19014121, 19014116, 19014117, 19014115, 19014110, 19014111], 'title': ['第1话 01', '第2话 02', '第3话 03', '第4话 04', '第5话 05', '第6话 06', '第7话 07', '第8话 08', '第9话 09', '第10话 10', '第11话 11', '第12话 12'], 'play_url': [['http://cn-hk-bn-v-02.bilivideo.com/upgcxcode/18/41/19014118/19014118-1-112.flv?expires=1587326400&platform=pc&ssig=Mn5wUu1flqcgjCa83psRZw&oi=1081635498&trid=6a3a98a29ee7485480f17e05d47b8f02u&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=40796034&logo=80000000', 'http://cn-hk-bn-v-02.bilivideo.com/upgcxcode/18/41/19014118/19014118-2-112.flv?expires=1587326400&platform=pc&ssig=b5m6jpL-yD_U-H25-qIY8w&oi=1081635498&trid=6a3a98a29ee7485480f17e05d47b8f02u&nfc=1&nfb=maPYqpoel5MI3qOUX6YpRA==&mid=40796034&logo=80000000', 'http://cn-hk-bn-v-02.bilivideo.

In [None]:
bili_downloader(mode='download', info=info)

Downloading bangumi:
棺姬嘉依卡（僅限港澳台地區）(1080P+)----[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-3-14-第1话 01.flv

P1-3-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-2-14-第1话 01.flv

P1-2-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-4-14-第1话 01.flv

P1-4-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-7-14-第1话 01.flv

P1-7-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-5-14-第1话 01.flv

P1-5-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-6-14-第1话 01.flv

P1-6-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-9-14-第1话 01.flv

P1-9-14-第1话 01.flv
    100.0%
    Done: D:/PyTorch/downloads\bangumi/棺姬嘉依卡（僅限港澳台地區）--1080P+\P1-1-14-第1话 01.flv

P1-1-14-第1话 01.flv
    100.0%
    Do