# Video Downloader

> Functionality for acquiring data for training components of the models without agency. Currently only YouTube is supported, via [yt-dlp](https://github.com/yt-dlp/yt-dlp).

In [None]:
#| default_exp utility.video_downloader
#| export
import yt_dlp
import dataclasses
from typing import List, Dict, Any
from pathlib import Path
from algorithmic_gamer.utility.data_utils import *
import json

In [None]:
#| export
@dataclasses.dataclass
class Chapter:
    """A single chapter marker of a video.

    The field names mirror the keys of the entries in yt-dlp's ``chapters``
    list, extended with a free-form ``description``.
    """

    # Chapter boundaries in seconds; yt-dlp may report fractional values,
    # so these are annotated as float (ints remain acceptable).
    start_time: float
    end_time: float
    title: str
    # yt-dlp chapter entries normally carry no description, so fall back to
    # the 'N/A' placeholder used for missing text elsewhere in this module
    # instead of forcing every caller to supply one.
    description: str = 'N/A'

@dataclasses.dataclass
class VideoMetadata:
    """Subset of a yt-dlp info dictionary that is kept for each video.

    Every field is named after the info-dict key it is read from by
    ``parse_video_metadata``, which substitutes ``'N/A'`` for missing text
    fields and ``-1`` for missing numeric fields.
    """

    # --- identification and descriptive text ---
    id: str
    title: str
    alt_title: str
    description: str
    uploader: str

    # --- publication dates ---
    timestamp: int
    upload_date: str
    release_date: str

    # --- audience statistics ---
    view_count: int
    concurrent_view_count: int
    like_count: int
    dislike_count: int
    comment_count: int

    # --- content properties ---
    duration: int
    # Forward reference keeps this class importable on its own.
    chapters: List["Chapter"]
    # yt-dlp maps each language code to a list of subtitle-format dicts,
    # not to a single string.
    subtitles: Dict[str, List[Dict[str, Any]]]
    fps: int
    resolution: str
    
    
def download_video_and_extract_info(url, output_filename, download=True, verborse=False):
    """Extract a video's metadata with yt-dlp and optionally download it.

    Args:
        url: Address of the video to process.
        output_filename: Value handed to yt-dlp as its ``outtmpl`` option,
            i.e. where the downloaded file is written.
        download: When False only the metadata is extracted and nothing is
            downloaded.
        verborse: When True, turns on yt-dlp's ``verbose`` option. The
            misspelled name is kept so existing keyword callers keep working.

    Returns:
        The info dictionary returned by ``YoutubeDL.extract_info``.
    """
    ydl_opts = {
        'outtmpl': output_filename,
        # The Python API spells its options with underscores; the CLI-style
        # 'http-chunk-size' key is not a known parameter and was ignored.
        'http_chunk_size': 10485760,  # 10 MiB
        'verbose': verborse,
        'external_downloader': 'aria2c',
        'external_downloader_args': [
            '--max-connection-per-server=8',
            '--split=8',
            '--continue',
            '--auto-file-renaming=false',
        ],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        return ydl.extract_info(url, download=download)


def parse_video_metadata(info: dict) -> "VideoMetadata":
    """Build a ``VideoMetadata`` from a yt-dlp info dictionary.

    Missing text fields become ``'N/A'`` and missing numeric fields become
    ``-1``. Chapters are parsed on a best-effort basis: entries that are not
    dictionaries are skipped and absent chapter keys get the same
    placeholders.

    Args:
        info: Info dictionary as returned by ``YoutubeDL.extract_info``.

    Returns:
        The populated ``VideoMetadata`` instance, including parsed chapters.
    """
    # 'chapters' may be present but None, so `or []` is needed in addition to
    # the .get() default. Individual keys are read with .get() because a
    # single absent key (e.g. 'description') must not discard every chapter.
    chapters = [
        Chapter(
            start_time=chapter.get('start_time', -1),
            end_time=chapter.get('end_time', -1),
            title=chapter.get('title', 'N/A'),
            description=chapter.get('description', 'N/A'),
        )
        for chapter in info.get('chapters') or []
        if isinstance(chapter, dict)
    ]
    return VideoMetadata(
        id=info.get('id', 'N/A'),
        title=info.get('title', 'N/A'),
        alt_title=info.get('alt_title', 'N/A'),
        description=info.get('description', 'N/A'),
        uploader=info.get('uploader', 'N/A'),
        timestamp=info.get('timestamp', -1),
        upload_date=info.get('upload_date', 'N/A'),
        release_date=info.get('release_date', 'N/A'),
        view_count=info.get('view_count', -1),
        concurrent_view_count=info.get('concurrent_view_count', -1),
        like_count=info.get('like_count', -1),
        dislike_count=info.get('dislike_count', -1),
        comment_count=info.get('comment_count', -1),
        duration=info.get('duration', -1),
        # Previously hard-coded to [], which threw the parsed chapters away.
        chapters=chapters,
        subtitles=info.get('subtitles', {}),
        fps=info.get('fps', -1),
        resolution=info.get('resolution', 'N/A'),
    )

In [None]:
#| hide
# Export the cells marked `#| export` into the package module.
import nbdev

nbdev.nbdev_export()

### single video test

In [None]:
#|eval: false
# URL of the single video used by the metadata-extraction test cell below.
video = 'https://www.youtube.com/watch?v=EbLh7rZ3rhU'

In [None]:
#|eval: false
# Extract only the metadata of the test video (no download).
ydl_opts = {
    # The Python API spells its options with underscores; the CLI-style
    # 'http-chunk-size' key is not a known parameter and was ignored.
    'http_chunk_size': 10485760,  # 10 MiB
    'external_downloader': 'aria2c',
    'external_downloader_args': [
        '--max-connection-per-server=8',
        '--split=8',
        '--continue',
        '--auto-file-renaming=false',
    ],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(video, download=False)

[youtube] EbLh7rZ3rhU: Downloading webpage
[youtube] EbLh7rZ3rhU: Downloading android player API JSON


### download list of videos

In [None]:
#|eval: false
# Root folder of the dataset; videos.txt holds one video URL per line.
data_path = 'S:/algorithmic-gamer-resources'
vids = Path(f'{data_path}/videos.txt').read_text().splitlines()

In [None]:
#|eval: false
# Download every listed video into its own folder, store its metadata next to
# it, and remember the URLs that could not be processed.
failed = []
for v in vids:
    try:
        # The last 11 characters of the URL serve as the per-video folder name.
        video_dir = f'{data_path}/videos/{v[-11:]}'
        Path(f'{video_dir}/').mkdir(parents=True, exist_ok=True)

        # Download the video, then derive the VideoMetadata from its info.
        info = download_video_and_extract_info(v, f'{video_dir}/video.mp4')
        vm = parse_video_metadata(info)

        # Write the raw info first, then the parsed VideoMetadata.
        for file_name, payload in (('video_info.json', info), ('video_meta_data.json', vm)):
            with open(f'{video_dir}/{file_name}', 'w', encoding='utf-8') as f:
                json.dump(f_to_dict(payload), f, ensure_ascii=False, indent=4)
    except Exception as e:
        # Keep going with the remaining videos; failures are reported and collected.
        print(f'{v} failed with error: {e}')
        failed.append(v)

[youtube] PQmSUHhP3ug: Downloading webpage
[youtube] PQmSUHhP3ug: Downloading android player API JSON
[info] PQmSUHhP3ug: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\PQmSUHhP3ug\video.mp4
[download] 100% of   47.81MiB in 00:00:02 at 17.98MiB/s    
[youtube] i8G0C4W0mBs: Downloading webpage
[youtube] i8G0C4W0mBs: Downloading android player API JSON
[info] i8G0C4W0mBs: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\i8G0C4W0mBs\video.mp4
[download] 100% of   28.77MiB in 00:00:01 at 19.43MiB/s  
[youtube] VJsXa0QGXK0: Downloading webpage
[youtube] VJsXa0QGXK0: Downloading android player API JSON
[info] VJsXa0QGXK0: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\VJsXa0QGXK0\video.mp4
[download] 100% of   30.85MiB in 00:00:02 at 12.79MiB/s    
[youtube] HBOA1Jqf63Y: Downloading webpage
[youtube] HBOA1Jqf63Y: Downloading android player API JSON
[info] HBOA1Jqf63Y

[download] 100% of   43.42MiB in 00:00:01 at 25.73MiB/s  
[youtube] DpSdlaQnYuY: Downloading webpage
[youtube] DpSdlaQnYuY: Downloading android player API JSON
[info] DpSdlaQnYuY: Downloading 1 format(s): 22
[download] S:\algorithmic-gamer-resources\videos\DpSdlaQnYuY\video.mp4 has already been downloaded
[download] 100% of  111.73MiB
[youtube] LakQnOSgtys: Downloading webpage
[youtube] LakQnOSgtys: Downloading android player API JSON
[info] LakQnOSgtys: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\LakQnOSgtys\video.mp4
[download] 100% of  257.50MiB in 00:00:05 at 47.94MiB/s  
[youtube] RUgW2UdH-cI: Downloading webpage
[youtube] RUgW2UdH-cI: Downloading android player API JSON
[info] RUgW2UdH-cI: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\RUgW2UdH-cI\video.mp4
[download] 100% of  512.96MiB in 00:00:13 at 38.88MiB/s  
[youtube] Y7huvqSpqxg: Downloading webpage
[youtube] Y7huvqSpqxg: Downloading a

[youtube] mNP9jO7EHMo: Downloading android player API JSON
[youtube] mNP9jO7EHMo: Downloading MPD manifest
[youtube] mNP9jO7EHMo: Downloading MPD manifest
[info] mNP9jO7EHMo: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\mNP9jO7EHMo\video.mp4
[download] 100% of  602.80MiB in 00:00:15 at 39.96MiB/s  
[youtube] QlVmUnt9SVQ: Downloading webpage
[youtube] QlVmUnt9SVQ: Downloading android player API JSON
[youtube] QlVmUnt9SVQ: Downloading MPD manifest
[youtube] QlVmUnt9SVQ: Downloading MPD manifest
[info] QlVmUnt9SVQ: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\QlVmUnt9SVQ\video.mp4
[download] 100% of  457.38MiB in 00:00:12 at 36.94MiB/s    
[youtube] QlVmUnt9SVQ: Downloading webpage
[youtube] QlVmUnt9SVQ: Downloading android player API JSON
[youtube] QlVmUnt9SVQ: Downloading MPD manifest
[youtube] QlVmUnt9SVQ: Downloading MPD manifest
[info] QlVmUnt9SVQ: Downloading 1 format(s): 22
[download] S:\algor

[download] 100% of   31.93MiB in 00:00:01 at 22.53MiB/s  
[youtube] B-4--16sCIg: Downloading webpage
[youtube] B-4--16sCIg: Downloading android player API JSON
[youtube] B-4--16sCIg: Downloading MPD manifest
[youtube] B-4--16sCIg: Downloading MPD manifest
[info] B-4--16sCIg: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\B-4--16sCIg\video.mp4
[download] 100% of  539.45MiB in 00:00:11 at 47.05MiB/s  
[youtube] -D7wFoBVZJ8: Downloading webpage
[youtube] -D7wFoBVZJ8: Downloading android player API JSON
[youtube] -D7wFoBVZJ8: Downloading MPD manifest
[youtube] -D7wFoBVZJ8: Downloading MPD manifest
[info] -D7wFoBVZJ8: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\-D7wFoBVZJ8\video.mp4
[download] 100% of   28.74MiB in 00:00:01 at 19.20MiB/s  
[youtube] e30LstNycVQ: Downloading webpage
[youtube] e30LstNycVQ: Downloading android player API JSON
[youtube] e30LstNycVQ: Downloading MPD manifest
[youtube] e30Lst

[download] 100% of  449.92MiB in 00:00:52 at 8.62MiB/s   
[youtube] g18DE4-dxrc: Downloading webpage
[youtube] g18DE4-dxrc: Downloading android player API JSON
[youtube] g18DE4-dxrc: Downloading MPD manifest
[youtube] g18DE4-dxrc: Downloading MPD manifest
[info] g18DE4-dxrc: Downloading 1 format(s): 22
[download] Destination: S:\algorithmic-gamer-resources\videos\g18DE4-dxrc\video.mp4
[download] 100% of  198.99MiB in 00:00:20 at 9.74MiB/s      
