In [2]:
import asyncio
import concurrent.futures
import functools
import requests
import os

import nest_asyncio
nest_asyncio.apply()

# WARNING:
# Here I'm pointing to a publicly available sample video.
# If you are planning on running this code, make sure the
# video is still available as it might change location or get deleted.
# If necessary, replace it with a URL you know is working.
URL = 'https://rr3---sn-2va3vhuxa-f5fz.googlevideo.com/videoplayback?expire=1706544461&ei=7Xi3ZbKhCp_Ki9oPva-YiA0&ip=31.0.1.112&id=o-AOuZ726bvjOwMCUB6WqJugpl_N_uAM8ZfhUAoqbFUfT8&itag=251&source=youtube&requiressl=yes&xpc=EgVo2aDSNQ%3D%3D&mh=ha&mm=31%2C29&mn=sn-2va3vhuxa-f5fz%2Csn-f5f7lnld&ms=au%2Crdu&mv=m&mvi=3&pl=19&initcwndbps=540000&vprv=1&svpuc=1&mime=audio%2Fwebm&gir=yes&clen=4790544&dur=287.241&lmt=1701471774487386&mt=1706522691&fvip=2&keepalive=yes&fexp=24007246&c=ANDROID&txp=5532434&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cxpc%2Cvprv%2Csvpuc%2Cmime%2Cgir%2Cclen%2Cdur%2Clmt&sig=AJfQdSswRAIgVTx-YdA7GVUh5I20ObF_SzsP7gHknDAgyNqL1LFuZVoCICIR2Got5wZdYkr9b6Tf-JWJ0JzPlHiDgA4ij45OKjWN&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AAO5W4owRQIhAKYLT3lTXzI0xoPVS8_O-O_EB8SwEcN-BMfIJZIyoiaAAiAL4HCR00kCGpm7S-0gR7o0EM6VdVHykzp_K7P8OOmP-Q%3D%3D'
OUTPUT = 'video.webm'



In [None]:

async def get_size(url):
    response = requests.head(url)
    size = int(response.headers['Content-Length'])
    return size


def download_range(url, start, end, output):
    print("downloading", url, start, end)
    headers = {'Range': f'bytes={start}-{end}'}
    response = requests.get(url, headers=headers)

    with open(output, 'wb') as f:
        for part in response.iter_content(1024):
            f.write(part)


async def download(run, loop, url, output, chunk_size=1000000):
    file_size = await get_size(url)
    chunks = range(0, file_size, chunk_size)

    tasks = [
        run(
            download_range,
            url,
            start,
            start + chunk_size - 1,
            f'{output}.part{i}',
        )
        for i, start in enumerate(chunks)
    ]

    await asyncio.wait(tasks)

    with open(output, 'wb') as o:
        for i in range(len(chunks)):
            chunk_path = f'{output}.part{i}'

            with open(chunk_path, 'rb') as s:
                o.write(s.read())

            os.remove(chunk_path)
 

In [None]:
executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
loop = asyncio.new_event_loop()
run = functools.partial(loop.run_in_executor, executor)

asyncio.set_event_loop(loop)

try:
    loop.run_until_complete(
        download(run, loop, URL, OUTPUT)
    )
finally:
    loop.close()

In [9]:
import asyncio
import concurrent.futures
import io

from tqdm.notebook import tqdm

def get_size(url):
    response = requests.head(url)
    size = int(response.headers['Content-Length'])
    return size

def download_range(url, start, end, id):
    print("downloading", id, start, end)
    headers = {'Range': f'bytes={start}-{end}'}
    progress_bar = tqdm(total=end-start, unit='B', unit_scale=True)
    response = requests.get(url, headers=headers)

    output = io.BytesIO()
    for part in response.iter_content(1024):
        output.write(part)
        progress_bar.update(len(part))
    progress_bar.close()
    return output

def run_async_functions_asyncio(url):
    chunk_size=1000000
    file_size = get_size(url)
    chunks = range(0, file_size, chunk_size)

    # tasks = [async_function(i) for i in range(len(functions))]
    with concurrent.futures.ThreadPoolExecutor(max_workers=12) as executor:
        futures = {executor.submit(download_range, url, start, start + chunk_size - 1, i): (download_range, (url, start, start + chunk_size - 1, i)) for i, start in enumerate(chunks)}
        concurrent.futures.wait(futures.keys())

        # Wait for all tasks to complete using as_completed
        results = []
        intermediate = []
        for future in concurrent.futures.as_completed(futures):
            func, args = futures[future]
            result = future.result()
            intermediate.append((args[3], result))

    results = [a[1] for a in list(sorted(intermediate, key=lambda x:x[0]))]

    return results

def main():

    results = run_async_functions_asyncio(URL)
    buffer = b''

    for i, result in enumerate(results):
        buffer+=result.getvalue()
    return buffer

if __name__ == "__main__":
    value = main()
    with open("file.webm", "wb") as f:  
        f.write(value)


downloading 0 0 999999
downloading 1 1000000 1999999
downloading 2 2000000 2999999
downloading 3 3000000 3999999
downloading 4 4000000 4999999


  0%|          | 0.00/1.00M [00:00<?, ?B/s]

  0%|          | 0.00/1.00M [00:00<?, ?B/s]

  0%|          | 0.00/1.00M [00:00<?, ?B/s]

  0%|          | 0.00/1.00M [00:00<?, ?B/s]

  0%|          | 0.00/1.00M [00:00<?, ?B/s]

In [None]:
def run_parallel_functions(functions, *args_list):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(func, *args): (func, args) for func, args in zip(functions, zip(*args_list))}

        # Wait for all tasks to complete using as_completed

        concurrent.futures.wait(futures.keys())

        results = []
        for future in concurrent.futures.as_completed(futures):
            func, args = futures[future]
            result = future.result()
            results.append(result)

    return results

if __name__ == "__main__":

    results = run_parallel_functions(functions_to_run)
    print(results)