[Reference](https://medium.com/@write2bishwarup/asyncio-the-underrated-weapon-for-ml-11a37f315355)

# 1. Sequential


In [1]:
import io
import time

import requests
from numpy.typing import NDArray
from PIL import Image

# Requesting this URL returns a random 300x300 image each time.
unsplash_search_url = "https://source.unsplash.com/random/300x300"


def download_random_image(num: int = 1) -> None:
    """Download one random image with ``requests`` and print its size.

    Args:
        num: Label used to identify this download in the printed output.

    Returns:
        None. The decoded image is only inspected for its size, not returned.

    Notes:
        A non-200 response is reported and skipped. Previously the size was
        printed unconditionally, so any non-200 response crashed with
        ``UnboundLocalError`` because ``img`` had never been assigned.
    """
    response = requests.get(unsplash_search_url)
    if response.status_code != 200:
        # Nothing to decode: report the failure instead of touching an unbound name.
        print(f"image {num} failed: HTTP {response.status_code}")
        return

    img = Image.open(io.BytesIO(response.content))
    print(f"image {num} shape: {img.size}")
    
def download_random_images(n: int = 10):
    """Fetch ``n`` random images one after another.

    Each download is started only after the previous one has finished; the
    loop index is passed along as the image label.
    """
    print("Downloading the images sequentially...")
    for image_number in range(n):
        download_random_image(image_number)
        
if __name__ == "__main__":
    # Time one full sequential run; perf_counter() is read immediately before
    # and after the call so only the downloads are measured.
    tick = time.perf_counter()
    download_random_images()
    tock = time.perf_counter()
    print(f"elapsed: {tock-tick:.2f} seconds")

Downloading the images sequentially...
image 0 shape: (300, 300)
image 1 shape: (300, 300)
image 2 shape: (300, 300)
image 3 shape: (300, 300)
image 4 shape: (300, 300)
image 5 shape: (300, 300)
image 6 shape: (300, 300)
image 7 shape: (300, 300)
image 8 shape: (300, 300)
image 9 shape: (300, 300)
elapsed: 1.73 seconds


# 2. Multi-processing


In [2]:
from multiprocessing import Pool

def download_multiprocessing(n: int = 10, n_processes: int = 4) -> None:
    """Download ``n`` random images using a pool of worker processes.

    Args:
        n: Number of images to download; indices ``0..n-1`` are used as labels.
        n_processes: Number of worker processes in the pool.

    Notes:
        The pool is managed by a ``with`` block so its worker processes are
        always released. Previously the pool was never closed or joined, so
        every call (e.g. every re-run of the notebook cell) leaked workers.
    """
    print("downloading images using multi-processing...")
    with Pool(n_processes) as pool:
        # map() blocks until every download has finished (or one has raised).
        pool.map(download_random_image, range(n))
        # Shut down gracefully on success; the context manager's terminate()
        # then only acts as a safety net on the error path.
        pool.close()
        pool.join()
    
if __name__ == "__main__":
    # Time one full multi-processing run; the measured span covers everything
    # download_multiprocessing() does, including creating the pool.
    tick = time.perf_counter()
    download_multiprocessing()
    tock = time.perf_counter()
    print(f"elapsed: {tock-tick:.2f} seconds")

downloading images using multi-processing...
image 0 shape: (300, 300)
image 3 shape: (300, 300)
image 2 shape: (300, 300)
image 1 shape: (300, 300)
image 5 shape: (300, 300)image 7 shape: (300, 300)image 6 shape: (300, 300)image 4 shape: (300, 300)



image 9 shape: (300, 300)
image 8 shape: (300, 300)
elapsed: 1.64 seconds


# 3. Multi-threading


In [3]:
import concurrent.futures

def download_multithreading(n: int = 10, n_threads: int = 4) -> None:
    """Download ``n`` random images using a pool of worker threads.

    Args:
        n: Number of images to download; indices ``0..n-1`` are used as labels.
        n_threads: Maximum number of worker threads.

    Raises:
        Exception: Re-raises the first exception raised by any download.
            Previously the futures were discarded, so worker failures were
            silently swallowed, unlike the sequential and multi-processing
            variants, which propagate them.
    """
    print("downloading images using multi-threading...")
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
        futures = [executor.submit(download_random_image, i) for i in range(n)]
        for future in concurrent.futures.as_completed(futures):
            # result() re-raises whatever the worker raised; the return value
            # itself is not needed.
            future.result()

if __name__ == "__main__":
    # Time one full multi-threading run with 4 worker threads; the executor's
    # `with` block inside download_multithreading() has exited before tock is read.
    tick = time.perf_counter()
    download_multithreading(n_threads=4)
    tock = time.perf_counter()
    print(f"elapsed: {tock-tick:.2f} seconds")

downloading images using multi-threading...
image 3 shape: (300, 300)image 2 shape: (300, 300)image 1 shape: (300, 300)


image 0 shape: (300, 300)
image 4 shape: (300, 300)
image 5 shape: (300, 300)
image 6 shape: (300, 300)image 7 shape: (300, 300)

image 8 shape: (300, 300)
image 9 shape: (300, 300)
elapsed: 0.75 seconds


# 4. asyncio


In [4]:
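# NOTE(review): this whole cell is commented out and produces no output.
# Presumably that is because asyncio.run() raises RuntimeError when called from
# Jupyter's already-running event loop; in a notebook the equivalent would be
# `await download_images_async()`. It also needs aiohttp installed. TODO confirm.
# import asyncio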

# import aiohttp

# async def download_image_async(session: aiohttp.ClientSession, num: int = 1):
#     async with session.get(unsplash_search_url) as response:
#         if response.status == 200:
#             image_buffer = await response.read()
#             img = Image.open(io.BytesIO(image_buffer))
#             print(f"image {num} shape: {img.size}")


# async def download_images_async(n: int = 10):
#     print("Downloading images using async...")
#     async with aiohttp.ClientSession() as session:
#         tasks = [download_image_async(session, i + 1) for i in range(n)]
#         _ = await asyncio.gather(*tasks)
#     return


# if __name__ == "__main__":
#     tick = time.perf_counter()
#     asyncio.run(download_images_async())
#     tock = time.perf_counter()
#     print(f"elapsed: {tock-tick:.2f} seconds")