[Reference](https://oceanpad.medium.com/python-asynchronous-library-comparison-f4a9c8c225e5)

- threading
- multiprocessing
- asyncio


In [1]:
import requests
import time


def download_site(url, session):
  """Request ``url`` through ``session`` and announce it on stdout.

  Args:
    url: Address handed to ``session.get``.
    session: Object whose ``get(url)`` returns a context manager; the
      caller in this cell passes a ``requests.Session``.
  """
  # The response is only held open for the duration of the print; its
  # content is never read, so it is not bound to a name.
  with session.get(url):
    message = "Got content from website: {}".format(url)
    print(message)


def download_all_sites(sites):
  """Download each URL in ``sites`` one after another.

  A single ``requests.Session`` is opened for the whole run and passed to
  ``download_site`` for every URL.

  Args:
    sites: Iterable of URLs.
  """
  with requests.Session() as shared_session:
    for target in sites:
      download_site(target, shared_session)

if __name__ == "__main__":
  # Baseline: 20 requests issued sequentially.
  sites = ["https://stackoverflow.com", "https://github.com"] * 10
  # perf_counter() is monotonic and high-resolution, so the measured
  # interval cannot be skewed by a system clock adjustment the way a
  # time.time() difference can.
  start_time = time.perf_counter()
  download_all_sites(sites)
  duration = time.perf_counter() - start_time
  print("Download time: {}".format(duration))

Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Download time: 2.1867833137512

In [2]:
import concurrent.futures
import threading
import requests
import time
# Thread-local storage: get_session() keeps one requests.Session per thread
# on this object under the attribute name "session".
thread_local = threading.local()

def get_session():
  """Return the calling thread's ``requests.Session``, creating it on first use.

  The session is cached on ``thread_local.session``; a missing attribute or
  a stored ``None`` both cause a new session to be created.
  """
  cached = getattr(thread_local, "session", None)
  if cached is not None:
    return cached
  created = requests.Session()
  thread_local.session = created
  return created

def download_site(url):
  """Request ``url`` with the current thread's session and announce it.

  Args:
    url: Address to fetch.
  """
  # get_session() hands back the session cached for this thread.
  with get_session().get(url):
    print("Got content from website: {}".format(url))

def download_all_sites(sites, max_workers=5):
  """Download every URL in ``sites`` concurrently on a thread pool.

  A failed download does not stop the others; it is reported on stdout
  once its future has completed.

  Args:
    sites: Iterable of URLs; each one is passed to ``download_site``.
    max_workers: Size of the thread pool. Defaults to 5, the value that
      used to be hard-coded.
  """
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    # An executor.map() whose result iterator is never consumed drops any
    # exception raised in a worker. Keeping the futures lets each failure
    # be inspected and surfaced.
    pending = {executor.submit(download_site, url): url for url in sites}
    for future in concurrent.futures.as_completed(pending):
      error = future.exception()
      if error is not None:
        print("Failed to download {}: {!r}".format(pending[future], error))
    
if __name__ == "__main__":
  # Threaded variant: the same 20 requests spread over the thread pool.
  sites = ["https://stackoverflow.com", "https://github.com"] * 10
  # perf_counter() is monotonic and high-resolution, so the measured
  # interval cannot be skewed by a system clock adjustment the way a
  # time.time() difference can.
  start_time = time.perf_counter()
  download_all_sites(sites)
  duration = time.perf_counter() - start_time
  print("Download time: {}".format(duration))

Got content from website: https://stackoverflow.com
Got content from website: https://stackoverflow.com
Got content from website: https://stackoverflow.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Download time: 0.8919253349304

In [5]:
!pip install aiohttp

Collecting aiohttp
  Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 7.1 MB/s 
[?25hCollecting yarl<2.0,>=1.0
  Downloading yarl-1.7.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)
[K     |████████████████████████████████| 271 kB 69.2 MB/s 
[?25hCollecting async-timeout<4.0,>=3.0
  Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-5.2.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (160 kB)
[K     |████████████████████████████████| 160 kB 56.7 MB/s 
Installing collected packages: multidict, yarl, async-timeout, aiohttp
Successfully installed aiohttp-3.7.4.post0 async-timeout-3.0.1 multidict-5.2.0 yarl-1.7.0


In [7]:
import asyncio
import aiohttp
import time

async def download_site(session, url):
  """Request ``url`` through ``session`` and announce it on stdout.

  Args:
    session: Object whose ``get(url)`` returns an async context manager;
      the caller in this cell passes an ``aiohttp.ClientSession``.
    url: Address handed to ``session.get``.
  """
  # The response body is never read, so the response is not bound to a name.
  async with session.get(url):
    message = "Got content from website: {}".format(url)
    print(message)

async def download_all_sites(sites):
  """Download every URL in ``sites`` concurrently on one aiohttp session.

  Downloads are best-effort: a failure in one does not cancel the others.
  Each failure is reported on stdout after all downloads have finished.

  Args:
    sites: Iterable of URLs; each one is passed to ``download_site``.
  """
  # Materialise once so the URLs can be paired with their outcomes below,
  # even when the caller passes a one-shot iterator.
  sites = list(sites)
  async with aiohttp.ClientSession() as session:
    # gather() schedules the coroutines as tasks and returns the results in
    # the order the awaitables were passed.
    outcomes = await asyncio.gather(
        *(download_site(session, url) for url in sites),
        return_exceptions=True)
  # return_exceptions=True turns failures into ordinary result values;
  # discarding them would hide every error, so they are printed here.
  for url, outcome in zip(sites, outcomes):
    if isinstance(outcome, BaseException):
      print("Failed to download {}: {!r}".format(url, outcome))

if __name__ == "__main__":
  # asyncio variant: the same 20 requests run as concurrent coroutines.
  sites = ["https://stackoverflow.com", "https://github.com"] * 10
  # perf_counter() is monotonic and high-resolution, so the measured
  # interval cannot be skewed by a system clock adjustment.
  start_time = time.perf_counter()
  # asyncio.run() creates a fresh event loop, runs the coroutine to
  # completion and closes the loop afterwards.
  # NOTE: it cannot be called while another event loop is already running
  # in the same thread (e.g. some notebook kernels); in that situation use
  # `await download_all_sites(sites)` instead.
  asyncio.run(download_all_sites(sites))
  duration = time.perf_counter() - start_time
  print("Download time: {}".format(duration))

In [8]:
import requests
import multiprocessing
import time
# Module-level session slot. It starts empty and is filled by
# set_global_session(), which download_all_sites() passes to the
# multiprocessing pool as its initializer.
session = None

def set_global_session():
  """Populate the module-level ``session`` if it is not set yet.

  Used as the ``multiprocessing.Pool`` initializer in this cell. An
  existing truthy ``session`` is left untouched.
  """
  global session
  if session:
    return
  session = requests.Session()

def download_site(url):
  """Request ``url`` with the module-level ``session`` and announce it.

  Relies on ``set_global_session`` having filled ``session`` beforehand.

  Args:
    url: Address to fetch.
  """
  # The response body is never read, so the response is not bound to a name.
  with session.get(url):
    message = "Got content from website: {}".format(url)
    print(message)

def download_all_sites(sites):
  """Download every URL in ``sites`` on a pool of worker processes.

  The pool is created with ``set_global_session`` as its initializer and
  ``download_site`` is mapped over the URLs.

  Args:
    sites: Iterable of URLs.
  """
  worker_pool = multiprocessing.Pool(initializer=set_global_session)
  with worker_pool:
    # map() blocks until every URL has been processed.
    worker_pool.map(download_site, sites)
        
if __name__ == "__main__":
  # Multiprocessing variant: the same 20 requests spread over worker processes.
  sites = ["https://stackoverflow.com", "https://github.com"] * 10
  # perf_counter() is monotonic and high-resolution, so the measured
  # interval cannot be skewed by a system clock adjustment the way a
  # time.time() difference can.
  start_time = time.perf_counter()
  download_all_sites(sites)
  duration = time.perf_counter() - start_time
  print("Download time: {}".format(duration))

Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Got content from website: https://stackoverflow.com
Got content from website: https://github.com
Download time: 1.4255616664886

- So for CPU-intensive tasks, we should use multiprocessing, because it can spread the work across multiple CPU cores and thereby reduce the computation time.

- For I/O-intensive tasks, we can choose threading or asyncio. Either one lets the tasks overlap their waiting time, so they run with higher performance.