In [1]:
import requests
import time

import concurrent.futures
import threading

import asyncio
import aiohttp
import nest_asyncio
import multiprocessing_methods as mm
import multiprocessing

#### Wersja Synchroniczna

In [2]:
def download_site(url, session):
	session.get(url)


def download_all_sites(sites):
	with requests.Session() as session:
		for url in sites:
			download_site(url, session)

sites = [
		"https://www.jython.org",
		"http://olympus.realpython.org/dice",
] * 80
start_time = time.time()
download_all_sites(sites)
duration_sync = time.time() - start_time
print(f"Downloaded {len(sites)} in {duration_sync} seconds")

Downloaded 160 in 17.83223557472229 seconds


#### Wersja z wątkami

In [3]:
thread_local = threading.local()
def get_session():
    if not hasattr(thread_local, "session"):
        thread_local.session = requests.Session()
    return thread_local.session


def download_all_sites(sites):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.map(download_site, sites)



start_time = time.time()
download_all_sites(sites)
duration_thread = time.time() - start_time
print(f"Downloaded {len(sites)} in {duration_thread} seconds")

Downloaded 160 in 0.003989219665527344 seconds


#### Wersja asyncio


In [4]:
async def download_all_sites_async(sites):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in sites:
            task = asyncio.ensure_future(session.get(url))
            tasks.append(task)
        await asyncio.gather(*tasks, return_exceptions=True)

start_time = time.time()
nest_asyncio.apply()
asyncio.run(download_all_sites_async(sites))
duration_asyncio = time.time() - start_time
print(f"Downloaded {len(sites)} sites in {duration_asyncio} seconds")

Downloaded 160 sites in 1.3637261390686035 seconds


#### Wersja multiprocessing

W Jupyter nie działa multiprocessing i jedynym rozwiązaniem jakie znalazłem to przeniesienie metod do zewnętrznego pliku py

In [5]:
def download_all_sites(sites):
	with multiprocessing.Pool(initializer=mm.set_global_session) as pool:
			pool.map(mm.download_site_multiprocessing, sites)

start_time = time.time()
download_all_sites(sites)
duration_mul = time.time() - start_time
print(f"Downloaded {len(sites)} in {duration_mul} seconds")

Downloaded 160 in 2.4024012088775635 seconds


#### Porównanie wyników

In [6]:
print(f"Synchroniczna: {duration_sync}")
print(f"threading: {duration_thread}")
print(f"asyncio: {duration_asyncio}")
print(f"multiprocessing: {duration_mul}")

Synchroniczna: 17.83223557472229
threading: 0.003989219665527344
asyncio: 1.3637261390686035
multiprocessing: 2.4024012088775635


#### Przyspieszanie programów ograniczonych przez CPU

##### Wersja Synchroniczna

In [7]:
def cpu_bound(number):
    return sum(i * i for i in range(number))


def find_sums(numbers):
    for number in numbers:
        cpu_bound(number)



numbers = [5_000_000 + x for x in range(20)]

start_time = time.time()
find_sums(numbers)
duration_sync_cpu = time.time() - start_time
print(f"Duration {duration_sync_cpu} seconds")


Duration 9.576128005981445 seconds


##### Wersja wątkowa


In [8]:
def find_sums(numbers):
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        executor.map(cpu_bound, numbers)


start_time = time.time()
find_sums(numbers)
duration_thread_cpu = time.time() - start_time
print(f"Duration {duration_thread_cpu} seconds")

Duration 9.923840999603271 seconds


##### Wersja asyncio


In [9]:
async def find_sum_async(numbers):
    tasks=[]
    for number in numbers:
        task = asyncio.ensure_future(cpu_bound_async(number))
        tasks.append(task)
    await asyncio.gather(*tasks, return_exceptions=True)


async def cpu_bound_async(number):
    return sum(i * i for i in range(number))


start_time = time.time()
nest_asyncio.apply()
asyncio.run(find_sum_async(numbers))
duration_asyncio_cpu = time.time() - start_time
print(f"Duration {duration_asyncio_cpu} seconds")

Duration 9.697748184204102 seconds


##### Wersja z multiprocessingiem


In [10]:
def find_sums_multiprocessing(numbers):
    with multiprocessing.Pool() as pool:
        pool.map(mm.cpu_bound_multiprocessing, numbers)

start_time = time.time()
find_sums_multiprocessing(numbers)
duration_mul_cpu = time.time() - start_time
print(f"Duration {duration_mul_cpu} seconds")

Duration 1.8489975929260254 seconds


##### Porównanie
Multiprocessing w przypadku obliczeń czysto procesorowych dzięki użyciu większej ilości rdzeni jest w stanie dramatycznie przyspieszyć obliczenia

In [11]:
print(f"Synchroniczna: {duration_sync_cpu}" )
print(f"threading: {duration_thread_cpu}" )
print(f"asyncio: {duration_asyncio_cpu}" )
print(f"multiprocessing: {duration_mul_cpu}" )


Synchroniczna: 9.576128005981445
threading: 9.923840999603271
asyncio: 9.697748184204102
multiprocessing: 1.8489975929260254
