## Import package

In [2]:
# concurency package
import threading as th
import multiprocessing as mp
import concurrent.futures
import asyncio
import aiohttp

import datetime
import functools
import time
import requests
import os

## Tranditional multiprocessing and threading

In [12]:
# use multiprocessing and threading

# cal cost time
def timer(func):
    """Print the runtime of the decorated function"""
    @functools.wraps(func)
    def wrapper_timer(*args, **kwargs):
        start_time = time.perf_counter()    # 1
        value = func(*args, **kwargs)
        end_time = time.perf_counter()      # 2
        run_time = end_time - start_time    # 3
        print(f"Finished {func.__name__!r} in {run_time:.4f} secs")
        return value
    return wrapper_timer

# Fibonacci
def fib(n):
    if n < 2:
        return 1
    return fib(n - 1) + fib(n - 2)

def do_some_fib(n, start):
    finish = fib(n)
    print(f'{n} ', th.current_thread().name)
    print(f'{n} pid:', os.getpid())
    print(f'{n} parent id:', os.getppid())
    print(f'Complete {n} levels FIB. Answer is {finish}. Cost time {datetime.datetime.now() - start}\n')


FIBS = [35, 28, 1, 25, 10]
@timer
def threading_fib():
    threads = []
    for i in range(len(FIBS)):
        threads.append(th.Thread(target=do_some_fib, args=(FIBS[i], datetime.datetime.now())))
        threads[i].start()

    for i in range(len(FIBS)):
        threads[i].join()
        
@timer
def processing_fib():
    processes = []
    for i in range(len(FIBS)):
        processes.append(mp.Process(target=do_some_fib, args=(FIBS[i], datetime.datetime.now())))
        processes[i].start()  
    for i in range(len(FIBS)):
        processes[i].join()
        
@timer    
def sync_fib():
    for i in FIBS:
        do_some_fib(i, datetime.datetime.now())

In [10]:
sync_fib()

35  MainThread
35 pid: 116124
35 parent id: 114367
Complete 35 levels FIB. Answer is 14930352. Cost time 0:00:03.427871

28  MainThread
28 pid: 116124
28 parent id: 114367
Complete 28 levels FIB. Answer is 514229. Cost time 0:00:00.110496

1  MainThread
1 pid: 116124
1 parent id: 114367
Complete 1 levels FIB. Answer is 1. Cost time 0:00:00.000093

25  MainThread
25 pid: 116124
25 parent id: 114367
Complete 25 levels FIB. Answer is 121393. Cost time 0:00:00.027719

10  MainThread
10 pid: 116124
10 parent id: 114367
Complete 10 levels FIB. Answer is 89. Cost time 0:00:00.001258

Finished 'sync_fib' in 3.5677 secs


In [17]:
threading_fib()
print('-' * 50)
processing_fib()

1  Thread-41
1 pid: 116124
1 parent id: 114367
Complete 1 levels FIB. Answer is 1. Cost time 0:00:00.032167

10  Thread-43
10 pid: 116124
25  Thread-42
25 pid: 116124
25 parent id: 114367
Complete 25 levels FIB. Answer is 121393. Cost time 0:00:00.134112

10 parent id: 114367
Complete 10 levels FIB. Answer is 89. Cost time 0:00:00.191300

28  Thread-40
28 pid: 116124
28 parent id: 114367
Complete 28 levels FIB. Answer is 514229. Cost time 0:00:00.467486

35  Thread-39
35 pid: 116124
35 parent id: 114367
Complete 35 levels FIB. Answer is 14930352. Cost time 0:00:03.706364

Finished 'threading_fib' in 3.7076 secs
--------------------------------------------------
1  MainThread
1 pid: 7395
1 parent id: 116124
Complete 1 levels FIB. Answer is 1. Cost time 0:00:00.017431
10  MainThread

10 pid: 7399
10 parent id: 116124
Complete 10 levels FIB. Answer is 89. Cost time 0:00:00.013438

25  MainThread
25 pid: 7396
25 parent id: 116124
Complete 25 levels FIB. Answer is 121393. Cost time 0:00:00.

## Concurrent future package (Python 3.4+)

In [180]:
URLS = [
    'https://docs.python.org/3/library/ast.html',
    'https://docs.python.org/3/library/abc.html',
    'https://docs.python.org/3/library/time.html',
    'https://docs.python.org/3/library/os.html',
    'https://docs.python.org/3/library/sys.html',
    'https://docs.python.org/3/library/io.html',
    'https://docs.python.org/3/library/pdb.html',
    'https://docs.python.org/3/library/weakref.html'
]

# use cocurrent

@timer
def get_content(url):
    print(url)
    print(f'{url} | ', th.current_thread().name, f' | pid:', os.getpid(), f' | parent id:', os.getppid())
    return requests.get(url).text

@timer
def thread_scrap():
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_url = {executor.submit(get_content, url): url for url in URLS}
        print(future_to_url)
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                data = future.result()
            except Execption as exc:
                print('%r generated an exception: %s' % (url, exc))
            else:
                print('%r page length is %d' % (url, len(data)))

@timer
def processing_scrap():
    with concurrent.futures.ProcessPoolExecutor(10) as executor:
        future_to_url = {}
        for key, value in zip(URLS, executor.map(get_content, URLS)):
            future_to_url[key] = value
            print('%r page length is %d' % (key, len(value)))

                
@timer
def main():
    for url in URLS:
        try:
            data = get_content(url)
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page length is %d' % (url, len(data)))

In [178]:
main()

https://docs.python.org/3/library/ast.html
https://docs.python.org/3/library/ast.html |  MainThread  | pid: 64284  | parent id: 64150
Finished 'get_content' in 3.1750 secs
'https://docs.python.org/3/library/ast.html' page length is 40454
https://docs.python.org/3/library/abc.html
https://docs.python.org/3/library/abc.html |  MainThread  | pid: 64284  | parent id: 64150


KeyboardInterrupt: 

In [193]:
thread_scrap()
print('-' * 50)
processing_scrap()

https://docs.python.org/3/library/ast.htmlhttps://docs.python.org/3/library/abc.html
https://docs.python.org/3/library/abc.html |  ThreadPoolExecutor-26_1  | pid: 64284  | parent id: 64150
https://docs.python.org/3/library/time.html
https://docs.python.org/3/library/time.html |  ThreadPoolExecutor-26_2  | pid: 64284  | parent id: 64150

https://docs.python.org/3/library/ast.html |  ThreadPoolExecutor-26_0  | pid: 64284  | parent id: 64150
https://docs.python.org/3/library/os.html
https://docs.python.org/3/library/os.html |  ThreadPoolExecutor-26_3  | pid: 64284  | parent id: 64150
https://docs.python.org/3/library/sys.html
https://docs.python.org/3/library/sys.html |  https://docs.python.org/3/library/io.html
https://docs.python.org/3/library/io.html |  ThreadPoolExecutor-26_5  | pid: 64284  | parent id: 64150
ThreadPoolExecutor-26_4  | pid: 64284  | parent id: 64150
https://docs.python.org/3/library/pdb.html
https://docs.python.org/3/library/pdb.html |  https://docs.python.org/3/libra

## Coroutine (Async/Await)

In [98]:
def fib_yield():
    n1 = 0
    yield n1
    n2 = 1
    yield n2
    while True:
        n3 = n1 + n2
        yield n3
        n1 = n2
        n2 = n3
        

In [156]:
test = fib_yield()

In [171]:
next(test)

377

12586269025

In [153]:
# use asyncio 

@timer
async def fetch_url(session, url):
    start = datetime.datetime.now()
    async with session.get(url, timeout=60 * 60) as response:
        print(url, ' Starto')
        print(f'{url} ', th.current_thread().name)
        print(f'{url} pid:', os.getpid())
        print(f'{url} parent id:', os.getppid())
        html = await response.text()
        print(url, f' Length is {len(html)}. And spend {datetime.datetime.now() - start}')

async def fetch_all_urls(session, urls, loop):
    await asyncio.gather(*[fetch_url(session, url) for url in urls],
    return_exceptions=True)

@timer
async def get_htmls(urls):
    connector = aiohttp.TCPConnector(limit=100)
    async with aiohttp.ClientSession(loop=loop, connector=connector) as session:
        await fetch_all_urls(session, urls, loop)
    print('I\'m in get_htmls')

print(URLS)
start = datetime.datetime.now()
loop = asyncio.get_event_loop()
loop.run_until_complete(get_htmls(URLS))
print(f'I\'m Finish. {datetime.datetime.now() - start}')

['https://docs.python.org/3/library/ast.html', 'https://docs.python.org/3/library/abc.html', 'https://docs.python.org/3/library/time.html', 'https://docs.python.org/3/library/os.html', 'https://docs.python.org/3/library/sys.html', 'https://docs.python.org/3/library/io.html', 'https://docs.python.org/3/library/pdb.html', 'https://docs.python.org/3/library/weakref.html']
Finished 'get_htmls' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
https://docs.python.org/3/library/weakref.html  Starto
https://docs.python.org/3/library/weakref.html  MainThread
https://docs.python.org/3/library/weakref.html pid: 64284
https://docs.python.org/3/library/weakref.html parent id: 64150
https://docs.python.org/3/library/weakref.html  Length is 65807. And

In [189]:
@timer
async def fetch_url(session, url):
    print(url, ' function start!!')
    start = datetime.datetime.now()
    async with session.get(url, timeout=60 * 60) as response:
        print(url, ' Starto | ', th.current_thread().name, f' | pid: {os.getpid()}', f' | parent id: {os.getppid()}')
        html = await response.text()
        print(url, f' Length is {len(html)}. And spend {datetime.datetime.now() - start}')

async def fetch_all_urls(session, urls, loop):
    print('fetch_all_urls start')
    await asyncio.gather(*[fetch_url(session, url) for url in urls],
    return_exceptions=True)
    print('fetch_all_urls Finish.')

@timer
async def get_htmls(urls):
    print('get_htmls start')
    connector = aiohttp.TCPConnector(limit=100)
    async with aiohttp.ClientSession(loop=loop, connector=connector) as session:
        await fetch_all_urls(session, urls, loop)
    print(f'I\'m in get_htmls')

print(URLS)
start = datetime.datetime.now()
loop = asyncio.get_event_loop()
loop.run_until_complete(get_htmls(URLS))
print(f'I\'m Finish. {datetime.datetime.now() - start}')

['https://docs.python.org/3/library/ast.html', 'https://docs.python.org/3/library/abc.html', 'https://docs.python.org/3/library/time.html', 'https://docs.python.org/3/library/os.html', 'https://docs.python.org/3/library/sys.html', 'https://docs.python.org/3/library/io.html', 'https://docs.python.org/3/library/pdb.html', 'https://docs.python.org/3/library/weakref.html']
Finished 'get_htmls' in 0.0000 secs
get_htmls start
fetch_all_urls start
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
Finished 'fetch_url' in 0.0000 secs
https://docs.python.org/3/library/sys.html  function start!!
https://docs.python.org/3/library/time.html  function start!!
https://docs.python.org/3/library/os.html  function start!!
https://docs.python.org/3/library/weakref.html  function start!!
https://docs.python