# Python 协程

## 从一个爬虫说起

In [2]:
import time

def crawl_page(url):
    """Simulate fetching ``url`` by blocking for a number of seconds.

    The delay is the integer that follows the last underscore in ``url``
    (``'url_3'`` blocks for 3 seconds). ``int()`` raises ``ValueError`` when
    that suffix is not a number.
    """
    print('crawling {}'.format(url))
    delay = int(url.rsplit('_', 1)[-1])
    # time.sleep() blocks the calling thread for the whole delay.
    time.sleep(delay)
    print('OK {}'.format(url))
    
def main(urls):
    """Crawl each entry of ``urls`` in order, one page at a time."""
    for page in urls:
        crawl_page(page)
        
# IPython %time magic: reports the CPU time and wall time of the sequential crawl.
%time main(['url_1', 'url_2', 'url_3', 'url_4'])

crawling url_1
OK url_1
crawling url_2
OK url_2
crawling url_3
OK url_3
crawling url_4
OK url_4
CPU times: total: 0 ns
Wall time: 10 s


In [1]:
import asyncio
import time

async def crawl_page(url):
    """Simulate fetching ``url`` as a coroutine.

    The delay is the integer that follows the last underscore in ``url``
    (``'url_3'`` waits 3 seconds). ``int()`` raises ``ValueError`` when that
    suffix is not a number.
    """
    print('crawling {}'.format(url))
    delay = int(url.rsplit('_', 1)[-1])
    # Awaiting asyncio.sleep() suspends only this coroutine for the delay.
    await asyncio.sleep(delay)
    print('OK {}'.format(url))
    
async def main(urls):
    """Crawl ``urls`` strictly one after another.

    Each page is awaited to completion before the next coroutine is created,
    so the crawls never overlap.
    """
    for page in urls:
        await crawl_page(page)
        
# jupyter
t1 = time.time()
await main(['url_1', 'url_2', 'url_3', 'url_4'])
t2 = time.time()
print(t2 -t1) 

# Ipython
# %time asyncio.run(main(['url_1', 'url_2', 'url_3', 'url_4']))

crawling url_1
OK url_1
crawling url_2
OK url_2
crawling url_3
OK url_3
crawling url_4
OK url_4
10.025423049926758


In [2]:
# Calling an ``async def`` function does not run its body; it only builds a
# coroutine object, which is what gets printed here.
coro = crawl_page('')
print(coro)
# The coroutine is never awaited, so close it explicitly; otherwise Python
# emits "RuntimeWarning: coroutine 'crawl_page' was never awaited".
coro.close()

<coroutine object crawl_page at 0x000001CF7E215540>


  print(crawl_page(''))


In [4]:
import asyncio
import time

async def crawl_page(url):
    """Simulate fetching ``url`` as a coroutine.

    The delay is the integer that follows the last underscore in ``url``
    (``'url_3'`` waits 3 seconds). ``int()`` raises ``ValueError`` when that
    suffix is not a number.
    """
    print('crawling {}'.format(url))
    delay = int(url.rsplit('_', 1)[-1])
    # Awaiting asyncio.sleep() suspends only this coroutine for the delay.
    await asyncio.sleep(delay)
    print('OK {}'.format(url))
    
async def main(urls):
    """Crawl all of ``urls`` concurrently and wait until every page is done."""
    # create_task() schedules each coroutine on the running event loop, so the
    # crawls overlap instead of running back to back.
    jobs = [asyncio.create_task(crawl_page(page)) for page in urls]
    for job in jobs:
        await job
        
# jupyter
t1 = time.time()
await main(['url_1', 'url_2', 'url_3', 'url_4'])
t2 = time.time()
print(t2 -t1) 

# Ipython
# %time asyncio.run(main(['url_1', 'url_2', 'url_3', 'url_4']))

crawling url_1
crawling url_2
crawling url_3
crawling url_4
OK url_1
OK url_2
OK url_3
OK url_4
4.0193517208099365


In [8]:
import asyncio
import time

async def crawl_page(url):
    """Simulate fetching ``url`` as a coroutine.

    The delay is the integer that follows the last underscore in ``url``
    (``'url_3'`` waits 3 seconds). ``int()`` raises ``ValueError`` when that
    suffix is not a number.
    """
    print('crawling {}'.format(url))
    delay = int(url.rsplit('_', 1)[-1])
    # Awaiting asyncio.sleep() suspends only this coroutine for the delay.
    await asyncio.sleep(delay)
    print('OK {}'.format(url))
    
async def main(urls):
    """Crawl all of ``urls`` concurrently and wait for the whole batch."""
    jobs = [asyncio.create_task(crawl_page(page)) for page in urls]
    # gather() waits for every task in a single await, replacing an explicit
    # loop over the tasks.
    await asyncio.gather(*jobs)
        
# jupyter
t1 = time.time()
await main(['url_1', 'url_2', 'url_3', 'url_4'])
t2 = time.time()
print(t2 -t1) 

# Ipython
# %time asyncio.run(main(['url_1', 'url_2', 'url_3', 'url_4']))

crawling url_1
crawling url_2
crawling url_3
crawling url_4
OK url_1
OK url_2
OK url_3
OK url_4
4.017024040222168


## 解密协程运行时

In [10]:
import asyncio


async def worker_1():
    """Print a start message, wait one second, then print a done message."""
    print('worker_1 start')
    pause = asyncio.sleep(1)
    await pause
    print('worker_1 done')


async def worker_2():
    """Print a start message, wait two seconds, then print a done message."""
    print('worker_2 start')
    pause = asyncio.sleep(2)
    await pause
    print('worker_2 done')
    
async def main():
    """Run worker_1 to completion, then worker_2, printing a marker at each step.

    Awaiting the coroutines directly creates no tasks, so worker_2 does not
    start until worker_1 has finished.
    """
    print('before await')
    first = worker_1()
    await first
    print('awaited worker_1')
    second = worker_2()
    await second
    print('awaited worker_2')
    

# jupyter
t1 = time.time()
await main()
t2 = time.time()
print(t2 -t1) 
    
# Ipython
# %time asyncio.run(main())

before await
worker_1 start
worker_1 done
awaited worker_1
worker_2 start
worker_2 done
awaited worker_2
3.020576000213623


In [14]:
import asyncio


async def worker_1():
    """Print a start message, wait one second, then print a done message."""
    print('worker_1 start')
    pause = asyncio.sleep(1)
    await pause
    print('worker_1 done')


async def worker_2():
    """Print a start message, wait two seconds, then print a done message."""
    print('worker_2 start')
    pause = asyncio.sleep(2)
    await pause
    print('worker_2 done')
    
async def main():
    """Schedule both workers as tasks, then await worker_1 followed by worker_2.

    Both tasks are created before the first await, so the workers run
    concurrently while main waits for them in turn.
    """
    first_task = asyncio.create_task(worker_1())
    second_task = asyncio.create_task(worker_2())
    print('before await')
    await first_task
    print('awaited worker_1')
    await second_task
    print('awaited worker_2')
    

# jupyter
t1 = time.time()
await main()
t2 = time.time()
print(t2 -t1) 
    
# Ipython
# %time asyncio.run(main())

before await
worker_1 start
worker_2 start
worker_1 done
awaited worker_1
worker_2 done
awaited worker_2
2.0118842124938965


### 加深解密协程运行流程

In [19]:
import asyncio


async def worker_1():
    """Print a start message, block for one second, then print a done message.

    NOTE: ``time`` is not imported in this cell; the call below relies on an
    ``import time`` executed earlier in the session.
    """
    print('worker_1 start')
    # Used in place of the cooperative `await asyncio.sleep(1)`: time.sleep() is
    # a plain blocking call and this function contains no await, so the
    # coroutine never suspends while it waits.
    time.sleep(1)
    print('worker_1 done')


async def worker_2():
    """Print a start message, wait two seconds, then print a done message."""
    print('worker_2 start')
    pause = asyncio.sleep(2)
    await pause
    print('worker_2 done')


async def main():
    """Schedule both workers as tasks, then await worker_1 followed by worker_2.

    Both tasks are created before the first await; main then waits for them
    in turn and prints a marker after each one completes.
    """
    first_task = asyncio.create_task(worker_1())
    second_task = asyncio.create_task(worker_2())
    print('before await')
    await first_task
    print('awaited worker_1')
    await second_task
    print('awaited worker_2')


# jupyter
t1 = time.time()
await main()
t2 = time.time()
print(t2 - t1)

# Ipython
# %time asyncio.run(main())

before await
worker_1 start
worker_1 done
worker_2 start
awaited worker_1
worker_2 done
awaited worker_2
3.015061378479004


In [23]:
import asyncio


async def worker_1():
    """Print a start message, wait one second, then print a done message."""
    print('worker_1 start')
    pause = asyncio.sleep(1)
    await pause
    print('worker_1 done')


async def worker_2():
    """Print a start message, wait two seconds, then print a done message."""
    print('worker_2 start')
    pause = asyncio.sleep(2)
    await pause
    print('worker_2 done')


async def main():
    """Schedule both workers as tasks, then await worker_2 before worker_1.

    The tasks are created in worker_1, worker_2 order, but main waits for the
    slower worker_2 first; only the order of the two awaits is swapped.
    """
    first_task = asyncio.create_task(worker_1())
    second_task = asyncio.create_task(worker_2())
    print('before await')
    await second_task
    print('awaited worker_2')
    await first_task
    print('awaited worker_1')


# jupyter
t1 = time.time()
await main()
t2 = time.time()
print(t2 - t1)

# Ipython
# %time asyncio.run(main())

before await
worker_1 start
worker_2 start
worker_1 done
worker_2 done
awaited worker_2
awaited worker_1
2.027482032775879


## 限定协程任务时长

In [8]:
import asyncio
import time

async def worker_1():
    """Wait one second, print a done message, and return 1."""
    pause = asyncio.sleep(1)
    await pause
    print('worker_1 done')
    return 1


async def worker_2():
    """Wait two seconds, print a done message, then fail with ZeroDivisionError."""
    pause = asyncio.sleep(2)
    await pause
    print('worker_2 done')
    # Deliberate failure: the division raises before any value is returned.
    return 2 / 0


async def worker_3():
    """Wait three seconds and return 3."""
    pause = asyncio.sleep(3)
    await pause
    return 3

async def main():
    """Run three workers concurrently, cancel worker_3, and print all outcomes.

    worker_3's task is cancelled after a 2-second wait. ``gather`` is called
    with ``return_exceptions=True``, so a worker's exception or cancellation
    appears as an exception object in the printed result list instead of
    being raised here.
    """
    workers = (worker_1, worker_2, worker_3)
    tasks = [asyncio.create_task(worker()) for worker in workers]

    # Let the tasks run for 2 seconds, then cancel the one wrapping worker_3.
    await asyncio.sleep(2)
    tasks[-1].cancel()

    res = await asyncio.gather(*tasks, return_exceptions=True)
    print(res)


# jupyter
t1 = time.time()
await main()
t2 = time.time()
print(t2 - t1)

# Ipython
# %time asyncio.run(main())

worker_1 done
worker_2 done
[1, ZeroDivisionError('division by zero'), CancelledError()]
1.9989843368530273
