# 协程
- 参考资料
    - http://python.jobbole.com/86481/
    - http://python.jobbole.com/87310/
    - https://segmentfault.com/a/1190000009781688
        
# 迭代器
- 可迭代(Iterable):直接作用于for循环的变量
- 迭代器(Iterator)：不但可以作用于for循环，还可以被next调用
- list是典型的可迭代对象，但不是迭代器
- 通过isinstance
- iterable和iterator转换
    - iter函数

In [2]:
# 可迭代
l = [i for i in range(10)]

# l是可迭代的，但不是迭代器
for idx in l:
    print(idx)
    
# range是个迭代器
for i in range(5):
    print(i)

0
1
2
3
4
5
6
7
8
9
0
1
2
3
4


In [5]:
# isinstance案例
# 判断某个变量是否是一个实例

# 判断是否可迭代
from collections import Iterable
ll = [1,2,3,4,5]

print(isinstance(ll, Iterable))

from collections import Iterator
print(isinstance(ll, Iterator))

True
False


In [7]:
# iter函数

s = 'i love wangxiaojing'

print(isinstance(s, Iterable))
print(isinstance(s, Iterator))

s_iter = iter(s)
print(isinstance(s_iter, Iterable))
print(isinstance(s_iter, Iterator))

True
False
True
True


# 生成器
- generator：一边循环一边计算下一个元素的机制/算法
- 需要满足三个条件：
    - 每次调用都生产出for循环需要的下一个元素或者
    - 如果达到最后一个后，报出StopItaration异常
    - 可以被next函数调用
- 如何生成一个生成器
    - 直接使用
    - 如果函数中包含yield，则这个函数就叫生成器
    - next调用函数，遇到yield返回

In [12]:
# 直接使用生成器

L = [x*x for x in range(5)]# 放在中括号中是列表生成器
g = (x*x for x in range(5))#放在小括号中就是生成器

print(L)
print(g)
for i in g:
    print(i)

print(type(L))
print(type(g))

[0, 1, 4, 9, 16]
<generator object <genexpr> at 0x7efd58246eb8>
0
1
4
9
16
<class 'list'>
<class 'generator'>


In [13]:
# 函数案例

def odd():
    print("Step 1")
    print("Step 2")
    print("Step 3")
    return None

odd()

Step 1
Step 2
Step 3


In [23]:
# 生成器的案例
# 在函数odd中，yield负责返回
def odd():
    print("Step 1")
    yield 1
    print("Step 2")
    yield 2
    print("Step 3")
    yield 3
    


# for i in odd():
#    print(i)
# odd()是调用生成器
g = odd()
one = next(g)
print(one)

two = next(g)
print(two)

three = next(g)
print(three)

Step 1
1
Step 2
2
Step 3
3
Step 1
1
Step 2
2
Step 3
3


In [24]:
# for循环调用生成器
def fib(max):
    n, a, b = 0,0,1 #注意写法
    while n < max:
        print(b)
        a, b = b, a+b
        n +=1
    return 'Done'

fib(5)

1
1
2
3
5


'Done'

In [30]:
# 斐波那契数列的生成器写法
# for循环调用生成器
def fib(max):
    n, a, b = 0,0,1 #注意写法
    while n < max:
        yield b
        a, b = b, a+b
        n +=1
        
    # 需要注意，报出异常是的返回值是return的返回值
    return 'Done'

g = fib(5)

for i in range(6):
    rst = next(g)
    print(rst)

1
1
2
3
5


StopIteration: Done

In [31]:
ge = fib(10)
'''
生成器的典型用法是在for中使用
比较常用的典型生成器就是range
'''

for i in ge:
    print(i)

1
1
2
3
5
8
13
21
34
55


# 协程
- 历史历程
    - 3.4引入协程，用yield实现
    - 3.5引入协程语法
    - 实现的额协程比较好的包有asyncio，tornado，gevent
- 定义：协程是为非式多任务产生子程序的计算机程序组件，协程允许不同入口点在不同位置暂停或开始执行程序
- 从技术角度讲，协程就是一个你可以暂停执行的函数，或者干脆把协程理解成生成器
- 协程的实现：
    - yield返回
    - send调用

- 协程的四个状态
    - inspect.geigeneratorstate(...) 函数确定，该函数会返回下述字符串中的一个
    - GEN_CREATED:等待开始执行
    - GEN_RUNNING:解释器正在执行
    - GEN_SUSPENED:在yield表达式处暂停
    - GEN_CLOSED:执行结束
    - next预激（prime）
    - 代码案例v2
- 协程终止
    - 协程中未处理的异常会向上冒泡，传给next函数或send方法的调用方（即触发协程的对象）
    - 终止协程的一种方式：发送某个哨符值，让协程退出。内置的None和Ellipsis等常量经常用作哨符值

- yield from
    - 调用协程为了得到返回值，协程必须正常终止
    - 生成器正常终止会发出StopIteration异常，异常对象的value属性保存返回值
    - yield from从内部铺获StopIteration异常
    - 案例v3
    - 委派生成器
        - 包含yield from表达式的生成器函数
        - 委派生成器在yield from表达式处暂定，调用方可以直接把数据发给生成器
        - 子生成器再把产出的值发给调用方
        - 子生成器在最后，解释器会抛出stopIteration,并且把返回值附加到异常对象上
        - 案例v4
        

In [32]:
# 协程代码案例1
def simple_coroutine():
    print('-> start')
    x = yield
    print('-> received', x)

# 主线程
sc = simple_coroutine()
print(1111)
# 可以使用sc.send(None),效果一样
next(sc) # 预激

print(2222)
sc.send('zhexiao')

1111
-> start
2222
-> received zhexiao


StopIteration: 

In [34]:
# 案例V2，协程的状态
def simple_coroutine(a):
    print('-> start')
    
    b = yield a
    print('-> received', a, b)
    
    c = yield a+b
    print('-> received', a, b, c)
    
# runc
sc = simple_coroutine(5)

aa = next(sc)
print(aa)
bb = sc.send(6)
print(bb)
cc = sc.send(7)
print(cc)

-> start
5
-> received 5 6
11
-> received 5 6 7


StopIteration: 

In [38]:
# 案例v3
def gen():
    for c in 'AB':
        yield c
 
# list直接用生成器作为参数
print(gen())
print(list(gen()))

def gen_new():
    yield from 'AB'
    
print(list(gen_new()))

<generator object gen at 0x7efd58182570>
['A', 'B']
['A', 'B']


In [37]:
print(list('AB'))

['A', 'B']


In [39]:
# 案例v04， 委派生成器
from collections import namedtuple

'''
解释：
1. 外层 for 循环每次迭代会新建一个 grouper 实例，赋值给 coroutine 变量； grouper 是委派生成器。
2. 调用 next(coroutine)，预激委派生成器 grouper，此时进入 while True 循环，调用子生成器 averager 后，在 yield from 表达式处暂停。
3. 内层 for 循环调用 coroutine.send(value)，直接把值传给子生成器 averager。同时，当前的 grouper 实例（coroutine）在 yield from 表达式处暂停。
4. 内层循环结束后， grouper 实例依旧在 yield from 表达式处暂停，因此， grouper函数定义体中为 results[key] 赋值的语句还没有执行。
5. coroutine.send(None) 终止 averager 子生成器，子生成器抛出 StopIteration 异常并将返回的数据包含在异常对象的value中，yield from 可以直接抓取 StopItration 异常并将异常对象的 value 赋值给 results[key]
'''
ResClass = namedtuple('Res', 'count average')


# 子生成器
def averager():
    total = 0.0
    count = 0
    average = None

    while True:
        term = yield
        # None是哨兵值
        if term is None:
            break
        total += term
        count += 1
        average = total / count

    return ResClass(count, average)


# 委派生成器
def grouper(storages, key):
    while True:
        # 获取averager()返回的值
        storages[key] = yield from averager()


# 客户端代码
def client():
    process_data = {
        'boys_2': [39.0, 40.8, 43.2, 40.8, 43.1, 38.6, 41.4, 40.6, 36.3],
        'boys_1': [1.38, 1.5, 1.32, 1.25, 1.37, 1.48, 1.25, 1.49, 1.46]
    }

    storages = {}
    for k, v in process_data.items():
        # 获得协程
        coroutine = grouper(storages, k)

        # 预激协程
        next(coroutine)

        # 发送数据到协程
        for dt in v:
            coroutine.send(dt)

        # 终止协程
        coroutine.send(None)
    print(storages)

# run
client()




{'boys_2': Res(count=9, average=40.422222222222224), 'boys_1': Res(count=9, average=1.3888888888888888)}


# 还剩下的内容
- xml,json,
- re,xpath
- 网络编程：socket，ftp,mail
- http协议==> http web server 小项目
- django，尽可能详细

-------------
- 习题课
--
-- 爬虫

# asyncio
- python3.4开始引入标配库当中，内置对异步io的支持
- asyncio本身是一个消息循环
- 步骤：
    - 创建消息循环
    - 把协程导入
    - 关闭

In [52]:
import threading
# y引入异步io包
import asyncio

# 使用协程
@asyncio.coroutine
def hello():
    print('hello world! (%s)' % threading.currentThread())
    print('Start.....(%s)' % threading.currentThread())
    yield from asyncio.sleep(10)
    print('Done.....(%s)' % threading.currentThread())
    print('hello again (%s)' % threading.currentThread())

# 启动消息循环   
loop = asyncio.get_event_loop()
# 定义任务
tasks = [hello(), hello()]
# asyncio使用wait等待task执行完毕
loop.run_until_complete(asyncio.wait(tasks))
# 关闭消息循环
loop.close

RuntimeError: Event loop is closed

In [2]:
import asyncio

@asyncio.coroutine
def wget(host):
    print('wget %s...' % host)
    # 异步请求网络地址
    connect = asyncio.open_connection(host, 80)
    # 注意yield from的用法
    reader, writer = yield from connect
    header = 'GET / HTTP/1.0\r\nHOST:%s\r\n\r\n' %host
    writer.writer(header.encode('utf-8'))
    yield from writer.drain()
    while True:
        line = yield from reader.readline()
        # http协议的换行使用\r\n
        if line == b'\r\n':
            break
        print('%s header > %s' % (host, line.decode('utf-8').rstrip()))
    writer.close()
    
loop = asyncio.get_event_loop()
tasks = [wget(host) for host in ['www.sina.com.cn', 'www.sohu.com', 'www.163.com']]
loop.run_until_complete(asyncio.wait(tasks))
loop.close

wget www.163.com...
wget www.sohu.com...
wget www.sina.com.cn...


Task exception was never retrieved
future: <Task finished coro=<wget() done, defined at <ipython-input-2-ca96be75bfea>:3> exception=AttributeError("'StreamWriter' object has no attribute 'writer'",)>
Traceback (most recent call last):
  File "<ipython-input-2-ca96be75bfea>", line 11, in wget
    writer.writer(header.encode('utf-8'))
AttributeError: 'StreamWriter' object has no attribute 'writer'
Task exception was never retrieved
future: <Task finished coro=<wget() done, defined at <ipython-input-2-ca96be75bfea>:3> exception=AttributeError("'StreamWriter' object has no attribute 'writer'",)>
Traceback (most recent call last):
  File "<ipython-input-2-ca96be75bfea>", line 11, in wget
    writer.writer(header.encode('utf-8'))
AttributeError: 'StreamWriter' object has no attribute 'writer'
Task exception was never retrieved
future: <Task finished coro=<wget() done, defined at <ipython-input-2-ca96be75bfea>:3> exception=AttributeError("'StreamWriter' object has no attribute 'writer'",)>
Tr

<bound method _UnixSelectorEventLoop.close of <_UnixSelectorEventLoop running=False closed=False debug=False>>

# async and await
- 为了更好的表示异步io
- python3.5引入
- 让协程代码更简洁
- 使用上，可以简单的进行替换
    - 用async替换@asyncio.coroutine
    - await替换yield from

In [1]:
import threading
# y引入异步io包
import asyncio

# 使用协程
# @asyncio.coroutine
async def hello():
    print('hello world! (%s)' % threading.currentThread())
    print('Start.....(%s)' % threading.currentThread())
    await asyncio.sleep(10)
    print('Done.....(%s)' % threading.currentThread())
    print('hello again (%s)' % threading.currentThread())

# 启动消息循环   
loop = asyncio.get_event_loop()
# 定义任务
tasks = [hello(), hello()]
# asyncio使用wait等待task执行完毕
loop.run_until_complete(asyncio.wait(tasks))
# 关闭消息循环
loop.close

hello world! (<_MainThread(MainThread, started 140089803003648)>)
Start.....(<_MainThread(MainThread, started 140089803003648)>)
hello world! (<_MainThread(MainThread, started 140089803003648)>)
Start.....(<_MainThread(MainThread, started 140089803003648)>)
Done.....(<_MainThread(MainThread, started 140089803003648)>)
hello again (<_MainThread(MainThread, started 140089803003648)>)
Done.....(<_MainThread(MainThread, started 140089803003648)>)
hello again (<_MainThread(MainThread, started 140089803003648)>)


<bound method _UnixSelectorEventLoop.close of <_UnixSelectorEventLoop running=False closed=False debug=False>>

# aiohttp
- asyncio实现单线程的并发io，在客户端用处不大
- 在服务器端可以asyncio+coroutine配合，因为http是io操作
- asyncio实现了tcp，udp，ssl等协议
- aiohttp是基于asyncio实现的http框架
- pip install aiohttp

In [24]:
# aiohttp案例

import asyncio

from aiohttp import web

async def index(request):
    await asyncio.sleep(0.5)
    return web.Response(body=b'<h1>Index</h1>')

async def hello(request):
    await asyncio.sleep(0.5)
    text = '<h1>hello, %s!</h1>' % request.match_info['name']
    return web.Response(body=text.encode('utf-8'))

async def init(loop):
    app = web.Application(loop=loop)
    app.router.add_route('GET', '/', index)
    app.router.add_route('GET', '/hello/{name}', hello)
    srv = await loop.create_server(app.make_handler(), '127.0.0.1', 8000)
    print('Server started at http://127.0.0.1:8000')
    return srv

loop = asyncio.get_event_loop()
loop.run_until_complete(init(loop))
loop.run_forever()

  def _remove(item, selfref=ref(self)):
  def _remove(item, selfref=ref(self)):


RuntimeError: Event loop is closed

# concurrent.futures
- python3新增的库
- 类似其他语言的线程池的概念
- 利用multiprocessing实现真正的并行计算
- 核心原理：以子进程的形式，并行运行多个python解释器，
从而令python程序可以利用多核CPU来提升执行速度。
由于子进程与主解释器相分离，所以他们的全局解释器锁也是相互独立的。
每个子进程都能够完整的使用一个CPU内核。
    - concurrent.futures.Executor
    - ThreadPoolExecutor
    - ProcessPoolExcutor
    - 执行的时候需要自行选择
- submit(fn, args, kwargs)
    - fn:异步执行的函数
    - args， kwargs参数

In [None]:
# 关于concurrent的案例
from concurrent.futures import ThreadPoolExecutor
import time

def return_future(msg):
    time.sleep(3)
    return msg

# 创建一个线程池
pool = ThreadPoolExecutor(max_workers=2)

# 往线程池加入两个task
f1 = pool.submit(return_future, 'hello')
f2 = pool.submit(return_future, 'world')

# 等待执行完毕
print(f1.done())
time.sleep(3)
print(f2.done())

# 结果
print(f1.result())
print(f2.result())

# current中map函数
- map(fn, \*iterables, timeout=None)
    - 跟map函数类似
    - 函数需要异步执行
    - timeout：超时时间
    - map跟submit使用一个就行
# Future
    - 我的同学三分钟后来家做客，我想送同学一条香烟作为礼物
    - 可是我手里没有存货
    - 于是让张同学去帮我买烟
    - 商店离我来回路程30分钟
    - 无奈我在朋友来后，只好拿一个空盒子给朋友看，并告诉朋友这是我送的礼物
    - 随后我和朋友吹了一个小时牛逼
    - 期间27分钟后张同学带着香烟返回，并将香烟放入烟盒
    - 两个小时后我朋友离开，并带走了我送给他的香烟
    - 其中用来展示的烟盒

In [None]:
# map案例
import time,re
import os,datetime
from concurrent import futures

data = ['1', '2']

def wait_on(argument):
    print(argument)
    time.sleep(2)
    return "ok"

ex = futures.ThreadPoolExecutor(max_workers=2)
for i in ex.map(wait_on, data):
    print(i)

In [3]:
import time
import asyncio
 
now = lambda : time.time()

        
 
async def do_some_work(x):
    print('Waiting: ', x)
 
start = now()
 
coroutine = do_some_work(2)
 
loop = asyncio.get_event_loop()
loop.run_until_complete( coroutine ) 
loop.close


 
print('TIME: ', now() - start)

Waiting:  2
TIME:  0.008651018142700195


In [4]:
import time
import asyncio
 

async def do_work(x):
    print('Waiting: ', x)
    await orm.create_pool(loop=loop, **db_dict)
 
start = time.time()
 
coroutine = do_work(2)
 
# loop = asyncio.get_event_loop()
# loop.run_until_complete(coroutine)
# loop.close
@asyncio.coroutine
def destory_pool():
    global pool
    if pool is not None :
        pool.close()
        yield from __pool.wait_closed()


if __name__ == '__main__':

    loop = asyncio.get_event_loop()
    loop.run_until_complete( coroutine )  
    loop.close()
    destory_pool()
    if loop.is_closed():
        sys.exit(0)
  

Waiting:  2


NameError: name 'orm' is not defined

In [49]:
import asyncio
import threading as tg
if __name__ == '__main__':
    def start_loop(loop):
        asyncio.set_event_loop(loop) # set loop
        loop.run_forever()
  
    async def do_some_work(x):
        print('Waiting {}'.format(x))
        await asyncio.sleep(x)
        print('Done after {}s'.format(x))
        return x
         
    new_loop = asyncio.new_event_loop()
    t = tg.Thread(target=start_loop, args=(new_loop,))
    t.daemon = 1
    t.start()
    print('begin')
    tasks=[]
    for s in [6,4]:
        tasks.append(asyncio.run_coroutine_threadsafe(do_some_work(s), new_loop))
    for task in tasks:print(task.result())   
    new_loop.stop()
    print('abc',tg.active_count())
    print('end')
    new_loop.close()

begin
Waiting 6
Waiting 4
Done after 4s
Done after 6s
6
4
abc 11
end


RuntimeError: Cannot close a running event loop

In [51]:
import asyncio
import time
 
now = lambda : time.time()
 
async def do_some_work(x):
    print('Waiting: ', x)
 
start = now()
 
coroutine = do_some_work(2)
loop = asyncio.get_event_loop()
# task = asyncio.ensure_future(coroutine)
task = loop.create_task(coroutine)
print(task)
loop.run_until_complete(task)
print(task)
print('TIME: ', now() - start)

RuntimeError: Event loop is closed

In [5]:
import time
import asyncio
 
now = lambda : time.time()
 
async def do_some_work(x):
    print('Waiting: ', x)
    return 'Done after {}s'.format(x)
 
def callback(future):
    print('Callback: ', future.result())
 
start = now()
 
coroutine = do_some_work(2)
loop = asyncio.get_event_loop()
task = asyncio.ensure_future(coroutine)
task.add_done_callback(callback)
loop.run_until_complete(task)
 
print('TIME: ', now() - start)

Waiting:  2
Callback:  Done after 2s
TIME:  0.004666805267333984


In [6]:
import asyncio
import time
 
now = lambda: time.time()
 
async def do_some_work(x):
    print('Waiting: ', x)
    await asyncio.sleep(x)
    return 'Done after {}s'.format(x)
 
start = now()
 
coroutine = do_some_work(2)
loop = asyncio.get_event_loop()
task = asyncio.ensure_future(coroutine)
loop.run_until_complete(task)
 
print('Task ret: ', task.result())
print('TIME: ', now() - start)

Waiting:  2
Task ret:  Done after 2s
TIME:  2.0101702213287354


In [7]:
import asyncio
 
import time
 
now = lambda: time.time()
 
async def do_some_work(x):
    print('Waiting: ', x)
 
    await asyncio.sleep(x)
    return 'Done after {}s'.format(x)
 
coroutine1 = do_some_work(1)
coroutine2 = do_some_work(2)
coroutine3 = do_some_work(2)
 
tasks = [
    asyncio.ensure_future(coroutine1),
    asyncio.ensure_future(coroutine2),
    asyncio.ensure_future(coroutine3)
]
 
start = now()
 
loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(asyncio.wait(tasks))
except KeyboardInterrupt as e:
    print(asyncio.Task.all_tasks())
    for task in asyncio.Task.all_tasks():
        print(task.cancel())
    loop.stop()
    loop.run_forever()
finally:
    loop.close()
 
print('TIME: ', now() - start)

Waiting:  1
Waiting:  2
Waiting:  2
TIME:  2.007338523864746
