### 1. 使用非阻塞 socket 实现 HTTP 请求
在 while 循环中等待时不会发生阻塞，等待期间可以执行其他操作

In [None]:
import socket
import time
from urllib.parse import urlparse


def get_html(url):
    """Fetch ``url`` over plain HTTP with a non-blocking socket and print the body.

    The socket never blocks: connecting, sending and receiving are each
    retried in a polling loop, which is the place where a caller could
    interleave other work while waiting.

    :param url: absolute ``http://`` URL; an explicit port is honoured and
        defaults to 80, and the query string is sent along with the path.
    :return: the response body (everything after the first blank line),
        decoded as UTF-8; the same text is printed.
    :raises ValueError: if ``url`` has no host part.
    :raises OSError: if the connection fails or is dropped.
    """
    import errno
    import os

    parts = urlparse(url)
    if not parts.hostname:
        raise ValueError("URL has no host: {!r}".format(url))
    # netloc may carry ":port", which connect() cannot resolve as a host name,
    # so the address is built from hostname/port and netloc is only used for
    # the Host header.
    address = (parts.hostname, parts.port or 80)
    target = parts.path or "/"
    if parts.query:
        target += "?" + parts.query
    # The method name must be upper-case GET.
    request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
        target, parts.netloc).encode('utf8')

    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        client.setblocking(False)
        try:
            client.connect(address)
        except BlockingIOError:
            # Expected for a non-blocking connect: the handshake is in progress.
            pass

        # Keep retrying until the whole request has been handed to the kernel.
        sent = 0
        while sent < len(request):
            try:
                sent += client.send(request[sent:])
            except (BlockingIOError, InterruptedError):
                continue
            except OSError as exc:
                # "Not connected yet" is retried; anything else is a real failure.
                if exc.errno != errno.ENOTCONN:
                    raise
                # Surface a failed connect instead of spinning forever.
                pending = client.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
                if pending:
                    raise OSError(pending, os.strerror(pending))

        # Poll for data until the peer closes the connection (empty read).
        chunks = []
        while True:
            try:
                chunk = client.recv(1024)
            except (BlockingIOError, InterruptedError):
                continue
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        client.close()

    # partition keeps blank lines inside the body and yields "" when the
    # header/body separator is missing.
    _, _, body = b"".join(chunks).decode("utf8").partition("\r\n\r\n")
    print(body)
    return body


# Fetch 20 goods pages one after another and report the total elapsed time.
start_time = time.time()
goods_urls = ["http://shop.projectsedu.com/goods/{}/".format(n) for n in range(20)]
for url in goods_urls:
    get_html(url)
elapsed = time.time() - start_time
print(elapsed)

### 2. select

In [40]:
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE

# DefaultSelector is an alias for the most efficient selector implementation
# available on the current platform (for example epoll on Linux).
selector = DefaultSelector()
# URLs whose fetch has not finished yet; Fetcher.read_able removes entries.
urls = []
# Set to True by Fetcher.read_able once `urls` is empty; loop() polls it.
stop = False


def loop():
    """Run the event loop until the module-level ``stop`` flag becomes true.

    1. select itself offers no register-a-callback mode.
    2. Calling the right handler after a socket descriptor changes state is
       therefore left to the program: keep asking the selector which
       descriptors are ready and invoke the callback stored with each one.

    Pattern: callbacks + event loop + select (poll / epoll).

    :return: None
    """
    while not stop:
        # select() returns (key, event mask) pairs; key is a namedtuple whose
        # ``data`` field holds the callback supplied at registration time.
        for key, _events in selector.select():
            handler = key.data
            handler(key)


class Fetcher(object):
    """Fetch one URL over plain HTTP, driven by selector callbacks.

    Life cycle: ``get_html`` starts a non-blocking connect and registers the
    socket for write readiness; ``connect`` sends the request once the socket
    is writable and switches the registration to read readiness;
    ``read_able`` accumulates the response and finishes on end-of-stream.
    """

    def __init__(self):
        self.data = b''          # raw response bytes received so far
        self.spider_url = None   # URL exactly as passed to get_html
        self.host = None         # network location, sent in the Host header
        self.path = None         # request target: path plus optional query
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)

    @staticmethod
    def _split_body(raw):
        """Return the UTF-8 decoded body of a raw HTTP response.

        The body is everything after the first blank line. Blank lines inside
        the body are kept, and a response without the separator yields ``""``.

        :param raw: complete response as bytes.
        :return: the body as ``str``.
        """
        _, _, body = raw.decode("utf8").partition("\r\n\r\n")
        return body

    def connect(self, key):
        """
        Callback for write readiness: the socket is writable, so send the
        request and start waiting for the response.

        :param key: SelectorKey of the ready descriptor.
        :return: None
        """
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
            self.path, self.host).encode('utf8')
        self.client.send(request)
        # key.fd is the value of self.client.fileno(); swap the registration
        # from write readiness to read readiness in a single call.
        selector.modify(key.fd, EVENT_READ, self.read_able)

    def read_able(self, key):
        """
        Callback for read readiness: collect response data, and on
        end-of-stream print the body and retire this fetcher.

        :param key: SelectorKey of the ready descriptor.
        :return: None
        """
        global stop
        try:
            chunk = self.client.recv(1024)
        except BlockingIOError:
            # Spurious wake-up: nothing to read yet, wait for the next event.
            return
        if chunk:
            self.data += chunk
            return
        # Empty read: the peer closed the connection, the response is complete.
        selector.unregister(key.fd)
        # Close first so the socket is released even if decoding fails.
        self.client.close()
        # HTML content.
        print(self._split_body(self.data))
        if self.spider_url in urls:
            urls.remove(self.spider_url)
        if not urls:
            # Nothing left to fetch: tell the event loop to exit.
            stop = True

    def get_html(self, url):
        """
        Start fetching ``url``; the response is handled by the callbacks.

        :param url: absolute ``http://`` URL; an explicit port is honoured
            and defaults to 80.
        :return: None
        :raises ValueError: if ``url`` has no host part.
        """
        self.spider_url = url
        parsed = urlparse(url)
        if not parsed.hostname:
            self.client.close()
            raise ValueError("URL has no host: {!r}".format(url))
        # netloc may include ":port", which is fine for the Host header but
        # not for connect(); the address uses hostname/port instead.
        self.host = parsed.netloc
        self.path = parsed.path or "/"
        if parsed.query:
            self.path += "?" + parsed.query
        try:
            self.client.connect((parsed.hostname, parsed.port or 80))
        except BlockingIOError:
            # Expected for a non-blocking connect: the handshake is in progress.
            pass
        # Register the descriptor, the event of interest and the callback to
        # run when the socket becomes writable (i.e. the connect completed).
        selector.register(self.client.fileno(), events=EVENT_WRITE, data=self.connect)


# Start all 20 requests up front, then let the event loop drive them to
# completion and report the total elapsed time.
start_time = time.time()
for goods_id in range(20):
    url = "http://shop.projectsedu.com/goods/{}/".format(goods_id)
    urls.append(url)
    fetcher = Fetcher()
    fetcher.get_html(url)
# A single page such as "http://www.baidu.com" can be queued the same way:
# append it to `urls`, create a Fetcher and call get_html on it.
loop()
elapsed = time.time() - start_time
print(elapsed)

{"detail":"未找到。"}
{"id":7,"category":{"id":132,"sub_cat":[],"name":"进口生鲜","code":"jksx","desc":"","category_type":2,"is_tab":false,"add_time":"2017-07-29T18:56:34","parent_category":110},"images":[{"image":"http://shop.projectsedu.com/media/goods/images/8_P_1448945032810.jpg"},{"image":"http://shop.projectsedu.com/media/goods/images/8_P_1448945032646.jpg"}],"goods_sn":"","name":"五星眼肉牛排套餐8片装原味原切生鲜牛肉","click_num":3037,"sold_num":0,"fav_num":0,"goods_num":0,"market_price":150.0,"shop_price":125.0,"goods_brief":"","goods_desc":"<p><img src=\"/media/goods/images/2_20170719161405_249.jpg\" title=\"\" alt=\"2.jpg\"/></p><p><img src=\"/media/goods/images/2_20170719161414_628.jpg\" title=\"\" alt=\"2.jpg\"/></p><p><img src=\"/media/goods/images/2_20170719161435_381.jpg\" title=\"\" alt=\"2.jpg\"/></p>","ship_free":true,"goods_front_image":"http://shop.projectsedu.com/media/goods/images/8_P_1448945032810.jpg","is_new":false,"is_hot":false,"add_time":"2017-07-31T23:53:53"}
{"id":1,"category":{"id

### 3. 协程
#### yield
#### send 方法：向生成器发送值，生成器内部通过 yield 表达式接收

In [32]:
def get_func():
    """Generator demonstrating ``send``.

    Yields the URL string first. The value the caller passes to ``send``
    becomes the result of that ``yield`` and is printed; 2 and 3 are yielded
    next, and ``"end"`` is the generator's return value.
    """
    sent_in = yield "http://projectsedu.com"
    print(sent_in)
    yield 2
    yield 3
    return "end"


# Drive the generator: prime it, send a value in, then run it to the end.
gen = get_func()
print(next(gen))
print(gen.send("bobby"))
print(next(gen))
try:
    re = next(gen)
except StopIteration as e:
    # The generator's return value travels on the StopIteration instance.
    re = e.value
print(re)

http://projectsedu.com
bobby
2
3
end


#### close

In [37]:
def get_func():
    """Generator demonstrating ``close()``.

    ``close()`` raises GeneratorExit at the paused ``yield``. The handler
    re-raises it, which lets ``close()`` return normally. If the generator is
    resumed with ``next`` instead, it finishes with the return value
    ``"end"``.
    """
    try:
        yield "http://projectsedu.com"
    # GeneratorExit derives from BaseException, not from Exception.
    except GeneratorExit:
        # Re-raise rather than `raise StopIteration`: under PEP 479 a
        # StopIteration raised inside a generator is converted to
        # RuntimeError, which would make close() fail.
        raise
    # Swallowing GeneratorExit and yielding another value here would also
    # make close() raise RuntimeError.
    return "end"


gen = get_func()
print(next(gen))
# close() raises GeneratorExit inside the generator at the paused yield.
gen.close()
# next() on a generator that has been closed raises StopIteration.
print(next(gen))

http://projectsedu.com


  from ipykernel import kernelapp as app


StopIteration: 

#### throw
向生成器抛出异常：异常会在生成器上一次暂停的 yield 处引发

In [35]:
def get_func():
    """Generator demonstrating ``throw``.

    An ``Exception`` thrown in while the generator is paused at the first
    ``yield`` is caught and discarded, so execution continues with the
    following yields (2, then 3). The return value is ``"end"``.
    """
    try:
        yield "http://projectsedu.com"
    except Exception:
        # Discard whatever was thrown in so the generator keeps running.
        pass
    yield 2
    yield 3
    return "end"


gen = get_func()
print(next(gen))
# Left disabled: enabling the call below would raise the exception at the
# first yield, where get_func's except clause catches it.
# gen.throw(Exception, "error")
print(next(gen))
print(next(gen))


http://projectsedu.com
2
3


### 4. yield from

In [18]:
# Maps each product key to the (total, values) tuple returned by value_sum;
# filled in by delegation().
result = {}


def value_sum(proname):
    """Sub-generator that tallies the sales figures sent into it.

    Each value received through ``send`` is added to a running total,
    printed and recorded. Sending ``None`` ends the tally.

    :param proname: product name used as the prefix of each progress line.
    :return: ``(total, values)`` - the sum of the figures and the list of
        figures in the order they were received.
    """
    total = 0
    values = []
    while True:
        # Receive the next figure from the caller.
        x = yield
        # Only None terminates: a legitimate figure of 0 must still be counted.
        if x is None:
            break
        total += x
        print(proname + "销量：", x)
        values.append(x)
    return total, values


# Delegating generator
def delegation(key):
    """Relay values between the caller and ``value_sum`` and store each result.

    ``yield from`` sets up a two-way channel between the caller and the
    sub-generator: values the caller passes to this generator's ``send`` are
    forwarded to the sub-generator, which receives them through its own
    ``yield``. When the sub-generator returns, its return value becomes the
    value of the ``yield from`` expression and is stored in ``result``.

    :param key: product key; used as the ``result`` key and passed on to
        ``value_sum``.
    """
    while True:
        summary = yield from value_sum(key)
        result[key] = summary
        print(key + "销量统计完成")


# Caller
def main():
    """Feed each product's sales figures through a delegating generator, then print ``result``."""
    sales_by_product = {
        "d1": [100, 200, 300],
        "d2": [1233, 213, 343],
        "d3": [1123, 232, 1231],
    }
    for product, figures in sales_by_product.items():
        relay = delegation(product)
        # Prime the generator so it is paused at the sub-generator's yield.
        next(relay)
        for figure in figures:
            relay.send(figure)
        # None ends the tally for this product.
        relay.send(None)
    print(result)


# Run the yield-from demo.
main()

d1销量： 100
d1销量： 200
d1销量： 300
d1销量统计完成
d2销量： 1233
d2销量： 213
d2销量： 343
d2销量统计完成
d3销量： 1123
d3销量： 232
d3销量： 1231
d3销量统计完成
{'d1': (600, [100, 200, 300]), 'd2': (1789, [1233, 213, 343]), 'd3': (2586, [1123, 232, 1231])}


### 5. 使用生成器实现协程

In [44]:
import inspect


def get_func():
    """Minimal generator: yields 1 once, then finishes with the return value ``'bobby'``."""
    # Any value sent in at this point is ignored.
    yield 1
    return 'bobby'

def downloader():
    """Unfinished placeholder.

    The original definition stopped after the function name, which is a
    syntax error and prevented the rest of this cell from running. The stub
    keeps the name and fails loudly if it is called.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError("downloader is not implemented yet")

gen = get_func()
# Report the generator's state at each stage of its life cycle.
print(inspect.getgeneratorstate(gen))  # before the first next()
next(gen)
print(inspect.getgeneratorstate(gen))  # paused at a yield
try:
    next(gen)
except StopIteration:
    # Expected: the generator ran to its return statement.
    pass
print(inspect.getgeneratorstate(gen))  # after it has finished

GEN_CREATED
GEN_SUSPENDED
GEN_CLOSED
