## 1.1解压序列赋值给多个变量

In [1]:
p = (4, 5)
x, y = p
x, y 

(4, 5)

In [2]:
data = ["ACME", 50, 91.1, (2012, 12, 21)]
name, shares, price, date = data
name, date

('ACME', (2012, 12, 21))

In [3]:
name, shares, price ,(year, mon, day) = data
name,year, mon, day

('ACME', 2012, 12, 21)

In [4]:
p = (4, 5)
x, y , z = p

ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
s = "Hello"
a, b ,c ,d, e = s
a,b,c

In [None]:
_, shares, price, _ = data
shares, price

1.2解压可迭代对象赋值给多个变量

问题：如果一个可迭代对象的元素个数超过变量个数时，会抛出一个ValueError,
那么怎么才能从这个可迭代对象中解压出N个元素出来？

In [None]:
def drop_first_last(grades):
    """Return the mean of ``grades`` with the first and last entries dropped.

    Args:
        grades: Iterable of numeric scores containing at least three items.

    Returns:
        The arithmetic mean of every score between the first and the last.

    Raises:
        ValueError: If fewer than three grades are supplied, so that nothing
            is left to average once the two ends are removed.
    """
    # Star unpacking collects everything between the two ends into a list.
    first, *middle, last = grades
    if not middle:
        raise ValueError("need at least three grades to drop the first and last")
    return sum(middle) / len(middle)

record = ("Dave", "dave@example.com", '773-555-121', "847-555-1212")
name, email, *phone_numbers = record
name, email, phone_numbers # type(phone_numbers) = type([])

In [None]:
*trailing, current = [10, 8, 7, 1, 9, 5, 10, 3]
trailing, current

星号表达式在迭代元素为可变长元组的序列时是很有用的

In [None]:
# Tagged records of varying length: the first field selects the handler and
# the remaining fields are passed to it as arguments.
records = [("foo", 1, 2), ("bar", "hello"), ("foo", 3, 4)]


def do_foo(x, y):
    """Print a "foo" record made of two values."""
    print("foo", x, y)


def do_bar(s):
    """Print a "bar" record made of a single value."""
    print("bar", s)


# Map each known tag to the function that handles it.
_handlers = {"foo": do_foo, "bar": do_bar}

# The star expression gathers however many fields follow the tag.
for tag, *args in records:
    if tag in _handlers:
        _handlers[tag](*args)

In [None]:
# A passwd-style entry: seven fields separated by colons.
line = "nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false"
# Keep the first field and the last two; the star expression absorbs the rest.
uname, *fields, homedir, sh = line.split(":")
uname, homedir, sh

In [None]:
record = ("ACME", 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
name, year

In [None]:
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
head, tail

In [None]:
def sum(items):
    """Add up the elements of a non-empty sequence recursively.

    The sequence is split into its first element and the remainder with star
    unpacking, and the remainder is summed by a recursive call. Note that
    this definition shadows the built-in ``sum``.
    """
    first, *rest = items
    if not rest:
        # A single element is its own sum; this ends the recursion.
        return first
    return first + sum(rest)
sum(items)

1.3 保留最后N个元素

在迭代操作或者其他操作的时候，怎样只保留最后几个元素的历史记录

In [11]:
from collections import deque

#deque(maxlen=N) 构造函数会新建一个固定大小的队列，当新的元素加入并且这个队列
#已满的时候，最老的元素会自动被移除掉

def search(lines, pattern, history=5):
    """Yield every line containing ``pattern`` together with its context.

    Args:
        lines: Iterable of strings, for example an open text file.
        pattern: Substring looked for in each line.
        history: Maximum number of preceding lines kept as context.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        ``deque`` holding up to ``history`` lines seen before ``line``.
    """
    # deque(maxlen=N) is a fixed-size queue: once it is full, appending a new
    # item discards the oldest one.
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Hand out a snapshot. The working deque keeps changing while the
            # scan continues, so yielding it directly would alter the context
            # of matches the caller has already collected.
            yield line, previous_lines.copy()
        previous_lines.append(line)
        
if __name__ == "__main__":
    # Example use on a file: show every line mentioning "python", preceded by
    # up to five lines of context and followed by a separator.
    with open("somefile.txt") as f:
        for line, prevlines in search(f, "python", 5):
            # Lines read from a file keep their trailing newline, so suppress
            # the extra one print() would add.
            for pline in prevlines:
                print(pline, end="")
            # The match and the separator are printed once per match, not
            # once per context line.
            print(line, end="")
            print("-" * 20)

Keeping a limited history is a perfect use for a `collections.deque`.
For example, the following code performs a simple text match on a
sequence of lines and prints the matching line along with the previous
N lines of context when found:

[source,python]
--------------------
        previous_lines.append(line)

# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
         search(f, 'python', 5)
--------------------


1.4 查找最大或最小的N个元素

怎样从一个集合中获得最大或者最小的N个元素列表

In [1]:
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

[42, 37, 23]
[-4, 1, 2]


In [21]:
# Sample holdings: one dict per stock.
portfolio = [
    dict(name="IBM", shares=100, price=91.1),
    dict(name="AAPL", shares=50, price=543.22),
    dict(name="FB", shares=200, price=21.09),
    dict(name="HPQ", shares=35, price=31.75),
    dict(name="YHOO", shares=45, price=16.35),
    dict(name="ACME", shares=75, price=115.65),
]


def _price_of(stock):
    """Return the price recorded in a portfolio entry."""
    return stock["price"]


# The key function makes heapq rank whole entries by their price field.
cheap = heapq.nsmallest(3, portfolio, key=_price_of)
expensive = heapq.nlargest(3, portfolio, key=_price_of)

In [22]:
cheap,expensive

([{'name': 'YHOO', 'shares': 45, 'price': 16.35},
  {'name': 'FB', 'shares': 200, 'price': 21.09},
  {'name': 'HPQ', 'shares': 35, 'price': 31.75}],
 [{'name': 'AAPL', 'shares': 50, 'price': 543.22},
  {'name': 'ACME', 'shares': 75, 'price': 115.65},
  {'name': 'IBM', 'shares': 100, 'price': 91.1}])

In [8]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
# Reorder the list in place so that it can be used as a heap.
heapq.heapify(nums)
print(nums)
# The most important property of a heap is that heap[0] is always the
# smallest element. The remaining elements are reached with heapq.heappop(),
# which pops the first element and puts the next-smallest one in its place.
print(nums[0])
# For example, the three smallest elements are obtained by popping three times.
print(*(heapq.heappop(nums) for _ in range(3)), sep="\n")

[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
-4
-4
1
2


1.5 实现一个优先级队列

问题：怎样实现一个按优先级排序的队列？并且在这个队列上面每次pop操作总是返回优先级最高的那个元素

In [9]:
import heapq

class PriorityQueue:
    """Priority queue built on ``heapq`` that pops the highest priority first.

    Items pushed with equal priority come back in the order they were pushed.
    """

    def __init__(self):
        self._queue = []  # heap of (-priority, insertion index, item) entries
        self._index = 0  # running counter used as a tie-breaker

    def push(self, item, priority):
        """Add ``item`` to the queue with the given ``priority``."""
        # heapq keeps the smallest entry first, so the priority is negated to
        # bring the highest one to the front. The index orders entries of
        # equal priority by insertion, so ``item`` itself is never compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        *_, item = heapq.heappop(self._queue)
        return item
    
class Item:
    """Minimal named object that defines no ordering of its own."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f"Item({self.name!r})"
    
# 第一个pop操作返回优先级最高的元素，另外注意到如果两个有着相同优先级的元素
# （foo和grok），pop操作按照他们被插入到队列的顺序返回的
# heappop()函数总是返回最小的元素，保证队列pop操作返回正确元素的关键
# heapq.heappush() heapq.heappop()分别在队列_queue上插入和删除第一个元素，
# 并且队列_queue保证第一个元素拥有最高优先级
# 队列中的元素是 (-priority, index, item) 形式的元组；优先级取负数的目的是使元素按照优先级从高到低排序，这个跟普通的按优先级从低到高排序的堆排序恰巧相反
#index变量的作用是保证同等优先级元素的正确排序，通过保存一个不断增加的index变量，可以确保元素按照他们插入的顺序排序
# index变量也在相同优先级元素比较的时候起到重要作用

先假定Item是不支持排序的

In [11]:
q = PriorityQueue()
q.push(Item('foo'), 1), q.push(Item("bar"), 5), q.push(Item("spam"), 4) ,q.push(Item("grok"), 1)
q.pop(), q.pop(), q.pop(), q.pop()

(Item('bar'), Item('spam'), Item('foo'), Item('grok'))

In [23]:
a = Item("foo")
b = Item("bar")
a<b

TypeError: '<' not supported between instances of 'Item' and 'Item'

In [24]:
a = (1, Item("foo"))
b = (5, Item("bar"))
a < b

True

In [26]:
c = (1, Item('grok'))
a < c

TypeError: '<' not supported between instances of 'Item' and 'Item'

In [27]:
a = (1, 0, Item('foo'))
b = (5, 1, Item('bar'))
c = (1, 2, Item('grok'))
a < b

True

In [28]:
a < c

True

1.6 字典中的键映射多个值

怎样实现一个键对应多个值的字典（也叫multidict）

In [1]:
d = {"a": [1, 2, 3], "b": [4, 5]}
e = {"a": {1,2,3}, "b": {4, 5}}
# 如果你想保持元素的插入顺序就应该使用列表
# 如果想去掉重复元素就使用集合（不关心元素的顺序问题）
# defaultdict的一个特征是他会自动初始化每个key刚开始对应的值，所以只关心添加元素操作

from collections import defaultdict

d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)

d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
print(d)

a = {}
a.setdefault("a", []).append(1)
a.setdefault("a", []).append(2)
a.setdefault("b", []).append(4)
print(a)


defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
{'a': [1, 2], 'b': [4]}


一般来讲，创建一个多值映射字典是简单的，但是，如果你选择自己实现的话
那么对于值的初始化可能会有点麻烦，你可能像下面这样来实现

In [2]:
# Hand-rolled multidict: the list for a key has to be created explicitly the
# first time that key is seen.
# NOTE: `pairs` is not defined in this cell; it stands in for an iterable of
# (key, value) tuples and must exist before this code is run.
d = {}
for key, value in pairs:
    if key not in d:
        d[key] = []
    d[key].append(value) # list.append(value)
# Alternatively, with defaultdict the missing-key check is no longer needed.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)

NameError: name 'pairs' is not defined

1.7字典排序

你想创建一个字典，并且在迭代或序列化这个字典的时候能够控制元素的顺序

OrderedDict在迭代操作的时候会保持元素被插入时的顺序。每次当一个新的元素插入进来的时候，它会被放到链表的尾部；对于一个已经存在的键的重复赋值不会改变键的顺序

In [1]:
from collections import OrderedDict

# Build the mapping from (key, value) pairs; the keys keep this order.
d = OrderedDict([("foo", 1), ("bar", 2), ("spam", 3), ("grok", 4)])

# Iteration visits the keys in the order they were inserted.
for key in d:
    print(f"{key} {d[key]}")

# The same order is used when the mapping is serialized to JSON.
import json
json.dumps(d)


foo 1
bar 2
spam 3
grok 4


'{"foo": 1, "bar": 2, "spam": 3, "grok": 4}'

1.8字典的运算

怎样在数据字典中执行一些计算操作（比如最小、最大、排序等）

In [3]:
# Stock prices keyed by ticker symbol.
price = {
    "ACME": 45.24,
    "AAPL": 612.78,
    "IBM": 205.55,
    "HPQ": 37.20,
    "FB": 10.75
}
# zip() pairs every value with its key as (value, key) tuples, so min(), max()
# and sorted() rank the entries by price while keeping the ticker attached.
min_price = min(zip(price.values(), price.keys()))
max_price = max(zip(price.values(), price.keys()))

prices_sorted = sorted(zip(price.values(), price.keys()))

print(min_price)
print(max_price)
print(prices_sorted)

(10.75, 'FB')
(612.78, 'AAPL')
[(10.75, 'FB'), (37.2, 'HPQ'), (45.24, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]


In [6]:
# zip() returns a lazy iterator object rather than a list, so printing it
# shows only the object's repr, not the (value, key) pairs.
prices_and_names = zip(price.values(), price.keys())
print(prices_and_names)
print(prices_and_names)

<zip object at 0x0000026D0DE20948>
<zip object at 0x0000026D0DE20948>


In [7]:
# Applied to the dict itself, min() and max() compare the keys.
min(price)
max(price)

# Applied to values(), they compare the values but lose the matching key.
min(price.values())
max(price.values())

# With a key function the result is the key whose value is smallest/largest.
min(price, key=lambda k: price[k])
max(price, key=lambda k: price[k])

# One more lookup is needed to obtain the value itself.
min_value = price[min(price, key=lambda k: price[k])]

1.9查找两字典的相同点

怎样在两个字典中寻找相同点（比如相同的键或者相同的值）?

In [8]:
a = {
    "x": 1,
    "y": 2,
    "z": 3
}

b = {
    "w":10,
    "x": 11,
    "y": 2,
}
#可以在keys或者items方法返回结果上执行集合操作
#这些操作可以用于修改或者过滤字典元素，比如，假如你想以现有字典
#构造排除几个指定键的新字典，用字典推导实现这样的需求
#字典的keys方法返回一个展现键集合的键视图对象，支持集合操作
#字典的items方法返回一个包含键值对的元素视图对象，这个对象同样也支持集合操作
#values方法也是类似，但是并不支持这里介绍的集合操作


In [9]:
a.keys() & b.keys() # 交集

{'x', 'y'}

In [10]:
a.keys() - b.keys() # 差集

{'z'}

In [11]:
a.items() & b.items()

{('y', 2)}

In [15]:
c = {key: a[key] for key in a.keys() - {"z", "w"}} # 排除z w键
c

{'y': 2, 'x': 1}

In [14]:
c

{'y': 2, 'x': 1}

1.10 删除序列相同元素并保持顺序

怎样在一个序列上面保持元素顺序的同时消除重复的值？

In [16]:
def dedupe(items):
    """Yield the elements of ``items`` in order, skipping repeats.

    Elements must be hashable, because the ones already produced are tracked
    in a set.
    """
    emitted = set()
    for element in items:
        if element in emitted:
            continue
        yield element
        emitted.add(element)
a = [1, 5, 2, 1, 9, 1, 5, 10]
list(dedupe(a))


[1, 5, 2, 9, 10]

In [3]:
def dedupe(items, key=None):
    """Yield the elements of ``items`` in order, skipping repeats.

    Args:
        items: Iterable to filter.
        key: Optional function mapping an element to the hashable value that
            decides whether it counts as a repeat. When omitted, the element
            itself is used.

    Yields:
        The first element seen for each distinct key value.
    """
    fingerprint = key if key is not None else (lambda element: element)
    emitted = set()
    for element in items:
        marker = fingerprint(element)
        if marker in emitted:
            continue
        yield element
        emitted.add(marker)
            
a = [{"x":1, "y":2}, {"x":1, "y":3}, {"x":1, "y":2}, {"x":2, "y":4}]
list(dedupe(a, key=lambda d: (d["x"], d["y"])))

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]

In [4]:
list(dedupe(a, key=lambda d:d["x"]))

[{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]

讨论：如果你仅仅就是想消除重复元素，通常可以简单的构造一个集合

In [5]:
a=[1, 5, 2, 1, 9, 1,5, 10]
set(a)

{1, 2, 5, 9, 10}

这种方法不能维护元素的顺序，生成的结果中的元素位置被打乱，而上面的方法可以
避免这种情况

# 1.11命名切片

内置的slice()函数创建了一个切片对象，可以被用在任何切片允许使用的地方

In [1]:
items = [0, 1, 2, 3, 4, 5, 6]
a = slice(2, 4)
items[2:4], items[a]

([2, 3], [2, 3])

In [2]:
items[a] = [10,11]
items

[0, 1, 10, 11, 4, 5, 6]

In [3]:
del items[a]

In [4]:
items

[0, 1, 4, 5, 6]

In [1]:
a = slice(5, 50, 2)
a.start, a.stop, a.step

(5, 50, 2)

In [2]:
s = "HelloWorld"
a.indices(len(s)) # (5,10,2)
for i in range(*a.indices(len(s))):
    print(s[i])

W
r
d


In [3]:
a.indices(len(s)) # ?

(5, 10, 2)

In [4]:
for i in range(*a.indices(len(s))):
    print(s[i])

W
r
d


1.12序列中出现次数最多的元素

问题：
怎样找出一个序列中出现次数最多的元素呢？

In [6]:
# Sample text, already split into words.
words = [
    "look", "into", "my", "eyes", "look", "into", "my", "eyes",
    "the", "eyes", "the", "eyes", "the", "eyes", "not", "around", "the",
    "eyes", "don`t", "look", "around", "the", "eyes", "look", "into", "my", "eyes",
    "you`re", "under",
]

from collections import Counter

# Start from an empty tally and feed it the whole word list.
word_counts = Counter()
word_counts.update(words)

# most_common(n) lists the n most frequent words with their counts.
top_three = word_counts.most_common(3)
print(top_three)

[('eyes', 8), ('the', 5), ('look', 4)]


In [7]:
word_counts["not"],word_counts['eyes']

(1, 8)

In [None]:
morewords = ["why", "are", "you", "not", "looking", "in", "my", "eyes"]

a = Counter(words)
b = Counter(morewords)

