## 1.1解压序列赋值给多个变量

In [1]:
p = (4, 5)
x, y = p
x, y 

(4, 5)

In [2]:
data = ["ACME", 50, 91.1, (2012, 12, 21)]
name, shares, price, date = data
name, date

('ACME', (2012, 12, 21))

In [3]:
name, shares, price ,(year, mon, day) = data
name,year, mon, day

('ACME', 2012, 12, 21)

In [4]:
p = (4, 5)
x, y , z = p

ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
s = "Hello"
a, b ,c ,d, e = s
a,b,c

In [None]:
_, shares, price, _ = data
shares, price

## 1.2解压可迭代对象赋值给多个变量

问题：如果一个可迭代对象的元素个数超过变量个数，会抛出一个ValueError。
那么怎样才能从这个可迭代对象中解压出N个元素来？

In [None]:
def drop_first_last(grades):
    """Return the average of ``grades`` with the first and last entries dropped.

    Args:
        grades: An iterable of numbers containing at least three elements.

    Returns:
        The arithmetic mean of every value between the first and the last.

    Raises:
        ValueError: If ``grades`` has fewer than two elements (the unpacking
            fails) or exactly two elements (nothing is left to average;
            ``statistics.StatisticsError`` is a ``ValueError`` subclass).
    """
    # Local import: the original relied on an ``avg`` helper that was never
    # defined, so every call ended in a NameError.
    from statistics import mean

    _, *middle, _ = grades
    return mean(middle)

record = ("Dave", "dave@example.com", '773-555-121', "847-555-1212")
name, email, *phone_numbers = record
name, email, phone_numbers # type(phone_numbers) = type([])

In [None]:
*trailing, current = [10, 8, 7, 1, 9, 5, 10, 3]
trailing, current

星号表达式在迭代元素为可变长元组的序列时是很有用的

In [None]:
records = [("foo", 1, 2), ("bar", "hello"), ("foo", 3, 4)]


def do_foo(x, y):
    """Print a "foo" record together with its two values."""
    print("foo", x, y)


def do_bar(s):
    """Print a "bar" record together with its single value."""
    print("bar", s)


# Route every record to the handler registered for its leading tag; the
# remaining fields of the variable-length tuple become the call arguments.
# Records with an unregistered tag are skipped.
_HANDLERS = {"foo": do_foo, "bar": do_bar}
for tag, *args in records:
    if tag in _HANDLERS:
        _HANDLERS[tag](*args)

In [None]:
# Sample passwd-style entry: keep the first field and the last two, and let
# the starred name soak up everything in between.
line = "nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false"
uname, *fields, homedir, sh = line.split(":")
uname, homedir, sh

In [None]:
record = ("ACME", 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
name, year

In [None]:
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
head, tail

In [None]:
def sum(items):
    """Recursively add up the elements of a non-empty iterable.

    Demonstrates head/tail splitting with star unpacking. Note that this
    definition shadows the built-in ``sum`` for the rest of the session.

    Args:
        items: A non-empty iterable whose elements support ``+``.

    Returns:
        The first element if it is the only one, otherwise the first element
        plus the recursive total of the rest.

    Raises:
        ValueError: If ``items`` is empty (there is no head to unpack).
    """
    first, *rest = items
    if not rest:
        return first
    return first + sum(rest)
sum(items)

## 1.3 保留最后N个元素

在迭代操作或者其他操作的时候，怎样只保留最后几个元素的历史记录？

In [11]:
from collections import deque

#deque(maxlen=N) 构造函数会新建一个固定大小的队列，当新的元素加入并且这个队列
#已满的时候，最老的元素会自动被移除掉

def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` along with its preceding context.

    This is a generator (generators are covered in section 4.3).

    Args:
        lines: Iterable of strings to scan, e.g. an open text file.
        pattern: Substring to look for in each line.
        history: Maximum number of preceding lines kept as context.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        ``deque`` holding up to ``history`` lines seen before ``line``,
        oldest first.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Yield a snapshot rather than the working deque: the working
            # deque keeps being appended to, so a caller that stores results
            # (e.g. ``list(search(...))``) would otherwise see every context
            # alias the same, final window. ``copy()`` preserves ``maxlen``.
            yield line, previous_lines.copy()
        previous_lines.append(line)
        
if __name__ == "__main__":
    # Example use on a file: show every line mentioning "python" preceded by
    # up to five lines of context and followed by a separator.
    with open("somefile.txt") as f:
        for line, prevlines in search(f, "python", 5):
            # Lines read from a file keep their trailing newline, so
            # suppress the one print() would add.
            for pline in prevlines:
                print(pline, end="")
            # The match and the separator belong to the outer loop: they are
            # printed exactly once per match, even when there is no context.
            print(line, end="")
            print("-" * 20)

Keeping a limited history is a perfect use for a `collections.deque`.
For example, the following code performs a simple text match on a
sequence of lines and prints the matching line along with the previous
N lines of context when found:

[source,python]
--------------------
from collections import deque

def search(lines, pattern, history=5):
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        previous_lines.append(line)

# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        for line, prevlines in search(f, 'python', 5):
            for pline in prevlines:
                print(pline, end='')
            print(line, end='')
            print('-' * 20)
--------------------


## 1.4 查找最大或最小的N个元素

怎样从一个集合中获得最大或者最小的N个元素列表？

In [1]:
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

[42, 37, 23]
[-4, 1, 2]


In [21]:
portfolio = [ 
    {"name": "IBM", "shares": 100, "price":91.1},
    {"name": "AAPL", "shares": 50, "price":543.22},
    {"name": "FB", "shares": 200, "price":21.09},
    {"name": "HPQ", "shares": 35, "price":31.75},
    {"name": "YHOO", "shares": 45, "price":16.35},
    {"name": "ACME", "shares": 75, "price":115.65}
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s ["price"])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s["price"])

In [22]:
cheap,expensive

([{'name': 'YHOO', 'shares': 45, 'price': 16.35},
  {'name': 'FB', 'shares': 200, 'price': 21.09},
  {'name': 'HPQ', 'shares': 35, 'price': 31.75}],
 [{'name': 'AAPL', 'shares': 50, 'price': 543.22},
  {'name': 'ACME', 'shares': 75, 'price': 115.65},
  {'name': 'IBM', 'shares': 100, 'price': 91.1}])

In [8]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
# Rearrange the list in place so that it satisfies the heap ordering.
heapq.heapify(nums)
print(nums)
# The most important property of a heap is that heap[0] is always the
# smallest element. The remaining elements can be retrieved easily with
# heapq.heappop(), which pops the first element and replaces it with the
# next-smallest one. For example, to find the three smallest elements you
# can do the following:
print(nums[0])
print(heapq.heappop(nums))
print(heapq.heappop(nums))
print(heapq.heappop(nums))

[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
-4
-4
1
2


## 1.5 实现一个优先级队列

问题：怎样实现一个按优先级排序的队列？并且在这个队列上面每次pop操作总是返回优先级最高的那个元素

In [9]:
import heapq

class PriorityQueue:
    """Queue whose ``pop`` always returns the highest-priority item.

    Items pushed with equal priority are returned in insertion order.
    """

    def __init__(self):
        # Heap of (-priority, insertion index, item) entries.
        self._queue = []
        # Running insertion counter, used as a tie-breaker between entries.
        self._index = 0

    def push(self, item, priority):
        """Add ``item`` to the queue with the given ``priority``."""
        # heapq is a min-heap, so the priority is negated to make the
        # largest priority surface first. The index keeps equal-priority
        # entries in insertion order and means the items themselves are
        # never compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority.

        Raises:
            IndexError: If the queue is empty.
        """
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item
    
class Item:
    """Simple named object that defines no ordering operators."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # ``!r`` shows the name the way its own repr would, e.g. Item('foo').
        return f"Item({self.name!r})"
    
# 第一个pop操作返回优先级最高的元素。另外注意到，如果两个元素有着相同的优先级
# （foo和grok），pop操作按照它们被插入到队列的顺序返回。
# heappop()函数总是返回最小的元素，这是保证队列pop操作返回正确元素的关键。
# heapq.heappush()和heapq.heappop()分别在队列_queue上插入和删除第一个元素，
# 并且队列_queue保证第一个元素拥有最高优先级。
# 队列元素是(-priority, index, item)形式的元组：优先级取负数的目的是使元素按照优先级从高到低排序，这跟普通的按优先级从低到高排序的堆排序恰巧相反。
# index变量的作用是保证同等优先级元素的正确排序：通过保存一个不断增加的index变量，可以确保元素按照它们插入的顺序排序。
# 而且index变量也在相同优先级元素比较的时候起到重要作用（Item实例本身不支持比较）。

先假定Item是不支持排序的

In [11]:
q = PriorityQueue()
q.push(Item('foo'), 1), q.push(Item("bar"), 5), q.push(Item("spam"), 4) ,q.push(Item("grok"), 1)
q.pop(), q.pop(), q.pop(), q.pop()

(Item('bar'), Item('spam'), Item('foo'), Item('grok'))

In [23]:
a = Item("foo")
b = Item("bar")
a<b

TypeError: '<' not supported between instances of 'Item' and 'Item'

In [24]:
a = (1, Item("foo"))
b = (5, Item("bar"))
a < b

True

In [26]:
c = (1, Item('grok'))
a < c

TypeError: '<' not supported between instances of 'Item' and 'Item'

In [27]:
a = (1, 0, Item('foo'))
b = (5, 1, Item('bar'))
c = (1, 2, Item('grok'))
a < b

True

In [28]:
a < c

True

In [2]:
rows = [
    {"fname": "Brian", "lname": "Jones","uid":1003},
    {"fname": "David", "lname":"Beazley", "uid":1002},
    {"fname": "John", "lname": "Cleese", "uid": 1001},
    {"fname": "Big", "lname": "Jones", "uid":1004}
]

from operator import itemgetter
"""
rows被传递给接受一个关键字参数的sorted()内置参数，这个参数是callable类型，并且从rows中接受一个单一元素，
然后返回被用来排序的值
itemgetter函数负责创建这个callable对象的
"""
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))

print(rows_by_fname), print(rows_by_uid)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]


(None, None)

In [3]:
rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname'))
print(rows_by_lfname)

[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


In [5]:
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'], r['fname']))

min(rows, key=itemgetter('uid')), max(rows, key=itemgetter('uid'))

({'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
 {'fname': 'Big', 'lname': 'Jones', 'uid': 1004})

## 1.14 排序不支持原生比较的对象

你想排序类型相同的对象，但是它们不支持原生的比较操作。

In [6]:
class User:
    """Record identified by ``user_id``; defines no comparison operators."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return f"User({self.user_id})"
    
def sort_notcompare():
    """Print three sample users, first as created and then ordered by id.

    ``User`` objects cannot be compared directly, so the ordering is done
    through a ``key`` function that extracts ``user_id``.
    """
    users = [User(uid) for uid in (23, 3, 99)]
    print(users)
    ordered = sorted(users, key=lambda user: user.user_id)
    print(ordered)

In [7]:
from operator import attrgetter

# ``users`` only exists as a local inside sort_notcompare(), so the list has
# to be built in this cell before it can be sorted here.
users = [User(23), User(3), User(99)]
sorted(users, key=attrgetter('user_id'))

[User(3), User(23), User(99)]

## 1.15 通过某个字段将记录分组

问题：你有一个字典或者实例的序列，然后你想根据某个特定的字段比如date来分组迭代访问

解决方案：itertools.groupby()函数对于这样的数据分组操作非常实用

In [9]:
rows= [
    {'address': '5412 NCLARK', 'date': '07/01/2012'},
    {'address': '5148 NCLARK', 'date': '07/04/2012'},
    {'address': '5800 E58TH', 'date': '07/02/2012'},
    {'address': '2122 NCLARK', 'date': '07/03/2012'},
    {'address': '5645 NRAVENSWOOD', 'date':'07/02/2012'},
    {'address': '1060 WADDISON', 'date': '07/02/2012'},
    {'address': '4801 NBROADWAY', 'date': '07/01/2012'},
    {'address': '1039 WGRANVILLE', 'date':'07/04/2012'},
]

In [11]:
from operator import itemgetter
from itertools import groupby

#Sort by the desired field first
rows.sort(key=itemgetter('date'))

for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print('    ', i)

07/01/2012
     {'address': '5412 NCLARK', 'date': '07/01/2012'}
     {'address': '4801 NBROADWAY', 'date': '07/01/2012'}
07/02/2012
     {'address': '5800 E58TH', 'date': '07/02/2012'}
     {'address': '5645 NRAVENSWOOD', 'date': '07/02/2012'}
     {'address': '1060 WADDISON', 'date': '07/02/2012'}
07/03/2012
     {'address': '2122 NCLARK', 'date': '07/03/2012'}
07/04/2012
     {'address': '5148 NCLARK', 'date': '07/04/2012'}
     {'address': '1039 WGRANVILLE', 'date': '07/04/2012'}


groupby()函数扫描整个序列并且查找连续相同值（或者根据指定key函数返回值相同）的元素序列。在每次迭代的时候，
它会返回一个值和一个迭代器对象，这个迭代器对象可以生成元素值全部等于上面那个值的组中所有对象。

一个非常重要的准备步骤是要根据指定的字段将数据排序，因为groupby()仅仅检查连续的元素，如果事先并没有排序完成的话，
分组函数将得不到想要的结果。


In [12]:
from collections import defaultdict
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)

In [13]:
rows_by_date

defaultdict(list,
            {'07/01/2012': [{'address': '5412 NCLARK', 'date': '07/01/2012'},
              {'address': '4801 NBROADWAY', 'date': '07/01/2012'}],
             '07/02/2012': [{'address': '5800 E58TH', 'date': '07/02/2012'},
              {'address': '5645 NRAVENSWOOD', 'date': '07/02/2012'},
              {'address': '1060 WADDISON', 'date': '07/02/2012'}],
             '07/03/2012': [{'address': '2122 NCLARK', 'date': '07/03/2012'}],
             '07/04/2012': [{'address': '5148 NCLARK', 'date': '07/04/2012'},
              {'address': '1039 WGRANVILLE', 'date': '07/04/2012'}]})

In [14]:
for r in rows_by_date['07/01/2012']:
    print(r)

{'address': '5412 NCLARK', 'date': '07/01/2012'}
{'address': '4801 NBROADWAY', 'date': '07/01/2012'}


在上面这个例子中，我们没有必要先将记录排序。因此，如果对内存占用不是很关心，这种方式会比先排序再通过groupby()函数
迭代的方式运行得更快一些。