In [None]:
# Given a tuple or sequence of N elements, unpack its values into N variables
# in a single assignment. A string works too: here each of its five
# characters is bound to one name.
s = 'Hello'
a, b, c, d, e = s
print(a, b, c, d, e)

In [2]:
data = ['ACE', 50, 91, (2012, 12, 21)]
# `_` serves as a throwaway name for the fields that are not needed.
_, shares, price, _ = data
print(shares, price)

50 91


In [3]:
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
# The starred name collects every field between the first one and the last two.
uname, *fields, homedir, sh = line.split(":")
print(uname, homedir, sh)

nobody /var/empty /usr/bin/false


In [5]:
# Star-unpacking can be used to express a recursive algorithm concisely.
items = [1, 10, 7, 4, 5, 9]


def recursive_sum(items):
    """Return the sum of ``items`` by recursing on head/tail splits.

    The function is deliberately not called ``sum``: defining (and then
    rebinding) that name would hide the built-in ``sum`` that other cells
    call. An empty sequence sums to 0, matching the built-in.
    """
    if not items:
        return 0
    head, *tail = items
    return head + recursive_sum(tail) if tail else head


total = recursive_sum(items)
print(total)

36


In [3]:
# How to keep only the last few items of history during iteration or other
# processing: a bounded collections.deque is made for exactly this job.
from collections import deque

q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print(q)
# The deque is now full; each further append discards the oldest item.
q.append(4)
print(q)
q.append(5)
print(q)
# Inserting or removing at either end of a deque is O(1),
# unlike a list, where inserting or removing at the front is O(N).
q.appendleft(10)
print(q)
q.pop()
print(q)
q.popleft()
print(q)

deque([1, 2, 3], maxlen=3)
deque([2, 3, 4], maxlen=3)
deque([3, 4, 5], maxlen=3)
deque([10, 3, 4], maxlen=3)
deque([10, 3], maxlen=3)
deque([3], maxlen=3)


In [4]:
# How to get a list of the N largest or N smallest items of a collection.
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

[42, 37, 23]
[-4, 1, 2]


In [None]:
import heapq
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]
# Both functions accept a key function, so records can be ranked by a field.
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
print(cheap, expensive)

[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}] [{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]


In [16]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
import heapq
# Work on a copy: heapify() rearranges the list it is given in place.
heap = list(nums)
heapq.heapify(heap)
print(heap)
# The most important property of a heap is that heap[0] is always the
# smallest item, and the following items are easily obtained by calling
# heapq.heappop().
for _ in range(3):
    print(heapq.heappop(heap))

[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
-4
1
2


In [22]:
# Implement a priority queue
# whose pop operation always returns the item with the highest priority.
import heapq

class PriorityQueue:
    """Priority queue whose ``pop`` returns the highest-priority item first.

    Items pushed with equal priority come back in the order they were pushed.
    """

    def __init__(self):
        # Heap of (-priority, insertion index, item) entries.
        self._queue = []
        # Running insertion counter, used as the tie-breaker between entries.
        self._index = 0

    def push(self, item, priority):
        """Add ``item`` with the given ``priority`` (larger pops sooner)."""
        # heapq is a min-heap, so the priority is negated. The index makes
        # entries with equal priority comparable without comparing the items.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item that currently has the highest priority."""
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item
    
class Item:
    """Minimal named object that defines no ordering of its own."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f'Item({self.name!r})'

# Push four items; 'foo' and 'grok' share priority 1.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
for _ in range(3):
    print(q.pop())  
# (priority, item) tuples can be compared as long as the priorities differ,
# but if two entries have the same priority the comparison falls through to
# the Item objects and fails.
a = (1, Item('foo'))
b = (5, Item('bar'))
a < b
c = (1, Item('grok'))
# NOTE: the next line is expected to raise, as its trailing comment says.
a < c # TypeError: '<' not supported between instances of 'Item' and 'Item'
# Adding the index to form (priority, index, item) triples avoids that error.

Item('bar')
Item('spam')
Item('foo')


In [24]:
# How to build a dictionary that maps each key to several values
# (also called a "multidict").
from collections import defaultdict

# Use list as the factory to keep every appended value.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
print(d)
# Use set as the factory to store each distinct value once.
d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
print(d)

defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})


In [33]:
# How to run calculations (minimum, maximum, sorting, ...) over the data
# held in a dictionary.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# Use zip() to invert the dictionary into (value, key) pairs first.
min_price = min(zip(prices.values(), prices.keys())) # min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys())) # max_price is (612.78, 'AAPL')
prices_sorted = sorted(zip(prices.values(), prices.keys()))
print(min_price, max_price, prices_sorted)
# With a key function, min()/max() return only the key, not the price.
print(min(prices, key=lambda k: prices[k])) # Returns 'FB'
print(max(prices, key=lambda k: prices[k])) # Returns 'AAPL'

(10.75, 'FB') (612.78, 'AAPL') [(10.75, 'FB'), (37.2, 'HPQ'), (45.23, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]
FB
AAPL


In [30]:
# You want a dictionary whose element order you can control when iterating
# over it or serializing it.
# An OrderedDict preserves the insertion order of its items during iteration.
from collections import OrderedDict

d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
# Outputs "foo 1", "bar 2", "spam 3", "grok 4"
for key in d:
    print(key, d[key])
import json
# NOTE: the serialized string is discarded here; only the OrderedDict itself
# is printed below.
json.dumps(d)
print(d)

foo 1
bar 2
spam 3
grok 4
OrderedDict([('foo', 1), ('bar', 2), ('spam', 3), ('grok', 4)])


In [36]:
# How to find what two dictionaries have in common (same keys, same values, ...).
a = {
    'x' : 1,
    'y' : 2,
    'z' : 3
}

b = {
    'w' : 10,
    'x' : 11,
    'y' : 2
}
# The keys() and items() views support set operations directly.
print(a.keys() & b.keys())
print(a.keys() - b.keys())
print(a.items() & b.items())
# Build a new dictionary from an existing one with a few given keys excluded.
c = {key:a[key] for key in a.keys() - {'z', 'w'}}
print(c)

{'x', 'y'}
{'z'}
{('y', 2)}
{'x': 1, 'y': 2}


In [37]:
# How to remove duplicate values from a sequence while keeping element order.
def dedupe(items):
    """Yield each distinct element of ``items`` once, in first-seen order.

    Elements must be hashable because already-yielded ones are tracked in a set.
    """
    emitted = set()
    for element in items:
        if element in emitted:
            continue
        yield element
        emitted.add(element)
a = [1, 5, 2, 1, 9, 1, 5, 10]
list(dedupe(a))

[1, 5, 2, 9, 10]

In [39]:
# Removing duplicates from a sequence whose elements are not hashable
# (for example dicts).
def dedupe(items, key=None):
    """Yield the elements of ``items`` whose key has not been seen before.

    ``key`` maps an element to a hashable value used for the duplicate check;
    when it is None the element itself is used. First-seen order is kept.
    """
    emitted = set()
    for element in items:
        marker = key(element) if key is not None else element
        if marker in emitted:
            continue
        yield element
        emitted.add(marker)
a = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
print(list(dedupe(a, key=lambda d: (d['x'],d['y']))))
print(list(dedupe(a, key=lambda d: d['x'])))

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]
[{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]


In [40]:
# Your program is littered with unreadable hard-coded slice indices and you
# want to clean it up.
######    0123456789012345678901234567890123456789012345678901234567890'
record = '....................100 .......513.25 ..........'
# Hard-coded version:
cost = int(record[20:23]) * float(record[31:37])
# The same computation with named slice objects:
SHARES = slice(20, 23)
PRICE = slice(31, 37)
cost = int(record[SHARES]) * float(record[PRICE])
print(cost)

51325.0


In [42]:
items = [0, 1, 2, 3, 4, 5, 6]
# A slice object can be created on its own and printed.
a = slice(2, 4)
print(a)
items[2:4]

slice(2, 4, None)


[2, 3]

In [43]:
# A slice exposes its bounds and step as attributes.
a = slice(5, 50, 2)
print(a.start, a.stop, a.step)

5 50 2


In [44]:
s = 'HelloWorld'
# indices(length) returns a (start, stop, step) tuple fitted to a sequence of
# that length. `a` is not defined in this cell; it is expected to be a slice
# created earlier in the session.
print( a.indices(len(s)))
for i in range(*a.indices(len(s))):
    print(s[i])

(5, 10, 2)
W
r
d


In [46]:
# How to find the items that occur most often in a sequence.
words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]
from collections import Counter
word_counts = Counter(words)
# The three most frequent words
top_three = word_counts.most_common(3)
print(top_three)
# Outputs [('eyes', 8), ('the', 5), ('look', 4)]
# A Counter can be indexed by item to read that item's count.
print(word_counts['not'], word_counts['eyes'])

[('eyes', 8), ('the', 5), ('look', 4)]
1 8


In [47]:
morewords = ['why','are','you','not','looking','in','my','eyes']
# Increment the counts by hand; `word_counts` is not defined in this cell and
# is expected to exist from earlier in the session.
for word in morewords:
    word_counts[word] += 1

word_counts['eyes']
# Alternatively, use the update() method: word_counts.update(morewords)

9

In [49]:
# A little-known feature of Counter instances is that they combine easily
# with mathematical operations.
a = Counter(words)
b = Counter(morewords)
print(a, b)
# Add the counts of both counters.
c = a + b
print(c)
# Subtract the counts of b from those of a.
d = a - b
print(d)

Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1}) Counter({'why': 1, 'are': 1, 'you': 1, 'not': 1, 'looking': 1, 'in': 1, 'my': 1, 'eyes': 1})
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})


In [51]:
# You have a list of dictionaries and want to sort it by one or more of the
# dictionary fields.
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)
# itemgetter() also accepts multiple keys.
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))
print(rows_by_lfname)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


In [52]:
# itemgetter() can sometimes be replaced by a lambda expression.
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'],r['fname']))
print(rows_by_fname, rows_by_lfname)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}] [{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


In [53]:
# The same kind of key function also works with min() and max().
a = min(rows, key=itemgetter('uid'))
b = max(rows, key=itemgetter('uid'))
print(a, b)

{'fname': 'John', 'lname': 'Cleese', 'uid': 1001} {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}


In [58]:
# You want to sort objects of the same type that do not support native
# comparison operations.
class User:
    """Record holding a single ``user_id``; defines no comparison methods."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'


def sort_notcompare():
    """Print three User objects, then the same objects sorted by user_id."""
    users = [User(uid) for uid in (23, 3, 99)]
    print(users)
    # User defines no ordering, so the sort key is taken from the attribute.
    ordered = sorted(users, key=lambda u: u.user_id)
    print(ordered)
sort_notcompare()

[User(23), User(3), User(99)]
[User(3), User(23), User(99)]


In [62]:
from operator import attrgetter
users = [User(23), User(3), User(99)]
# attrgetter() can be used as the sort key in place of a lambda.
print(sorted(users, key=attrgetter('user_id')))
# Example call with several attribute names (left commented out):
# by_name = sorted(users, key=attrgetter('last_name', 'first_name'))
min(users, key=attrgetter('user_id'))

[User(3), User(23), User(99)]


User(3)

In [63]:
# You have a sequence of dictionaries or instances and want to iterate over
# it in groups based on a particular field, such as date.
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
from operator import itemgetter
from itertools import groupby

# Sort by the desired field first
# (groupby() only groups consecutive items that share the same key).
rows.sort(key=itemgetter('date'))
# Iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


In [65]:
from collections import defaultdict
# Alternative grouping: collect the rows into a multidict keyed by date,
# which then allows direct lookup of one date's rows.
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)
for r in rows_by_date['07/01/2012']:
    print(r)

{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}


In [69]:
# You have a sequence of data and want to extract the needed values from it,
# or shorten it, according to some rule.
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
# The simplest way to filter sequence elements is a list comprehension.
a = [n for n in mylist if n > 0]
b = [n for n in mylist if n < 0]
print(a, b)
# A potential drawback of a list comprehension is that a very large input
# produces a very large result that takes up a lot of memory.
# A generator expression produces the filtered elements one at a time instead.
pos = (n for n in mylist if n > 0)
for x in pos:
    print(x)

[1, 4, 10, 2, 3] [-5, -7, -1]
1
4
10
2
3


In [70]:
# When the filtering rule is too complex to express in a list comprehension
# or generator expression, put the filtering code in a function and use the
# built-in filter() function.
values = ['1', '2', '-3', '-', '4', 'N/A', '5']


def is_int(val):
    """Return True if ``int(val)`` succeeds, otherwise False.

    ``ValueError`` covers malformed strings such as ``'N/A'``. ``TypeError``
    covers values ``int()`` does not accept at all, such as ``None``, so the
    predicate can be passed to ``filter()`` without crashing on them.
    """
    try:
        int(val)
    except (ValueError, TypeError):
        return False
    return True


ivals = list(filter(is_int, values))
print(ivals)
# Outputs ['1', '2', '-3', '4', '5']

['1', '2', '-3', '4', '5']


In [72]:
# Data can also be transformed while it is being filtered.
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
import math
print([math.sqrt(n) for n in mylist if n > 0])
# Instead of dropping values that fail the test, replace them with 0.
clip_neg = [n if n > 0 else 0 for n in mylist]
clip_pos = [n if n < 0 else 0 for n in mylist]
print(clip_neg, clip_pos)

[1.0, 2.0, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772]
[1, 4, 0, 10, 0, 2, 3, 0] [0, 0, -5, 0, -7, 0, 0, -1]


In [74]:
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]
# Output every address whose corresponding count is greater than 5.
from itertools import compress
# Build a list of booleans, one per address, and use it as the selector.
more5 = [n > 5 for n in counts]
print(more5)
list(compress(addresses, more5))

[False, False, True, False, False, True, True, False]


['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']

In [78]:
# You want to build a dictionary that is a subset of another dictionary.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# Make a dictionary of all prices over 200
p1 = {key: value for key, value in prices.items() if value > 200}
# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
# Same result as p1, built by passing (key, value) tuples to dict().
p3 = dict((key, value) for key, value in prices.items() if value > 200)
print(p1)
print(p2)
print(p3)
# Make a dictionary of tech stocks
# (same result as p2, built from the intersection of the key view and the set)
tech_names = { 'AAPL', 'IBM', 'HPQ', 'MSFT' }
p4 = { key:prices[key] for key in prices.keys() & tech_names }
print(p4)

{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


In [80]:
# Mapping names to sequence elements.
# You have code that accesses list or tuple elements by position, which can
# make it hard to read, so you want to access the elements by name instead.
from collections import namedtuple
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
print(sub)
print(sub.addr, sub.joined)

Subscriber(addr='jonesy@example.com', joined='2012-10-19')
jonesy@example.com 2012-10-19


In [81]:
# Below is the version that uses a named tuple.
from collections import namedtuple

Stock = namedtuple('Stock', ['name', 'shares', 'price'])
def compute_cost(records):
    """Return the total of shares * price over ``records``.

    Each record is an iterable of three values in (name, shares, price)
    order; it is turned into a ``Stock`` so the fields are read by name.
    The total starts at 0.0, which is also the result for no records.
    """
    running = 0.0
    for stock in map(Stock._make, records):
        running = running + stock.shares * stock.price
    return running

In [82]:
from collections import namedtuple

Stock = namedtuple('Stock', ['name', 'shares', 'price', 'date', 'time'])

# Create a prototype instance
# (it supplies the values for fields that a given dictionary leaves out)
stock_prototype = Stock('', 0, 0.0, None, None)

# Function to convert a dictionary to a Stock
def dict_to_stock(s):
    """Return a copy of the prototype with the fields named in ``s`` replaced."""
    return stock_prototype._replace(**s)
# This dictionary has no 'date' or 'time', so those keep the prototype's None.
a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
dict_to_stock(a)

Stock(name='ACME', shares=100, price=123.45, date=None, time=None)

In [85]:
# You need to run a reduction function (such as sum(), min() or max()) over a
# sequence of data, but the data has to be transformed or filtered first.
# Determine if any .py files exist in a directory
# NOTE(review): no code for the directory check follows this comment; `os` is
# imported but not used in this cell.
import os
# Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))
# Data reduction across fields of a data structure
portfolio = [
    {'name':'GOOG', 'shares': 50},
    {'name':'YHOO', 'shares': 75},
    {'name':'AOL', 'shares': 20},
    {'name':'SCOX', 'shares': 65}
]
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)
# Original: Returns 20
# (repeats the computation above; the result is overwritten two lines below)
min_shares = min(s['shares'] for s in portfolio)
# Alternative: Returns {'name': 'AOL', 'shares': 20}
min_shares = min(portfolio, key=lambda s: s['shares'])
print(min_shares)

ACME,50,123.45
20
{'name': 'AOL', 'shares': 20}


In [86]:
nums = [1, 2, 3, 4, 5]
s = sum((x * x for x in nums)) # passes a generator expression object explicitly
s = sum(x * x for x in nums) # more elegant form: the extra parentheses are omitted
s = sum([x * x for x in nums]) # builds a temporary list before summing

In [87]:
# Combining multiple dictionaries or mappings.
# You have several dictionaries or mappings and want to logically combine
# them into a single mapping in order to perform certain operations,
# such as looking up values or checking whether keys exist.
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
from collections import ChainMap
c = ChainMap(a,b)
print(c['x']) # Outputs 1 (from a)
print(c['y']) # Outputs 2 (from b)
# 'z' is present in both mappings; the value from the first one is returned.
print(c['z']) # Outputs 3 (from a)

1
2
3
