In [1]:
# 1.1 Unpacking a sequence into separate variables
data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
# The nested target (year, mon, day) unpacks the inner date tuple in the same statement.
name, shares, price, (year, mon, day) = data
print(name, shares, price, (year, mon, day))
# `_` is used as a throwaway name; it is rebound three times here and ends up
# holding the last value assigned to it (the date tuple).
_, shares, _, _ = data
print(_, shares)

# Any iterable can be unpacked, including a string (one character per target).
s = 'Hello'
a, b, c, d, e = s
print(a, b, c, d, e)

ACME 50 91.1 (2012, 12, 21)
(2012, 12, 21) 50
H e l l o


In [2]:
# 1.2 Unpacking an iterable of arbitrary length into multiple variables
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
# The starred target collects every element left over after name and email.
name, email, *phone_numbers = record
print(name, email)
print(phone_numbers)
# The starred target is a list even though `record` is a tuple.
print(type(phone_numbers))

# The starred target may also come first: everything except the last element.
*trailing, current = [10, 8, 7, 1, 9, 5, 10, 3]
print(trailing)
print(current)

# Tagged tuples of varying length; the first element names the record type.
records = [("foo", 0, 1, 2), ("bar", "hello"), ("foo", 3, 4)]
def do_foo(x, *y):
    """Print a 'foo' record: the first value followed by the tuple of remaining values."""
    fields = ('foo', x, y)
    print(*fields)
def do_bar(s):
    """Print a 'bar' record together with its single payload value."""
    fields = ('bar', s)
    print(*fields)
# Split each record into its tag and a list of the remaining fields, then pass
# those fields as positional arguments to the handler matching the tag.
# Records with any other tag are ignored.
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)

line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
# A starred target in the middle collects the fields between the first one
# and the last two.
uname, *fields, homedir, _ = line.split(':')
print(uname)
print(fields)
print(homedir)
# `_` received the final field of the split line.
print(_)

Dave dave@example.com
['773-555-1212', '847-555-1212']
<class 'list'>
[10, 8, 7, 1, 9, 5, 10]
3
foo 0 (1, 2)
bar hello
foo 3 (4,)
nobody
['*', '-2', '-2', 'Unprivileged User']
/var/empty
/usr/bin/false


In [3]:
# 1.3 Keeping the last N items
from collections import deque

# A bounded deque: once it holds `maxlen` items, adding another one discards
# an item from the opposite end.
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print(q)
# The deque is full, so appending on the right drops the leftmost item (1).
q.append(4)
print(q)
# Still full, so appending on the left drops the rightmost item (4).
q.appendleft(5)
print(q)
# pop() removes from the right end, popleft() from the left end.
q.pop()
print(q)
q.popleft()
print(q)

deque([1, 2, 3], maxlen=3)
deque([2, 3, 4], maxlen=3)
deque([5, 2, 3], maxlen=3)
deque([5, 2], maxlen=3)
deque([2], maxlen=3)


In [4]:
# 1.4 Finding the largest or smallest N items
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
# Three largest values first, then the three smallest.
for pick in (heapq.nlargest, heapq.nsmallest):
    print(pick(3, nums))

portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]


def price_of(stock):
    """Ranking key: the 'price' field of a portfolio entry."""
    return stock['price']


# Rank the records by price instead of comparing the dicts themselves.
cheap = heapq.nsmallest(3, portfolio, key=price_of)
expensive = heapq.nlargest(3, portfolio, key=price_of)
print(cheap)
print(expensive)

# Heapify a copy so `nums` keeps its original order; each heappop() then
# returns the smallest remaining value.
heap = nums[:]
heapq.heapify(heap)
print(heap)
for attempt in range(3):
    print(heapq.heappop(heap))

[42, 37, 23]
[-4, 1, 2]
[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]
[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
-4
1
2


In [5]:
# 1.5 Implementing a priority queue
import heapq

class PriorityQueue:
    """Heap-backed queue that pops the item with the smallest priority value.

    Entries are stored as (priority, insertion index, item) tuples. The
    insertion index is unique, so items pushed with equal priority come back
    in the order they were pushed and tuple comparison never reaches the
    item itself.
    """

    def __init__(self):
        self._index = 0   # running insertion counter, used as the tie-breaker
        self._queue = []  # list maintained as a heap by heapq

    def push(self, item, priority):
        """Add `item` to the queue under the given `priority`."""
        entry = (priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index = self._index + 1

    def pop(self):
        """Remove and return the item with the smallest priority value.

        Raises IndexError (from heapq.heappop) when the queue is empty.
        """
        _priority, _order, item = heapq.heappop(self._queue)
        return item
    
class Item:
    """Minimal object that carries a name and shows it in its repr."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = "Item({!r})"
        return template.format(self.name)

# Push four items. The queue pops the smallest priority value first, and
# 'foo' comes out ahead of 'grok' (both priority 1) because it was pushed
# earlier.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
print(q.pop())

Item('foo')


<function print>

In [6]:
# 1.6 Mapping keys to multiple values in a dictionary
from collections import defaultdict

# A missing key is created on first access with an empty list as its value.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
print(d)

# Same idea with a set as the per-key container.
d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
print(d)

# Plain-dict alternative: setdefault() inserts the empty list only when the
# key is absent, then returns the stored list.
# NOTE(review): this dict is never printed and `d` is rebound below.
d = {}
d.setdefault('a', []).append(1)
d.setdefault('b', []).append(2)
d.setdefault('b', []).append(3)

# Group (key, value) pairs by key.
pairs = [('a', 1), ('a', 2), ('b', 3)]
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)
print(d)

defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [3]})


In [7]:
# 1.7 Keeping dictionaries in order
from collections import OrderedDict

# Entries are inserted in this order, and iteration yields them in the same order.
d = OrderedDict([
    ('foo', 1),
    ('bar', 2),
    ('spam', 3),
    ('grok', 4),
])
for key, value in d.items():
    print(key, value)

import json
# The key order carries over to the serialized JSON text.
# Note: `d` is rebound from the OrderedDict to that JSON string.
d = json.dumps(d)
print(d)

foo 1
bar 2
spam 3
grok 4
{"foo": 1, "bar": 2, "spam": 3, "grok": 4}


In [8]:
# 1.8 Calculating with dictionaries
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

# Build (price, name) pairs so that min()/max() compare by price first and
# return the matching name alongside it.
min_prices = min((value, key) for key, value in prices.items())
max_prices = max((value, key) for key, value in prices.items())
print(min_prices)
print(max_prices)

# min() over the dict itself compares the keys, not the prices.
print(min(prices))
# Using the stored price as the ranking key yields the cheapest name ...
cheapest = min(prices, key=prices.get)
print(cheapest)
# ... which still needs a lookup to obtain the price itself.
min_value = prices[cheapest]
print(min_value)

(10.75, 'FB')
(612.78, 'AAPL')
AAPL
FB
10.75


In [9]:
# 1.9 Finding what two dictionaries have in common
a = {
    'x' : 1,
    'y' : 2,
    'z' : 3
}
b = {
    'w' : 10,
    'x' : 11,
    'y' : 2
}

# The keys() and items() views support set operators directly.
# Keys present in both dictionaries.
print(a.keys() & b.keys())
# Keys of a that are not in b.
print(a.keys() - b.keys())
# (key, value) pairs that are identical in both dictionaries.
print(a.items() & b.items())
# (key, value) pairs of a that do not appear in b.
print(a.items() - b.items())

{'y', 'x'}
{'z'}
{('y', 2)}
{('z', 3), ('x', 1)}


In [10]:
# 1.10 Removing duplicates from a sequence while maintaining order
def dedupe(items):
    """Yield each distinct item of `items` once, in first-seen order.

    Items must be hashable because already-yielded values are tracked in a set.
    """
    seen = set()
    for candidate in items:
        if candidate in seen:
            continue
        yield candidate
        seen.add(candidate)

a = [1, 5, 2, 1, 9, 1, 5, 10]
print([*dedupe(a)])

def dedupe(items, key=None):
    """Yield items whose comparison value has not been seen before.

    When `key` is given it maps an item to the hashable value used to detect
    duplicates; otherwise the item itself is used. The original item (not the
    key value) is what gets yielded, in first-seen order.
    """
    seen = set()
    for item in items:
        if key is None:
            val = item
        else:
            val = key(item)
        if val in seen:
            continue
        yield item
        seen.add(val)

# Dicts are unhashable, so duplicates are detected on their (x, y) values.
a = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
print([*dedupe(a, key=lambda rec: (rec['x'], rec['y']))])

[1, 5, 2, 9, 10]
[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]


In [17]:
# 1.11 Naming a slice
record = '....................100 .......513.25 ..........'
# Named slices replace hard-coded index pairs for the fixed-width fields.
SHARES = slice(20, 23)
PRICE = slice(31, 37)
cost = int(record[SHARES]) * float(record[PRICE])
print(cost)

a = slice(5, 50, 2)
print(a.start, a.stop, a.step)

s = 'HelloWorld'
# indices() fits the slice to a sequence of the given length and returns a
# (start, stop, step) tuple, so range() never goes past the end of `s`.
# The result is computed once and reused instead of being discarded.
bounds = a.indices(len(s))
for i in range(*bounds):
    print(s[i])

51325.0
5 50 2
W
r
d


In [26]:
# 1.12 Determining the most frequently occurring items in a sequence
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]
# Counter tallies how many times each word occurs.
word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)
# A Counter is indexed like a dict: word -> count.
print(word_counts['eyes'])
print(word_counts['the'])

morewords = ['why','are','you','not','looking','in','my','eyes']
# update() adds the new occurrences to the existing tallies in place.
word_counts.update(morewords)
top_three = word_counts.most_common(3)
print(top_three)

# Counters can be combined arithmetically: + adds the counts, - subtracts
# them and drops entries whose result is not positive (e.g. 'not').
a = Counter(words)
b = Counter(morewords)
c = a + b
d = a - b
print(a)
print(b)
print(c)
print(d)

[('eyes', 8), ('the', 5), ('look', 4)]
8
5
[('eyes', 9), ('the', 5), ('look', 4)]
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
Counter({'why': 1, 'are': 1, 'you': 1, 'not': 1, 'looking': 1, 'in': 1, 'my': 1, 'eyes': 1})
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})


In [33]:
# 1.13 Sorting a list of dictionaries by a common key
from operator import itemgetter

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

# itemgetter('field') builds a callable that fetches that field from a row.
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)

# Equivalent sort using a lambda as the key function.
rows_by_fname = sorted(rows, key=lambda k: k['fname'])
print(rows_by_fname)

# With several fields itemgetter returns a tuple: sort by lname, then fname.
# NOTE(review): the name rows_by_fname is reused for this lname/fname ordering.
rows_by_fname = sorted(rows, key=itemgetter('lname', 'fname'))
print(rows_by_fname)

# The same two-field ordering written with a lambda.
rows_by_fname = sorted(rows, key=lambda k: (k['lname'], k['fname']))
print(rows_by_fname)

# The same key functions work with min() and max().
min_uid = min(rows, key=itemgetter('uid'))
max_uid = max(rows, key=itemgetter('uid'))
print(min_uid)
print(max_uid)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
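[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}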

In [36]:
# 1.14 Sorting objects without native comparison support
class User:
    """Simple record identified by `user_id`; it defines no ordering methods."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        template = "User({})"
        return template.format(self.user_id)

users = [User(23), User(3), User(99)]
print(users)
# User defines no comparison methods, so sorting needs an explicit key function.
print(sorted(users, key=lambda u: u.user_id))

from operator import attrgetter

# attrgetter('user_id') builds a callable equivalent to the lambda above.
by_id = sorted(users, key=attrgetter('user_id'))
print(by_id)

[User(23), User(3), User(99)]
[User(3), User(23), User(99)]
[User(3), User(23), User(99)]


In [42]:
# 1.15 Grouping records together based on a field
from operator import itemgetter
from itertools import groupby

rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

# groupby() only merges consecutive rows with equal keys, so the rows are
# sorted (in place) on the same field before grouping.
rows.sort(key=itemgetter("date"))
for date, items in groupby(rows, key=itemgetter("date")):
    print(date)
    for entry in items:
        print(' ', entry)

from collections import defaultdict

# Alternative: bucket the rows into a dict of lists keyed by date, which
# allows direct lookup of one date's rows afterwards.
rows_by_date = defaultdict(list)
for row in rows:
    bucket = rows_by_date[row['date']]
    bucket.append(row)
for r in rows_by_date['07/01/2012']:
    print(r)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}
{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}


In [49]:
# 1.16 Filtering sequence elements
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
# A list comprehension builds the filtered list immediately.
new_list = [n for n in mylist if n > 0]
print(new_list)

# A generator expression produces the same values lazily, one at a time.
pos = (n for n in mylist if n > 0)
# Printing the generator shows the object itself, not its values.
print(pos)
for x in pos:
    print(x, end=' ')
print()

# Strings to be filtered down to those that look like integers.
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    """Return True if `val` can be converted with int(), False otherwise.

    Intended as a predicate for filter(). Text that is not an integer
    literal (such as '-' or 'N/A') and values of types int() does not
    accept (such as None) both yield False instead of raising.
    """
    try:
        # Only success or failure of the conversion matters; the converted
        # value itself is not needed.
        int(val)
    except (TypeError, ValueError):
        # ValueError: malformed text; TypeError: unsupported type such as None.
        return False
    return True
# filter() keeps the values for which the predicate returns True; list()
# materializes the lazy result.
ivals = list(filter(is_int, values))
print(ivals)

# Replace values instead of dropping them: non-positive numbers become 0.
clip_neg = [n if n > 0 else 0 for n in mylist]
print(clip_neg)

from itertools import compress

addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]
# Boolean selector built from one sequence ...
more5 = [n > 5 for n in counts]
print(more5)
# ... applied to another: compress() keeps the addresses whose selector is True.
print(list(compress(addresses, more5)))

[1, 4, 10, 2, 3]
<generator object <genexpr> at 0x000001D8A4A7E620>
1 4 10 2 3 
['1', '2', '-3', '4', '5']
[1, 4, 0, 10, 0, 2, 3, 0]
[False, False, True, False, False, True, True, False]
['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']


In [51]:
# 1.17 Extracting a subset of a dictionary
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# Entries whose price is above 200.
p1 = dict((key, value) for key, value in prices.items() if value > 200)
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
# Entries whose key is one of the tech names; a name that is not in
# `prices` (MSFT) simply does not appear in the result.
p2 = {key: prices[key] for key in prices if key in tech_names}
print(p1)
print(p2)

{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
{'AAPL': 612.78, 'HPQ': 37.2, 'IBM': 205.55}


{'AAPL', 'HPQ', 'IBM'}

In [57]:
# 1.18 Mapping names to sequence elements
from collections import namedtuple 

# namedtuple() creates a tuple subclass whose positions are also reachable by field name.
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
print(sub.addr, sub.joined)

def compute_cost(records):
    """Sum element 1 times element 2 over all records, starting from 0.0.

    Position-based version: each record is indexed as rec[1] and rec[2].
    NOTE(review): a later cell-level definition of the same name replaces
    this one with a field-name based version.
    """
    running = 0.0
    for entry in records:
        running = running + entry[1] * entry[2]
    return running

Stock = namedtuple("Stock", ["name", "shares", "price"])
def compute_cost(records):
    """Sum shares times price over all records, starting from 0.0.

    Each record must expose `shares` and `price` attributes, as `Stock`
    instances do; reading fields by name avoids positional indexing.
    """
    cost = 0.0
    for stock in records:
        cost = cost + stock.shares * stock.price
    return cost
records = [
    Stock('stock1', 1, 2),
    Stock('stock2', 1, 2),
    Stock('stock3', 1, 2),
    Stock('stock4', 1, 2),
    Stock('stock5', 1, 2),    
]
print(compute_cost(records))

# _replace() returns a new tuple with the given field changed, so the list
# slot is reassigned to the result.
records[3] = records[3]._replace(shares=12)
print(records)

# Redefine Stock with two extra fields; this replaces the three-field Stock above.
Stock = namedtuple('Stock', ['name', 'shares', 'price', 'date', 'time'])
# Prototype instance that supplies the default value for every field.
stock_prototype = Stock('', 0, 0.0, None, None)
def dict_to_stock(s):
    """Build a Stock from dict `s`, taking defaults from `stock_prototype`.

    Keys of `s` must be Stock field names; fields missing from `s` keep the
    prototype's value. An unknown key makes `_replace` raise (ValueError, or
    TypeError on newer Python versions).
    """
    return stock_prototype._replace(**s)
a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
print(dict_to_stock(a))
b = {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
print(dict_to_stock(b))

jonesy@example.com 2012-10-19
10.0
[Stock(name='stock1', shares=1, price=2), Stock(name='stock2', shares=1, price=2), Stock(name='stock3', shares=1, price=2), Stock(name='stock4', shares=12, price=2), Stock(name='stock5', shares=1, price=2)]
Stock(name='ACME', shares=100, price=123.45, date=None, time=None)
Stock(name='ACME', shares=100, price=123.45, date='12/17/2012', time=None)


In [72]:
# 1.19 Transforming and reducing data at the same time
nums = [1, 2, 3, 4, 5]
# A generator expression is passed straight to sum(); no temporary list is built.
s = sum(x * x for x in nums)
print(s)

import os
# The directory below is specific to the original author's machine. Fall back
# to an empty listing when it does not exist so that the cell still runs
# elsewhere instead of raising FileNotFoundError.
download_dir = 'D:\\download\\'
files = os.listdir(download_dir) if os.path.isdir(download_dir) else []
if any(name.endswith('.pdf') for name in files):
    print("There are pdf!")
else:
    print("No pdf")

# `s` is rebound here from the sum above to a tuple of mixed values.
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))

portfolio = [
    {'name':'GOOG', 'shares': 50},
    {'name':'YHOO', 'shares': 75},
    {'name':'AOL', 'shares': 20},
    {'name':'SCOX', 'shares': 65}
]
# The `s` inside the generator expression is local to it and does not
# overwrite the tuple bound to `s` above.
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)

55
There are pdf!
ACME,50,123.45
20


In [74]:
# 1.20 Combining multiple dictionaries or mappings
from collections import ChainMap

a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
# ChainMap looks keys up in a first, then b; for a key present in both ('z')
# the value from the first mapping is returned.
c = ChainMap(a, b)
print(type(c), c)
print(c['x']) # Outputs 1 (from a)
print(c['y']) # Outputs 2 (from b)
print(c['z']) # Outputs 3 (from a)

<class 'collections.ChainMap'> ChainMap({'x': 1, 'z': 3}, {'y': 2, 'z': 4})
1
2
3
