# 第二章 序列构成的数组

# 内置序列类概览

* 内置序列都有很多丰富的操作
* 容器序列， 扁平序列
* 可变序列, 不可变序列

# 列表推导和生成器表达式

* 列表推导式是构建列表的快捷方式 listcomps
* 生成器表达式可以创建其他任何类型的序列 genexps
* 更具可读性

## 列表的推导和生成器表达式

In [69]:
# code1
symbols = '$^!#!@'
codes = []
for symbol in symbols:
    codes.append(ord(symbol))
print(codes)

[36, 94, 33, 35, 33, 64]


In [70]:
# code2
symbols = '$^!#!@'
codes = [ord(symbol) for symbol in symbols]
print(codes)

[36, 94, 33, 35, 33, 64]


* 一旦列表推导式超过两行，就要考虑改用 for 循环重写
* [] {} ()会忽略代码里的换行

In [71]:
x = "my precious"

dummy = [x for x in 'ABC']
print(x)

my precious


* 在 Python 2 中，列表推导里的变量会泄漏到外层作用域，x 会被替换成 'C'；在 Python 3 中，列表推导有自己的局部作用域，所以 x 仍然是 'my precious'

##  列表推导式同filter和map做比较

In [72]:
symbols = '!@#$%^&*()'
beyond_ascii = [ord(symbol) for symbol in symbols if ord(symbol) > 27]
print(beyond_ascii)

[33, 64, 35, 36, 37, 94, 38, 42, 40, 41]


In [73]:
beyond_ascii = list(filter(lambda c: c > 27 , map(ord, symbols)))
print(beyond_ascii)

[33, 64, 35, 36, 37, 94, 38, 42, 40, 41]


## 笛卡儿积

In [74]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']

tshirt = [(color, size) for color in colors for size in sizes] # 先颜色后尺码

In [75]:
tshirt

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

In [76]:
tshirt = [(color, size) for size in sizes
                         for color in colors]#先尺码后颜色

In [77]:
tshirt

[('black', 'S'),
 ('white', 'S'),
 ('black', 'M'),
 ('white', 'M'),
 ('black', 'L'),
 ('white', 'L')]

## 生成器表达式

In [78]:
symbols = "!@#$%^&*"
tuple(ord(symbol) for symbol in symbols)

(33, 64, 35, 36, 37, 94, 38, 42)

In [79]:
import array
array.array('I', (ord(symbol) for symbol in symbols))

array('I', [33, 64, 35, 36, 37, 94, 38, 42])

* 将方括号换成圆括号
* 如果生成器表达式是函数调用的唯一参数，就不需要再额外加一对括号（如上面的 tuple(...)）；如果函数还有其他参数，生成器表达式必须用圆括号括起来（如上面的 array.array('I', (...))）

In [80]:
for tshirt in ("%s %s" % (c, s) for c in colors for s in sizes):
    print(tshirt)

black S
black M
black L
white S
white M
white L


In [81]:
("%s %s" % (c, s) for c in colors for s in sizes)

<generator object <genexpr> at 0x00000196B22FB048>

* 是一个生成器

# 元组不仅是不可变的列表
* 没有字段名的记录

## 元组和记录

* 元组中，每个元素都存放了一个字段数据 外加这个字段的位置， 位置信息给予了数据意义
* 如果仅把元组当作不可变来处理就忽视了位置信息

In [82]:
lax_coordinates = (33.9425, -118.408056) #洛杉矶国际机场经纬度

In [83]:
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)
traveler_ids = [('USA', '31195955'), ('BRA', 'CE342567'),
               ('ESP', 'XDA205856')]
for passport in sorted(traveler_ids):
    print("%s/%s" % passport)

BRA/CE342567
ESP/XDA205856
USA/31195955


In [84]:
for country, _ in traveler_ids:
    print(country)

USA
BRA
ESP


## 元组拆包

In [85]:
a = 1
b = 2
#不使用中间变量交换两个值
a, b = b, a
print(a, b)

2 1


In [86]:
divmod(20 ,8)

(2, 4)

In [87]:
t = (20 ,8)
divmod(*t)

(2, 4)

In [88]:
print(*t)

20 8


In [89]:
import os
_, filename = os.path.split("/home/hj/tokyohot.avi") ##hhh
print(filename)

tokyohot.avi


In [90]:
a, b ,*rest = range(5)

In [91]:
rest

[2, 3, 4]

## 嵌套元组拆包

In [92]:
metro = [
    ('Tokyo', 'JP', 36, (35, 139)),
    ('Delhi NCR', 'IN', 21, (28, 77)),
    ('Mexico City', 'MX', 20.142, (19, -99)),
    ('New York-Newark', 'US', 20, (40, -74)),
    ('Sao Paulo', 'BR', 19, (-23, -46))
]
print('{:^15} | {:^9} | {:^9}'.format('', 'lat.', 'long.'))
fmt = '{:^15} | {:^9} | {:^9.4f}'
for name, cc, pop, (lat,long) in metro:
    print(fmt.format(name, lat, long))

                |   lat.    |   long.  
     Tokyo      |    35     | 139.0000 
   Delhi NCR    |    28     |  77.0000 
  Mexico City   |    19     | -99.0000 
New York-Newark |    40     | -74.0000 
   Sao Paulo    |    -23    | -46.0000 


* ^ 居中显示

## 具名元组

In [93]:
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates') # 后者可以是迭代对象或字符串空格分开
tokyo = City('Tokyo', 'JP', 36, (35, 139))

In [94]:
tokyo

City(name='Tokyo', country='JP', population=36, coordinates=(35, 139))

In [95]:
tokyo.coordinates

(35, 139)

In [96]:
tokyo.population

36

In [97]:
tokyo[-1]

(35, 139)

In [98]:
City._fields

('name', 'country', 'population', 'coordinates')

In [99]:
LatLong = namedtuple('LatLong', 'lat long')
delhi_data = ("Delhi NCR", 'IN', 21, LatLong(28, 77))
delhi = City._make(delhi_data)
for key, value in delhi._asdict().items():
    print(key, ':' , value)

name : Delhi NCR
country : IN
population : 21
coordinates : LatLong(lat=28, long=77)


## 作为不可变列表的元组

* 元组没有 __reversed__ 方法，但这个方法只是一种优化；没有它，元组依然可以使用 reversed()

# 切片

* 切片的功能比想象的强大得多

## 为什么切片和区间会忽略最后一个元素

* 符合0为初始下标的习惯

* 当只有最后一个信息时我们可以快速看出切片有几个元素

In [100]:
range(3)

range(0, 3)

* 已知起始位置和终止位置时，可以快速算出切片或区间的长度：stop - start

* 可以将列表分为不重复的两部分

In [101]:
l = [1, 2, 3, 4, 5, 6 ,7]
print(l[:2])
print(l[2:])

[1, 2]
[3, 4, 5, 6, 7]


## 对对象进行切分

In [102]:
s = 'bicyle'
s[::3]

'by'

In [103]:
s[::-1]

'elycib'

在 [] 中使用 a:b:c 这种写法时，Python 会创建一个切片对象 slice(a, b, c)，并把它传给 seq.__getitem__(slice(a, b, c))

## 多维切片和省略

In [104]:
import numpy as np

In [105]:
a = np.random.randn(4, 5)

In [106]:
a

array([[-0.27673464,  1.29579435, -0.96464071,  0.00342027,  1.00685309],
       [-1.16178079,  1.38039346, -0.6839954 , -1.24161261,  0.161131  ],
       [-1.67167278, -0.63896553,  1.41606973, -1.45915406, -0.58432803],
       [-0.66103118, -1.40525356, -0.31836802, -0.9485225 ,  0.98111039]])

In [107]:
a[0]

array([-0.27673464,  1.29579435, -0.96464071,  0.00342027,  1.00685309])

In [108]:
a[0,...]

array([-0.27673464,  1.29579435, -0.96464071,  0.00342027,  1.00685309])

# 给切片赋值

In [109]:
l = list(range(10))
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [110]:
l[2:5] = [20, 30]

In [111]:
l

[0, 1, 20, 30, 5, 6, 7, 8, 9]

In [112]:
len(l)

9

In [113]:
# l[2:5] = 100 WRONG

* 只能赋值一个可迭代的对象

# 对列表序列使用+ 和 *

In [114]:
[1, 2, 3] + [1, 2, 3]

[1, 2, 3, 1, 2, 3]

In [115]:
[1, 2] * 5

[1, 2, 1, 2, 1, 2, 1, 2, 1, 2]

* 不对原对象进行操作 生成新对象

In [116]:
[[]] * 3

[[], [], []]

## 建立由列表组成的列表

In [117]:
board = [['_'] * 3 for i in range(3)]

In [118]:
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [119]:
board[1][2] = 'X'

In [120]:
board

[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]

In [121]:
weird_board = [['_'] * 3] * 3

In [122]:
weird_board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [123]:
weird_board[1][2] = 'X'

In [124]:
weird_board

[['_', '_', 'X'], ['_', '_', 'X'], ['_', '_', 'X']]

* 外层列表中的 3 个元素是对同一个内层列表的引用，所以修改其中一行，三行都会跟着变！！！

In [125]:
#First
board = []
for i in range(3):
    board.append(['_'] * 3)

In [126]:
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [127]:
board[0][0] = 1

In [128]:
board

[[1, '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [129]:
# Second
board = []
row = ['_'] * 3
for i in range(3):
    board.append(row)

In [130]:
board[0][0] = 1

In [131]:
board

[[1, '_', '_'], [1, '_', '_'], [1, '_', '_']]

# 列表的增量赋值

* a += b 优先调用 __iadd__（就地修改）；如果没有实现 __iadd__，就退回到 a = a + b（调用 __add__）。对于不可变的序列来说，增量赋值会产生新的对象

In [132]:
l = [1, 2, 3, 4]
id(l)

1746745822216

In [133]:
l *= 2
id(l)

1746745822216

In [134]:
t = (1, 2, 3)
id(t)

1746746136904

In [135]:
t *= 2
id(t) #会产生新的对象

1746744979080

## 一个关于+=的谜题

In [136]:
t = (1, 2, 3, [1, 2])
t[3] += [3, 4]

TypeError: 'tuple' object does not support item assignment

In [137]:
t

(1, 2, 3, [1, 2, 3, 4])

In [138]:
import dis

In [139]:
dis.dis('s[a] += b')

  1           0 LOAD_NAME                0 (s)
              2 LOAD_NAME                1 (a)
              4 DUP_TOP_TWO
              6 BINARY_SUBSCR
              8 LOAD_NAME                2 (b)
             10 INPLACE_ADD
             12 ROT_THREE
             14 STORE_SUBSCR
             16 LOAD_CONST               0 (None)
             18 RETURN_VALUE


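* 先把 s[a] 的值放到栈顶（TOS），再执行 TOS += b；如果 TOS 指向可变对象（如列表），这一步会成功。最后执行 s[a] = TOS；如果 s 是不可变的元组，这一步会抛出 TypeError，但列表此时已经被修改了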
* 不要将可变对象放在元组里
* 看Python字节码

# list.sort方法和内置函数sorted

* 就地改变就返回None 让调用者知道是就地改变
* sorted 会新建并返回一个列表；list.sort 就地排序并返回 None

# 用bisect来管理已排序的序列

## 用bisect来搜索

In [140]:
import bisect
import sys

# Sorted sequence that gets searched, and the values that are looked up in it.
HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

# Row layout: needle, "@", returned index, a run of bars, then the needle again.
ROW_FMT = "{0:2d} @ {1:2d}   {2}{0:<2d}"


def demo(bisert_fn):
    """Print one row per needle showing the index ``bisert_fn`` returns for it.

    ``bisert_fn`` is called as ``bisert_fn(HAYSTACK, needle)`` and its result
    is used both as a number in the row and as the repeat count for the
    ``'   |'`` bars. Needles are visited in the reverse of their order in
    ``NEEDLES``.
    """
    for needle in NEEDLES[::-1]:
        index = bisert_fn(HAYSTACK, needle)
        bars = '   |' * index
        print(ROW_FMT.format(needle, index, bars))


In [141]:
bisect_fn = bisect.bisect
print('Demo:', bisect_fn.__name__)
print('haystack ->', ' '.join('%2d ' % n for n in HAYSTACK))
demo(bisect_fn)

Demo: bisect
haystack ->  1   4   5   6   8  12  15  20  21  23  23  26  29  30 
31 @ 14      |   |   |   |   |   |   |   |   |   |   |   |   |   |31
30 @ 14      |   |   |   |   |   |   |   |   |   |   |   |   |   |30
29 @ 13      |   |   |   |   |   |   |   |   |   |   |   |   |29
23 @ 11      |   |   |   |   |   |   |   |   |   |   |23
22 @  9      |   |   |   |   |   |   |   |   |22
10 @  5      |   |   |   |   |10
 8 @  5      |   |   |   |   |8 
 5 @  3      |   |   |5 
 2 @  1      |2 
 1 @  1      |1 
 0 @  0   0 


* 一个逆序的查找

In [142]:
def grade(score, breakpoints=(60, 70, 80, 90), grade='FDCBA'):
    """Return the grade symbol for ``score``.

    Args:
        score: The value to classify.
        breakpoints: Sorted sequence of thresholds. The default is a tuple
            rather than a list so that no mutable object is shared between
            calls.
        grade: Sequence of grade symbols, lowest band first. It needs one
            more entry than ``breakpoints``; a shorter sequence can raise
            ``IndexError``. Inside this function the name refers to this
            parameter, not to the function itself.

    Returns:
        The element of ``grade`` selected by the number of breakpoints that
        are less than or equal to ``score``.
    """
    # bisect.bisect is bisect_right: a score equal to a breakpoint is placed
    # after it, i.e. in the higher band.
    band = bisect.bisect(breakpoints, score)
    return grade[band]
[grade(i) for i in [33, 99, 70, 77, 89, 90, 100]]

['F', 'A', 'C', 'C', 'B', 'A', 'A']

## 用bisect.insort来插入新元素

In [143]:
import bisect
import random

SIZE = 7
random.seed(42)  # fixed seed, so every run draws the same numbers

# Draw SIZE pseudo-random integers below 2 * SIZE and insert each one with
# insort, which keeps my_list in ascending order after every step.
my_list = []
for _ in range(SIZE):
    candidate = random.randrange(2 * SIZE)
    bisect.insort(my_list, candidate)
    print('{:2d} -> '.format(candidate), my_list)

10 ->  [10]
 1 ->  [1, 10]
 0 ->  [0, 1, 10]
11 ->  [0, 1, 10, 11]
 4 ->  [0, 1, 4, 10, 11]
 3 ->  [0, 1, 3, 4, 10, 11]
 3 ->  [0, 1, 3, 3, 4, 10, 11]


# 当列表不是首选时

* 队列
* 数组

## 数组

如果序列只包含数字，array.array 比 list 效率更高

In [145]:
from array import array
from random import random

floats = array('d', (random() for i in range(10 ** 7)))

In [147]:
floats[0]

0.1395379285251439

In [154]:
# Dump the array to disk in binary form. The with-block closes the file even
# if tofile() raises; `fp` stays bound afterwards, so a later fp.close() is a
# harmless no-op.
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)

In [155]:
fp.close()

In [156]:
floats2 = array('d')
# Read 10 ** 7 doubles back from disk. The with-block closes the file even if
# fromfile() raises (it raises EOFError when fewer items are available).
with open('floats.bin', 'rb') as fp:
    floats2.fromfile(fp, 10 ** 7)

In [157]:
floats[0]

0.1395379285251439

In [158]:
floats == floats2

True