# 序列构成的数组

## 2.1 内置序列类型概览

* 容器序列: list tuple collections.deque    可以存放不同类型的数据
* 扁平序列: str bytes bytearray memoryview array.array 只能容纳一种类型

* 可变序列: list、bytearray 、array.array、 collections.deque 和 memoryview
* 不可变序列: tuple、str、bytes

In [234]:
import array
import collections
import collections.abc
def _isinstance(obj):
    """Print whether ``obj`` is an instance of five ``collections.abc`` ABCs.

    The checks run in the order Container, Sized, Iterable, Sequence,
    MutableSequence.  The five booleans are written on a single line, each
    one followed by a space, and the line is terminated by a newline.
    """
    abcs = (
        collections.abc.Container,
        collections.abc.Sized,
        collections.abc.Iterable,
        collections.abc.Sequence,
        collections.abc.MutableSequence,
    )
    flags = [isinstance(obj, abc) for abc in abcs]
    # end=" \n" keeps the space that follows the last value on the line.
    print(*flags, end=" \n")

In [241]:
_isinstance(list())    # a list passes all five checks
_isinstance(tuple())   # a tuple is not a MutableSequence
_isinstance(collections.deque())    # a deque also counts as a mutable sequence
_isinstance(collections.namedtuple('stu', ['name', 'age'])('Rookie', 18))   # a named tuple is an immutable sequence
_isinstance(array.array('I', [1, 2, 3]))    # an array is also a mutable sequence
_isinstance("Hello")    # a str is an immutable sequence

True True True True True 
True True True True False 
True True True True True 
True True True True False 
True True True True True 
True True True True False 


In [244]:
import numpy as np
import torch
_isinstance(np.array([]))   # an ndarray is not a Sequence, yet it is still recognized as collections.abc.Container, collections.abc.Sized and collections.abc.Iterable
_isinstance(torch.Tensor([]))   # torch.Tensor gives the same result as the numpy array

True True True False False 
True True True False False 


## 2.2 列表推导和生成器表达式

In [183]:
x = 'ABC'
# The loop variable x lives in the comprehension's own scope, so the outer x
# is not rebound by the loop.
dummy = [ord(x) for x in x]
dummy, x

([65, 66, 67], 'ABC')

> <mark>Python 3 中, 列表推导、生成器表达式以及集合推导和字典推导都有自己的局部作用域</mark>: 表达式内部的变量只在局部起作用, 上下文里的同名变量不会被覆盖, 仍然可以被正常引用

#### 同map和filter 对比

In [184]:
symbols = '$¢£¥€¤'
# Keep only the code points above 127.
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
beyond_ascii

[162, 163, 165, 8364, 164]

In [185]:
# map/filter formulation: convert every symbol with ord, then keep the values above 127.
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))
beyond_ascii

[162, 163, 165, 8364, 164]

### 笛卡尔积

In [186]:
# Cartesian product of colors and sizes: the outer loop runs over colors and
# the inner loop over sizes, so the pairs come out grouped by color.
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts = [(color, size) for color in colors for size in sizes]
tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

### 生成器表达式

In [187]:
symbols = '$¢£¥€¤'
tuple(ord(x) for x in symbols)  # initialize the tuple from a generator expression

(36, 162, 163, 165, 8364, 164)

In [188]:
import array
array.array('I', (ord(x) for x in symbols))  # build the array from a generator expression

array('I', [36, 162, 163, 165, 8364, 164])

## 2.3 元组不仅仅是不可变的列表 

In [189]:
# Every record is unpacked below as (name, cc, pop, (latitude, longitude)).
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

# Row template: name padded to 15 columns, then two 9-wide floats with 4 decimals.
fmt = '{:15} | {:9.4f} | {:9.4f}'
# Header row: an empty name column followed by the centered column titles.
print('{:15} | {:^9} | {:^9}'.format('', 'lat.', 'long.'))
# Nested unpacking: the coordinate pair is split in the loop header itself.
for name, cc, pop, (latitude, longitude) in metro_areas:
    # Only rows whose longitude is zero or negative are printed.
    if not longitude <= 0:
        continue
    print(fmt.format(name, latitude, longitude))


                |   lat.    |   long.  
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
Sao Paulo       |  -23.5478 |  -46.6358


### 元组拆包

In [190]:
# A metro_areas record is laid out as (name, cc, pop, (latitude, longitude)),
# so the targets follow that order; the population field is discarded via `_`.
city, country, _, (lat, lon) = metro_areas[0]
city, country, lat, lon

('Tokyo', 'JP', 35.689722, 139.691667)

### 2.3.4 具名元组

参见第一章的例子

### 2.3.5 作为不可变列表的元组

元组不仅仅是不可变的列表, 元组和列表在方法和属性上仍有很多不同

## 2.4 切片

### 给切片赋值

In [191]:
l = list(range(10))
l[2:5] = [-1, -2, -3]  # replace the items at indexes 2, 3 and 4
del l[5:7]  # remove the items at indexes 5 and 6
l

[0, 1, -1, -2, -3, 7, 8, 9]

## 2.5 对序列使用 + 和 *

In [192]:
l = [1, 2, 3]
l * 5  # builds a new list with the items repeated five times

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [193]:
5 * 'abcd'  # the integer may also be the left operand

'abcdabcdabcdabcdabcd'

In [194]:
# The comprehension evaluates [''] * 3 once per iteration, so every row is a separate list.
board1 = [[''] * 3 for i in range(3)]
board1[1][2] = 'X'
board1

[['', '', ''], ['', '', 'X'], ['', '', '']]

In [195]:
# The outer * 3 repeats a reference to one inner list, so the three rows are the same object.
board2 = [ [''] * 3 ] * 3
board2[1][2] = "O"  # the change shows up in every row
board2

[['', '', 'O'], ['', '', 'O'], ['', '', 'O']]

> <mark>board2 把 `[''] * 3` 这个列表对象重复了 3 次, 实际重复的只是对同一个对象的引用。因此给其中一行的元素赋值时, 三行会同时变化, 这就暴露了它的本质</mark>

## 2.6 序列的增量赋值 

In [196]:
x, y = [1, 2, 3], [3, 2, 1]
x += y  # for a plain list, += appends the items of y to x
x

[1, 2, 3, 3, 2, 1]

In [197]:
from typing import Iterable


class MyList(list):
    """A ``list`` whose ``+=`` adds element-wise instead of extending."""

    # The return annotation is quoted because the name ``MyList`` is not bound
    # yet while the class body is being executed.
    def __iadd__(self, value: Iterable) -> "MyList":
        """Add ``value`` to this list element by element, in place.

        Args:
            value: An iterable that yields exactly ``len(self)`` items.

        Returns:
            This same object, so ``x += y`` leaves ``x`` bound to the
            original list.

        Raises:
            TypeError: If ``value`` is not iterable.
            ValueError: If ``value`` does not yield ``len(self)`` items.
        """
        if not isinstance(value, Iterable):
            raise TypeError(f"{value} not is Iterable type")
        # Materialize once so that iterables without __len__ (generators,
        # map objects, ...) can be length-checked as well.
        addends = list(value)
        if len(addends) != len(self):
            raise ValueError(f"len({value}) != len({self})")
        # Slice assignment mutates this object instead of creating a new one.
        self[:] = [x + y for x, y in zip(self, addends)]
        return self

In [198]:
x, y = MyList([1, 3, 4, 5]), [2, 1, 2, 4]
x_id1 = id(x)  # identity before +=
x += y
x_id2 = id(x)  # identity after +=; equal to x_id1 when += worked in place
x, x_id1, x_id2

([3, 4, 6, 9], 2241509895664, 2241509895664)

In [199]:
import numpy as np
y = np.array([10, 3, 4, 5])
x += y  # an ndarray of matching length is accepted as the right operand
x

[np.int64(13), np.int64(7), np.int64(10), np.int64(14)]

In [200]:
y = [1, 2]
x += y  # length mismatch: raises ValueError

ValueError: len([1, 2]) != len([np.int64(13), np.int64(7), np.int64(10), np.int64(14)])

In [None]:
x += 1  # an int is not iterable: raises TypeError

TypeError: 1 not is Iterable type

### 一个关于 += 的谜题

In [None]:
t = (1, 2, [30, 40])
# Raises TypeError, yet the list inside t has already been extended by then.
t[2] += [50, 60]

TypeError: 'tuple' object does not support item assignment

In [None]:
t

(1, 2, [30, 40, 50, 60])

In [None]:
t = (1, 2, MyList([20, 30]))
t[2] += (50, 60)  # the MyList is updated in place, then assigning back into the tuple raises TypeError

TypeError: 'tuple' object does not support item assignment

In [None]:
t

(1, 2, [70, 90])

即使 `__iadd__` 只是就地修改自身并返回自身, `t[2] += ...` 仍然会抛出异常: 增量赋值的最后一步要把结果重新赋值给 `t[2]`, 而元组不支持对元素赋值。不过在抛出异常之前, 元组引用的那个列表已经被修改了

In [None]:
# The result is assigned back into a slice of the list, never into the tuple itself.
t[2][:] += [30, 40]
t

(1, 2, [70, 90, 30, 40])

> 这样是可以的, 通过切片赋值

In [None]:
t[2].extend([20, 20])  # a method call mutates the list without assigning to the tuple
t

(1, 2, [70, 90, 30, 40, 20, 20])

> 这样也是可以的, 通过 extend 进行追加

## 2.8 用bisect来管理自己已排序的序列

### 用bisect来搜索

In [None]:
import bisect
import sys

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

# Row layout: the needle, "@", the returned position, then the needle again
# placed after the indentation string passed as argument 2.
ROW_FMT = '{0:2d} @ {1:2d}    {2}{0:<2d}'


def demo(bisect_fn):
    """Print the position ``bisect_fn`` returns for each needle in HAYSTACK.

    ``bisect_fn`` is called as ``bisect_fn(HAYSTACK, needle)`` and must
    return an integer.  Needles are processed in reverse order of NEEDLES,
    i.e. from the largest to the smallest.
    """
    print('DEMO:', bisect_fn.__name__)
    haystack_row = ' '.join('%2d' % item for item in HAYSTACK)
    print('haystack ->', haystack_row)
    for needle in NEEDLES[::-1]:
        position = bisect_fn(HAYSTACK, needle)
        # One '  |' marker per slot to the left of the returned position.
        offset = '  |' * position
        print(ROW_FMT.format(needle, position, offset))

In [None]:
demo(bisect.bisect_left)    # bisect_left: a needle equal to existing items is placed before them

DEMO: bisect_left
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 13      |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 12      |  |  |  |  |  |  |  |  |  |  |  |29
23 @  9      |  |  |  |  |  |  |  |  |23
22 @  9      |  |  |  |  |  |  |  |  |22
10 @  5      |  |  |  |  |10
 8 @  4      |  |  |  |8 
 5 @  2      |  |5 
 2 @  1      |2 
 1 @  0    1 
 0 @  0    0 


In [None]:
demo(bisect.bisect_right)   # bisect_right: a needle equal to existing items is placed after them

DEMO: bisect_right
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 13      |  |  |  |  |  |  |  |  |  |  |  |  |29
23 @ 11      |  |  |  |  |  |  |  |  |  |  |23
22 @  9      |  |  |  |  |  |  |  |  |22
10 @  5      |  |  |  |  |10
 8 @  5      |  |  |  |  |8 
 5 @  3      |  |  |5 
 2 @  1      |2 
 1 @  1      |1 
 0 @  0    0 


In [None]:
demo(bisect.bisect)     # bisect.bisect is equivalent to bisect_right

DEMO: bisect_right
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 13      |  |  |  |  |  |  |  |  |  |  |  |  |29
23 @ 11      |  |  |  |  |  |  |  |  |  |  |23
22 @  9      |  |  |  |  |  |  |  |  |22
10 @  5      |  |  |  |  |10
 8 @  5      |  |  |  |  |8 
 5 @  3      |  |  |5 
 2 @  1      |2 
 1 @  1      |1 
 0 @  0    0 


### 用bisect.insort 插入新元素

In [None]:
import bisect
import random

SIZE = 7

# Seed the generator with a fixed value before drawing any numbers.
random.seed(1729)

my_list = []
for i in range(SIZE):
    # Draw from range(SIZE * 2) and insert so that my_list stays sorted.
    new_item = random.randrange(0, SIZE * 2)
    bisect.insort_right(my_list, new_item)
    print('%2d ->' % (new_item,), my_list)


10 -> [10]
 0 -> [0, 10]
 6 -> [0, 6, 10]
 8 -> [0, 6, 8, 10]
 7 -> [0, 6, 7, 8, 10]
 2 -> [0, 2, 6, 7, 8, 10]
10 -> [0, 2, 6, 7, 8, 10, 10]


## 2.9 当列表不是首选时

### 2.9.1 数组

数组(array)背后存储的并不是 float 对象, 而是数字的机器表示, 也就是打包后的字节

In [None]:
import os
from array import array
from random import random
floats = array('d', (random() for i in range(10**7)))  # ten million 'd' (double) items from a generator expression
floats[-1]

0.051056611520245765

In [None]:
with open("floats.bin", 'wb') as f:
    floats.tofile(f)  # write the array's items to the binary file

In [None]:
with open("./floats.bin", 'rb') as f:
    floats2 = array('d')
    floats2.fromfile(f, 10**7)  # read ten million items back from the file
os.remove("./floats.bin")  # delete the file once it has been read back
floats == floats2

True

In [None]:
a = array('I', [1, 2, 3])
b = array('I', [2, 3, 4])
a + b  # + concatenates the two arrays into a new array

array('I', [1, 2, 3, 2, 3, 4])

In [None]:
import sys
empty_array_size = sys.getsizeof(array('I'))
# Sizes in bytes of an empty 'I' array, an empty 'B' array and an empty list.
sys.getsizeof(array('I')), sys.getsizeof(array('B')), sys.getsizeof([])

(80, 80, 56)

In [None]:
# Estimate the per-item storage cost of array('I') versus list: subtract each
# container's empty size from its size when holding the same 10,000,000 ints,
# then divide by the item count.  sys.getsizeof is shallow, so for the list
# this measures the container itself, not the int objects it refers to.
x = list(range(10000000))
empty_array_size = sys.getsizeof(array('I'))
empty_list_size = sys.getsizeof([])
arr_x = array('I', x)
arr_unit_size = (sys.getsizeof(arr_x) - empty_array_size) / len(arr_x)
# The baseline for the list must be the empty list, not the empty array.
list_unit_size = (sys.getsizeof(x) - empty_list_size) / len(x)
arr_unit_size, list_unit_size

(4.0, 8.0)

> 显然, 存储数字, array比list占用的内存资源更少

In [None]:
sorted(b)   # sorted() returns the array's items as a new sorted list

[2, 3, 4]

### 2.9.2 内存视图

### 2.9.4 双向队列和其它形式的队列

In [201]:
from collections import deque
dq = deque(range(10), 10)  # the second argument is maxlen: the deque holds at most 10 items
dq

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)

In [202]:
dq.append(2)  # the deque is full, so appending on the right drops the leftmost item
dq

deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 2], maxlen=10)

In [203]:
dq.appendleft(3)  # appending on the left of a full deque drops the rightmost item
dq

deque([3, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)

#### 栈的用法

In [205]:
dq.pop()  # stack usage: take from the right end...
dq.append(23)  # ...and push onto that same end
dq

deque([3, 1, 2, 3, 4, 5, 6, 7, 8, 23], maxlen=10)

#### 队列的用法

In [206]:
dq.popleft()  # queue usage: take from the left end...
dq.append(10)  # ...and add at the right end
dq[0], dq

(1, deque([1, 2, 3, 4, 5, 6, 7, 8, 23, 10], maxlen=10))

## 2.10 本章小结

<img src="./images/第二章总结1.jpg" width="70%">
<img src="./images/第二章总结2.jpg" width="70%">