In [1]:
import pandas as pd

In [4]:
# Build the currency-symbol string from its Unicode code points.
symbols = ''.join(map(chr, (36, 162, 163, 165, 8364, 164)))
symbols

'$¢£¥€¤'

In [6]:
# List comprehension: collect the Unicode code point of each character in `symbols`.
codes = [ord(symbol) for symbol in symbols]
codes

[36, 162, 163, 165, 8364, 164]

In [7]:
# Generator expression passed straight to tuple(): same code points, as a tuple.
tuple(ord(symbol) for symbol in symbols)

(36, 162, 163, 165, 8364, 164)

### 2.4.1 用作记录

In [1]:
# A tuple used as a record: each position carries a distinct field.
lax_coordinates = (33,-118)
# Unpack a five-field record into separate names in one assignment.
city, year, pop, chg, area = ('Tokyo', '2003', '32450', '0.66', '8014')

# List of two-field records; sorted() orders them by their first item here.
traveler_ids = [('USA','31195855'),('BRA','CE342567'),('ESP','XDA205856')]
for passport in sorted(traveler_ids):
    # The % operator spreads the two tuple items over the two %s slots.
    print('%s/%s' % passport)

BRA/CE342567
ESP/XDA205856
USA/31195855


In [None]:
# Tuple unpacking: assign each item of the tuple to its own name.
city, year, pop, chg, area = ('Tokyo', '2003', '32450', '0.66', '8014')

# 元组拆包赋值

## 2.5 序列和可迭代对象拆包

In [3]:
# The * operator unpacks an iterable into separate positional arguments of a call.
print(divmod(20,8))

t = (20, 8)
print(divmod(*t))  # same call as divmod(20, 8)

# The returned pair can itself be unpacked into two names.
quotient, remainder = divmod(*t )
print(quotient, remainder)

(2, 4)
(2, 4)
2 4


In [5]:
# os.path.split() returns a tuple of the directory part and the last path component.
import os 
_, filename = os.path.split('/home/Gowther/python_work/idrsa.pub')  # `_` is a throwaway name for the unused directory part
print(filename)

idrsa.pub


### 2.5.1 使用 * 获取余下的项

In [11]:
# A starred target collects whatever items are left over into a list.
a, b, *rest = range(5)
print(a,b,rest)

a, b,*rest = range(2)  # nothing left over, so rest is an empty list
print(a, b, rest)

a, *rest, c, d = range(5)  # the starred target does not have to come last
print(a, rest, c, d)

0 1 [2, 3, 4]
0 1 []
0 [1, 2] 3 4


In [2]:
# Without a starred target the number of names must match the number of items.
# The failure is the point of this cell, so it is caught and displayed
# instead of aborting a full notebook run.
try:
    a, b, rest = range(5)
    print(a, b, rest)
except ValueError as exc:
    print(f'{type(exc).__name__}: {exc}')

ValueError: too many values to unpack (expected 3)

### 2.5.2 在函数调用和序列字面量中使用 * 拆包

In [4]:
# A single call may use * unpacking more than once.
def fun(a,b,c,d,*rest):
    """Return the four leading arguments followed by a tuple of any extra positionals."""
    return a, b, c, d,rest

fun(*[1,2],3,*range(4,10))

(1, 2, 3, 4, (5, 6, 7, 8, 9))

In [8]:
# * unpacking also works when writing list, tuple, or set literals.
print(*range(0,4),5)  # unpacked: each number becomes its own argument
print(range(0,4),5)   # not unpacked: the range object itself is printed

print([*range(1,4),5])  # list of four numbers
print([range(1,4),5])   # list holding a range object and 5

print({5, *range(0,4)})  # set of five numbers
print({5, range(0,4)})   # set holding a range object and 5


0 1 2 3 5
range(0, 4) 5
[1, 2, 3, 5]
[range(1, 4), 5]
{0, 1, 2, 3, 5}
{range(0, 4), 5}


### 2.5.3 嵌套拆包

In [9]:
# Nested tuple unpacking
# NOTE(review): 'Mexico ,City' looks like a typo for 'Mexico City'; it is kept
# unchanged because the recorded output of this cell shows the same text.
metra_areas = [
    ('tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.612889, 77.208889)),
    ('Mexico ,City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]


def main():
    """Print a lat/long table of the areas whose longitude is zero or negative."""
    header = '{:15} | {:^9} | {:^9}'.format('', 'lat', 'long.')
    print(header)
    row_template = '{:15} | {:^9.4f} | {:^9.4f}'
    # The parenthesised part of the loop target unpacks the inner coordinate pair.
    for city, _country, _population, (lat, lon) in metra_areas:
        if lon > 0:
            continue
        print(row_template.format(city, lat, lon))


if __name__ == '__main__':
    main()

                |    lat    |   long.  
Mexico ,City    |  19.4333  | -99.1333 
New York-Newark |  40.8086  | -74.0204 
Sao Paulo       | -23.5478  | -46.6358 


In [18]:
text = '123456789'
# The width 5 is only a minimum: the 9-character string is printed in full, not truncated.
print('{:5}'.format(text))

123456789


In [22]:
# Build a named tuple type with collections.namedtuple
from collections import namedtuple

# Field names are given as one space-separated string.
City = namedtuple('City', 'name country population coordinates')
# The latitude literal was split by stray whitespace, which is a SyntaxError;
# 35.689782 is the value shown in this cell's recorded output.
tokyo = City('Tokyo', 'JP', 36.933, (35.689782, 139.691667))

print(tokyo)
print(tokyo.population)  # access by field name
print(tokyo[0])          # access by position

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689782, 139.691667))
36.933
Tokyo


### 2.6.3 序列模式匹配

In [None]:
# 析构，一种高级的拆包方法
# 假想的 Robot 类中的方法

def handle_command(self, message):
    match message: # match 后面的表达式是匹配对象，就是各个case语句尝试匹配的数据
        case ['BEEPER',frequency,times]: # 匹配一个含有3项的序列，第一项必须是 'BEEPER'，后面两项任意，会依次绑定到 frequency、time 上
            self.beep(times, frequency)
        case ['NECK',angle]: # 匹配含有任意两项，且第一项为'NECK'
            self.rotate_neck(angle)
        case ['LED',ident, intensity]: # 匹配3项的序列，第一项为'LED',如果项数不匹配，则匹配下一项
            self.leds[ident].set_brightness(ident,intensity)
        case ['LED',ident, red, green, blue]:
            self.leds[ident].set_color(ident,red,green, blue)
        case _: # 以上所有项都不匹配时执行
            raise InvalidCommand(message)

In [6]:
# 析构匹配
metra_areas = [
    ('tokyo','JP',36.933,(35.689722,139.691667)),
    ('Delhi NCR','IN',21.935,(28.612889,77.208889)),
    ('Mexico City','MX',20.142,(19.433333,-99.133333)),
    ('New York-Newark','US',20.104,(40.808611,-74.020386)),
    ('Sao Paulo','BR',19.649,(-23.547778,-46.635833))
]


def main():
    print('{:15}|{:^9}|{:^9}'.format('','lat','long'))
    fmt = '{:15}|{:^9.4f}|{:^9.4f}'
    for record in metra_areas:
        match record:
            case [name, _,_, (latitude, longitude)] if longitude <= 0:
                print(fmt.format(name, latitude, longitude))

if __name__ == '__main__':
    main()

               |   lat   |  long   
Mexico City    | 19.4333 |-99.1333 
New York-Newark| 40.8086 |-74.0204 
Sao Paulo      |-23.5478 |-46.6358 


## 2.7 切片

### 2.7.1 为什么切片和区间排除最后一项(即左闭右开)

In [1]:
l = [10,20,30,40,50,60]
# Splitting at index 2: l[:2] and l[2:] together cover the list with no overlap.
print(l[:2])
print(l[2:])
# NOTE(review): this pair repeats the split at index 2; presumably a different index was intended — confirm.
print(l[:2])
print(l[2:])

[10, 20]
[30, 40, 50, 60]
[10, 20]
[30, 40, 50, 60]


### 2.7.2 切片对象

In [3]:
s = 'bicycle'
print(s[::3])   # every third character from the start
print(s[::-1])  # a negative step walks backwards: the reversed string
print(s[::-2])  # every second character, walking backwards from the end

bye
elcycib
eccb


### 2.7.3 多维切片与省略号

In [1]:
# Multi-dimensional slicing: these cells only show that an ordinary sequence cannot do it.
a = [1,2,3,4,5,6,7,8]

a[2:5]

[3, 4, 5]

In [6]:
# start 0, stop 7 (exclusive), step 2
a[0:7:2]

[1, 3, 5, 7]

In [2]:
# A comma inside [] passes a tuple of slices as the index, which a list rejects.
# The failure is the point of this cell, so it is caught and displayed
# instead of aborting a full notebook run.
try:
    a[1:3,4:7]
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: list indices must be integers or slices, not tuple

In [8]:
# Ellipsis with a plain nested list
my_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# A built-in list does not accept a tuple index, so indexing with `1, ...`
# raises TypeError rather than returning [4, 5, 6]. The error is caught and
# displayed so a full notebook run can continue.
try:
    result = my_list[1, ...]
    print(result)

    # Not reached: the statement above already raised. This form uses a
    # tuple index as well.
    result = my_list[1, :, ...]
    print(result)
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')


TypeError: list indices must be integers or slices, not tuple

In [10]:
# Ellipsis (...) in NumPy indexing
import numpy as np

array = np.array([[1, 2, 3], [4, 5, 6]])

# ... alone selects the whole array.
print(array[...]) 
# [[1 2 3]
#  [4 5 6]]

# Index 1 on the last axis: the second column.
print(array[..., 1])  
# [2 5]

# Index 0 on the first axis: the first row.
print(array[0, ...])
# [1 2 3]

# Step 2 on the last axis: every other column.
print(array[..., ::2])
# [[1 3]
#  [4 6]]


[[1 2 3]
 [4 5 6]]
[2 5]
[1 2 3]
[[1 3]
 [4 6]]


### 2.7.4 为切片赋值

In [15]:
a = [0,1,2,3,4,5,6,7,8]

# Replace the two items at indexes 2 and 3.
a[2:4] = [20,30]
# [0,1,20,30,4,5,6,7,8]

# Delete the items at indexes 5 and 6.
del a[5:7]
# [0,1,20,30,4,7,8]

# Slice with step 2: overwrite indexes 3 and 5 with the two new values.
a[3::2] = [100,200]
# [0,1,20,100,4,200,8]

a

[0, 1, 20, 100, 4, 200, 8]

## 2.8 使用 + 与 * 处理序列

In [17]:
# + is normally used to concatenate two sequences of the same type into a new sequence of that type.
a = [1,2,3]
b = [4,5,6]
a + b

# [1, 2, 3, 4, 5, 6]

[1, 2, 3, 4, 5, 6]

In [18]:
# * is normally used to concatenate several copies of the same sequence.
a = [1,2,3]
b = a * 3
b

# [1, 2, 3, 1, 2, 3, 1, 2, 3]

[1, 2, 3, 1, 2, 3, 1, 2, 3]

### 2.8.1 构建嵌套列表

In [20]:
# The list comprehension builds a fresh inner list on every iteration, so the three rows are separate objects.
board = [['_'] * 3 for i in range(3)]
print(board)

board[1][2] = 'X'
print(board)

# Here the three rows all refer to the same inner list, so changing one changes all three.
weird_board = [['_'] * 3] * 3
print(weird_board)
weird_board[1][2] = 'X'
print(weird_board)

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', 'X'], ['_', '_', 'X'], ['_', '_', 'X']]


### 2.8.2 使用增量运算符处理序列

In [21]:
# 对于可变序列来说，使用 += 相当于对原有的对象进行扩展,对象ID不变
a = [1,2,3]
print(id(a))

b = [4,5,6]
a += b
print(id(a))


2292844216832
2292844216832


In [22]:
# For an immutable sequence, += binds the name to a new object holding the result; the id() changes.
a = (1,2,3)
print(id(a))

b = (4,5,6)
a += b
print(id(a))

2292844223040
2292843049888


### 2.8.3 一个 += 运算符赋值谜题

In [26]:
t = (1, 2, [30, 40])
# The += on the list item raises TypeError because the tuple refuses the
# item assignment. The error is caught and displayed so a full notebook run
# can continue and the following cells can inspect `t`.
try:
    t[2] += [10, 20]
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: 'tuple' object does not support item assignment

In [27]:
# Despite the TypeError, the list inside the tuple was already extended.
t[2]

[30, 40, 10, 20]

In [28]:
# The tuple as a whole shows the extended list as its last item.
t

(1, 2, [30, 40, 10, 20])

In [32]:
# Inspect the bytecode
import dis
t = (1, 2, [30,40]) 
a = 2
b = [10, 20]
def test():
    t[a] += b  # the single statement whose bytecode is disassembled below

dis.dis(test)  # prints one line per bytecode instruction of test()

  7           0 LOAD_GLOBAL              0 (t)
              2 LOAD_GLOBAL              1 (a)
              4 DUP_TOP_TWO
              6 BINARY_SUBSCR
              8 LOAD_GLOBAL              2 (b)
             10 INPLACE_ADD
             12 ROT_THREE
             14 STORE_SUBSCR
             16 LOAD_CONST               0 (None)
             18 RETURN_VALUE


## 2.9 list.sort() 与内置函数 sorted

In [33]:
fruits = ['grape', 'raspberry', 'apple', 'banana',]

# sorted() returns a new list each time; `fruits` itself is left untouched.
print(sorted(fruits))
print(fruits)

print(sorted(fruits, reverse=True))
print(fruits)

# key=len orders by word length instead of alphabetically.
print(sorted(fruits, key=len))
print(fruits)

print(sorted(fruits, key=len, reverse=True))
print(fruits)

# list.sort() sorts in place and returns None, which is what gets printed here.
print(fruits.sort())
print(fruits)

['apple', 'banana', 'grape', 'raspberry']
['grape', 'raspberry', 'apple', 'banana']
['raspberry', 'grape', 'banana', 'apple']
['grape', 'raspberry', 'apple', 'banana']
['grape', 'apple', 'banana', 'raspberry']
['grape', 'raspberry', 'apple', 'banana']
['raspberry', 'banana', 'grape', 'apple']
['grape', 'raspberry', 'apple', 'banana']
None
['apple', 'banana', 'grape', 'raspberry']


## 2.10 当列表不再适用

### 2.10.1 数组

In [9]:
# Create, save, and load a large array of floats
from array import array  # the array type
from random import random

# Array of ten million double-precision floats (typecode 'd').
floats = array('d', (random() for i in range(10**7)))
print(floats[-1])  # show the last value
print(len(floats))

# Save to a binary file; the with-block closes the handle even if tofile() fails.
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)

float2 = array('d')  # empty array that will hold doubles
# Read back exactly as many values as were written; the with-block closes
# the read handle as well.
with open('floats.bin', 'rb') as fp:
    float2.fromfile(fp, len(floats))
print(float2[-1])

print(floats == float2)  # the round trip preserves every value

0.9551111980842537
10000000
0.9551111980842537
True


### 2.10.2 memoryview

In [6]:
from array import array
octets = array('B',range(6))  # six unsigned bytes: 0..5

m1 = memoryview(octets)
print(m1.tolist())

m2 = m1.cast('B',[2,3])  # the same six bytes viewed as 2 rows x 3 columns
print(m2.tolist())

m3 = m1.cast('B',[3,2])  # the same six bytes viewed as 3 rows x 2 columns
print(m3.tolist())

# Writes through either view land in the one underlying array, as the prints below show.
m2[1,1] = 22
m3[1,1] = 33

print(m1.tolist())
print(octets)

[0, 1, 2, 3, 4, 5]
[[0, 1, 2], [3, 4, 5]]
[[0, 1], [2, 3], [4, 5]]
[0, 1, 2, 33, 22, 5]
array('B', [0, 1, 2, 33, 22, 5])


### 2.10.3 Numpy

In [10]:
import numpy as np
a = np.arange(12) # one-dimensional array with 12 elements
print(a)

print(a.shape)

# NOTE(review): the shape is printed a second time with nothing changed in between; presumably another attribute was meant — confirm.
print(a.shape)

a.shape = 3,4  # reshape in place to 3 rows x 4 columns
print('增加一个维度','\n', a)

print('第三行','\n', a[2])  # row at index 2

print('第二列','\n', a[: ,1])  # column at index 1

print('a的转置', '\n', a.transpose())  # rows and columns swapped


[ 0  1  2  3  4  5  6  7  8  9 10 11]
(12,)
(12,)
增加一个维度 
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
第三行 
 [ 8  9 10 11]
第二列 
 [1 5 9]
a的转置 
 [[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [15]:
import numpy as np
from random import random  # NOTE(review): `random` is not used in this cell

# Ten million random floats, written out as text with one value per line.
# NOTE(review): no seed is set, so each run writes different values.
floats_10M_lines = np.random.rand(10**7)
with open('floats-10M-lines.txt','w') as fp:
    for num in floats_10M_lines:
        fp.write(str(num) + '\n')

In [16]:
# Load the text file of floats; the with-block closes the handle as soon as
# loading is done instead of leaving it open for the rest of the session.
with open('floats-10M-lines.txt', 'r') as fp:
    floats = np.loadtxt(fp)
print('Length', len(floats))

print(floats[-3:])  # last three values

floats *= 5  # multiply every element in place
print(floats[-3:])

Length 10000000
[0.97557426 0.75669806 0.62199622]
[4.87787132 3.78349028 3.10998109]


In [17]:
from time import perf_counter as pc

# Time one in-place division over the whole array; the elapsed seconds are printed.
t0 = pc()
floats /= 3
print(pc() - t0)

0.007163600064814091


In [14]:
np.save('floats-10M',floats)  # the file is read back below under the name floats-10M.npy

# The second argument 'r+' is the memory-map mode.
# NOTE(review): with 'r+' the in-place multiply presumably writes through to floats-10M.npy — confirm before re-running this cell.
floats2 = np.load('floats-10M.npy', 'r+')
floats2 *= 6
print(floats2[-3:])

[0.01757648 0.43244756 0.10060515]


### 2.10.4 双端队列和其他队列

In [28]:
from collections import deque
dq = deque(range(10), maxlen=10)

print(dq)

dq.rotate(3) # rotate: n > 0 moves items from the right end to the left; n < 0 moves items from the left end to the right

dq.rotate(-4) 
print(dq)

dq.appendleft(-1) # appending to one end of a full deque discards an item from the other end
print(dq)

dq.extend([11,22,33]) # adds three items on the right; the three leftmost items are discarded
print(dq)

dq.extendleft([10,20,30,40]) # extendleft appends the items to the left one at a time, so they end up in reverse order
print(dq)

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], maxlen=10)
deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33], maxlen=10)
deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8], maxlen=10)
