# Python科学运算2:矢量化操作与广播运算

In [1]:
# 导包
import numpy as np

## 矢量化操作
数据在不需要编写循环的情况下就可以进行批量运算，这就叫矢量化操作
### 加法

In [2]:
arr1 = np.array([1,2,3,4])
arr2 = np.array([5,6,7,8])
arr1 + arr2

array([ 6,  8, 10, 12])

In [3]:
arr1 = np.array([[1,2,3,4],[-3,2,4,9]])
arr2 = np.array([[5,6,7,8],[4,5,1,-10]])
arr1 + arr2

array([[ 6,  8, 10, 12],
       [ 1,  7,  5, -1]])

### 减法

In [4]:
arr1-arr2

array([[-4, -4, -4, -4],
       [-7, -3,  3, 19]])

### 乘法

In [5]:
arr1*arr2

array([[  5,  12,  21,  32],
       [-12,  10,   4, -90]])

In [6]:
5 * arr1

array([[  5,  10,  15,  20],
       [-15,  10,  20,  45]])

In [7]:
### 除法

In [8]:
arr1 / arr2

array([[ 0.2       ,  0.33333333,  0.42857143,  0.5       ],
       [-0.75      ,  0.4       ,  4.        , -0.9       ]])

### 基本的索引和切片操作

In [9]:
# 初始化
arr =  np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
# Index 3 selects the fourth element, because indexing is zero-based
arr[3]

3

In [11]:
# 访问4及以后的元素
arr[4:]

array([4, 5, 6, 7, 8, 9])

In [12]:
# Assigning a scalar to a slice broadcasts the value to every element of the slice;
# the assignment modifies arr itself in place
arr[0:4] = 9
arr

array([9, 9, 9, 9, 4, 5, 6, 7, 8, 9])

In [13]:
# 如果不想影响原数据操作
# 使用copy
a = arr.copy()
a

array([9, 9, 9, 9, 4, 5, 6, 7, 8, 9])

In [14]:
# 访问2维数组
arr3 = np.array([[1,2,3,4],[-3,2,4,9]])
arr3

array([[ 1,  2,  3,  4],
       [-3,  2,  4,  9]])

In [15]:
# 读取第一行
arr3[0]

array([1, 2, 3, 4])

In [16]:
# 读取第二行第四个
arr3[1][3]

9

In [17]:
# 第二种方式
arr3[1,3]

9

### 比较运算
- 在之前我们学习多条件运算时，我们使用的是and 和 or
- 使用numpy对象进行多条件的比较运算时，不能使用and和or
- 必须使用 `&`（and）和 `|`（or），并且每个条件都要用括号括起来，因为 `&` 和 `|` 的优先级高于比较运算符

In [18]:
names = np.array(['John','Tony','Jackie','Ben'])
names

array(['John', 'Tony', 'Jackie', 'Ben'], dtype='<U6')

In [19]:
names == "Tony"

array([False,  True, False, False])

In [20]:
# Both conditions combined with & (element-wise AND): no name can equal
# "Tony" and "Ben" at the same time, so every entry of the result is False
(names == "Tony") & (names == "Ben")

array([False, False, False, False])

In [21]:
# 选择 Tony 或 Ben
(names == "Tony") | (names == "Ben")

array([False,  True, False,  True])

### 花式索引（fancy indexing）
是一个NumPy中的术语，它指的是利用整数数组来进行索引的方式


In [22]:
# Create an 8x4 ndarray (not a Python list) with np.empty.
# np.empty does not initialise the memory, so the values displayed are arbitrary
# and can differ from run to run.
arr = np.empty((8,4))
arr

array([[-1.49166815e-154,  2.00389662e+000,  7.90505033e-323,
         0.00000000e+000],
       [ 6.93616764e-310,  1.16097020e-028,  7.25380779e-043,
         2.73417956e-052],
       [ 1.57177608e-076,  5.01163173e+217,  8.37170074e-144,
         9.59581405e-071],
       [ 3.88585636e-033,  7.42763976e-091,  2.57278426e+184,
         5.01163185e+217],
       [ 5.49419094e-143,  6.47287734e+170,  7.50189709e+247,
         1.35617292e+248],
       [ 2.74045673e+180,  8.41208666e+164,  1.04032465e-042,
         1.65612686e-076],
       [ 6.64437004e-066,  5.04621361e+180,  8.37170571e-144,
         9.30350598e+199],
       [ 5.60230216e-067,  2.10458520e-308,  6.93620996e-310,
         2.22507810e-308]])

In [23]:
# Fill row i of the array with the value i. A column vector of shape (8, 1)
# broadcasts across the 4 columns of arr, so no Python loop is needed.
arr[:] = np.arange(8)[:, np.newaxis]
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [26]:
# Select the 5th, 4th, 1st and 7th rows followed by the last row (5 rows in total).
# Passing a list or array of integers picks rows in exactly the order given.
# Negative integers count from the end: -1 is the last row.
arr[[4,3,0,6,-1]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [29]:
arr = np.arange(32)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

In [30]:
arr = np.arange(32).reshape((8,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [33]:
# 选取第2，6，8，3行
arr[[1,5,7,2]]

array([[ 4,  5,  6,  7],
       [20, 21, 22, 23],
       [28, 29, 30, 31],
       [ 8,  9, 10, 11]])

In [32]:
# The two index lists are paired element-wise, selecting the elements at
# (1, 0), (5, 3), (7, 1) and (2, 2); the result is 1-D, not a 2-D block
arr[[1,5,7,2],[0,3,1,2]]

array([ 4, 23, 29, 10])

In [43]:
# Two-step fancy indexing: arr[[1,5,7,2]] first builds a new array from rows 1, 5, 7, 2;
# then [[1],[0,3,1]] broadcasts row index 1 of that new array (original row 5)
# against columns 0, 3, 1, giving a single 1-D result
arr[[1,5,7,2]][[1],[0,3,1]]

array([20, 23, 21])

In [41]:
# Pick rows 1, 5, 7, 2 first, then use [:, [0,3,1]] to keep columns 0, 3, 1
# (in that order) of every selected row, giving a 4x3 block
arr[[1,5,7,2]][:,[0,3,1]]

array([[ 4,  7,  5],
       [20, 23, 21],
       [28, 31, 29],
       [ 8, 11,  9]])

In [51]:
arr[:,[0,3,1]]

array([[ 0,  3,  1],
       [ 4,  7,  5],
       [ 8, 11,  9],
       [12, 15, 13],
       [16, 19, 17],
       [20, 23, 21],
       [24, 27, 25],
       [28, 31, 29]])

### 转置操作

In [56]:
arr = np.arange(15).reshape((3,5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [57]:
# 使用transpose 
arr.transpose()

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [58]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [62]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

### numpy的降维操作

In [65]:
arr = np.array([[1,100,1000],[2,200,2000],[3,300,3000]])
arr

array([[   1,  100, 1000],
       [   2,  200, 2000],
       [   3,  300, 3000]])

In [66]:
# 默认排序的降维
arr.ravel()


array([   1,  100, 1000,    2,  200, 2000,    3,  300, 3000])

In [67]:
arr.reshape(-1)

array([   1,  100, 1000,    2,  200, 2000,    3,  300, 3000])

In [68]:
arr.flatten()

array([   1,  100, 1000,    2,  200, 2000,    3,  300, 3000])

#### 改变排序模式的降维操作

In [69]:
arr.ravel(order = "F")

array([   1,    2,    3,  100,  200,  300, 1000, 2000, 3000])

In [70]:
arr.reshape(-1,order = "F")

array([   1,    2,    3,  100,  200,  300, 1000, 2000, 3000])

In [71]:
arr.flatten(order = "F")

array([   1,    2,    3,  100,  200,  300, 1000, 2000, 3000])

#### 降维以后值的修改

In [75]:
arr = np.array([[1,100,1000],[2,200,2000],[3,300,3000]])
arr.flatten()

array([   1,  100, 1000,    2,  200, 2000,    3,  300, 3000])

In [76]:
arr.flatten()[0] = 200

In [77]:
# flatten 方法降维之后对值修改不改变原数据
arr

array([[   1,  100, 1000],
       [   2,  200, 2000],
       [   3,  300, 3000]])

In [78]:
# Writing through the result of ravel() changes arr here, because ravel() returns
# a view of the original data when it can (it only copies when a view is impossible)
arr.ravel()[1] = 1000
arr

array([[   1, 1000, 1000],
       [   2,  200, 2000],
       [   3,  300, 3000]])

In [79]:
arr.reshape(-1)

array([   1, 1000, 1000,    2,  200, 2000,    3,  300, 3000])

In [81]:
# reshape(-1) only changes the shape; for this array it returns a view, so writing
# through it also modifies arr (NumPy falls back to a copy when a view is not possible)
arr.reshape(-1)[2] = 50000 
arr

array([[    1,  1000, 50000],
       [    2,   200,  2000],
       [    3,   300,  3000]])

### NumPy 堆叠操作

In [83]:
arr1 = np.array([[1,100,1000],[2,200,2000],[3,300,3000]])
arr1

array([[   1,  100, 1000],
       [   2,  200, 2000],
       [   3,  300, 3000]])

In [84]:
arr2 = np.array([[-12,321,3332]])
arr2

array([[ -12,  321, 3332]])

In [86]:
# 纵向堆叠
np.vstack([arr1,arr2])

array([[   1,  100, 1000],
       [   2,  200, 2000],
       [   3,  300, 3000],
       [ -12,  321, 3332]])

In [87]:
# np.row_stack is only an alias of np.vstack and is deprecated since NumPy 2.0,
# so call np.vstack directly; the stacked result is identical.
np.vstack([arr1,arr2])

array([[   1,  100, 1000],
       [   2,  200, 2000],
       [   3,  300, 3000],
       [ -12,  321, 3332]])

In [88]:
arr3 = np.array([[5],[6],[7]])
arr3

array([[5],
       [6],
       [7]])

In [90]:
# Horizontal stacking: arr3 is appended to arr1 as an extra column
np.hstack([arr1,arr3])

array([[   1,  100, 1000,    5],
       [   2,  200, 2000,    6],
       [   3,  300, 3000,    7]])

In [91]:
np.column_stack([arr1,arr3])

array([[   1,  100, 1000,    5],
       [   2,  200, 2000,    6],
       [   3,  300, 3000,    7]])

## 广播运算

In [99]:
arr1 = np.arange(12).reshape(3,4)
arr1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [100]:
arr2 = np.arange(101,113).reshape(3,4)
arr2

array([[101, 102, 103, 104],
       [105, 106, 107, 108],
       [109, 110, 111, 112]])

In [102]:
# No broadcasting is involved here: both arrays have the same shape (3, 4),
# so the addition is purely element-wise
arr1+arr2

array([[101, 103, 105, 107],
       [109, 111, 113, 115],
       [117, 119, 121, 123]])

In [103]:
arr3 = np.arange(60).reshape(5,4,3)
arr3

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]],

       [[12, 13, 14],
        [15, 16, 17],
        [18, 19, 20],
        [21, 22, 23]],

       [[24, 25, 26],
        [27, 28, 29],
        [30, 31, 32],
        [33, 34, 35]],

       [[36, 37, 38],
        [39, 40, 41],
        [42, 43, 44],
        [45, 46, 47]],

       [[48, 49, 50],
        [51, 52, 53],
        [54, 55, 56],
        [57, 58, 59]]])

In [104]:
arr4 = np.arange(12).reshape(4,3)
arr4

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

#### 维度不一致，但是从末尾对齐的维度值一致（或其中一个为 1）

In [107]:
arr3 + arr4

array([[[ 0,  2,  4],
        [ 6,  8, 10],
        [12, 14, 16],
        [18, 20, 22]],

       [[12, 14, 16],
        [18, 20, 22],
        [24, 26, 28],
        [30, 32, 34]],

       [[24, 26, 28],
        [30, 32, 34],
        [36, 38, 40],
        [42, 44, 46]],

       [[36, 38, 40],
        [42, 44, 46],
        [48, 50, 52],
        [54, 56, 58]],

       [[48, 50, 52],
        [54, 56, 58],
        [60, 62, 64],
        [66, 68, 70]]])

In [108]:
arr5 = np.arange(4).reshape(4,1)
arr5

array([[0],
       [1],
       [2],
       [3]])

In [109]:
arr3 + arr5

array([[[ 0,  1,  2],
        [ 4,  5,  6],
        [ 8,  9, 10],
        [12, 13, 14]],

       [[12, 13, 14],
        [16, 17, 18],
        [20, 21, 22],
        [24, 25, 26]],

       [[24, 25, 26],
        [28, 29, 30],
        [32, 33, 34],
        [36, 37, 38]],

       [[36, 37, 38],
        [40, 41, 42],
        [44, 45, 46],
        [48, 49, 50]],

       [[48, 49, 50],
        [52, 53, 54],
        [56, 57, 58],
        [60, 61, 62]]])

In [112]:
# 自动补齐
arr1 = np.arange(12).reshape(4,3)
arr1

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [113]:
arr2 = np.array([1,2,3])
arr2

array([1, 2, 3])

In [114]:
arr1 + arr2

array([[ 1,  3,  5],
       [ 4,  6,  8],
       [ 7,  9, 11],
       [10, 12, 14]])

#### 不能广播运算的情况

In [116]:
arr1 = np.arange(60).reshape(5,4,3)
arr1

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]],

       [[12, 13, 14],
        [15, 16, 17],
        [18, 19, 20],
        [21, 22, 23]],

       [[24, 25, 26],
        [27, 28, 29],
        [30, 31, 32],
        [33, 34, 35]],

       [[36, 37, 38],
        [39, 40, 41],
        [42, 43, 44],
        [45, 46, 47]],

       [[48, 49, 50],
        [51, 52, 53],
        [54, 55, 56],
        [57, 58, 59]]])

In [117]:
arr2 = np.arange(8).reshape(4,2)
arr2

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [118]:
# Shapes (5,4,3) and (4,2) cannot be broadcast: the trailing dimensions 3 and 2
# differ and neither is 1, so NumPy raises ValueError. The error is the point of
# this cell, so catch and print it instead of aborting a "Run All" pass.
try:
    arr1 + arr2
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (5,4,3) (4,2) 

## 条件逻辑转数组
- x if condition else y 
- 当条件condition成立的时候，表达式返回的值是x,不成立的时候返回的是y
- np.where 一般用于根据一个数组产生一个新的数组

In [120]:
# Pure-Python version of the element-wise choice, shown for comparison with np.where
arr1 = np.array([1.1, 1.2, 1.3])
arr2 = np.array([7.1, 2.4, 3.5])
condition = np.array([True, False, True])

# zip accepts iterables and yields tuples of their corresponding elements
# (in Python 3 it is a lazy iterator, not a list); for each tuple take the
# value from arr1 when the flag is True, otherwise the value from arr2
result = [
    (from_first if pick_first else from_second)
    for pick_first, from_first, from_second in zip(condition, arr1, arr2)
]
result

[1.1, 2.4, 1.3]

In [121]:
# 使用where
result = np.where(condition,arr1,arr2)
result

array([1.1, 2.4, 1.3])

### 值的替换

In [122]:
# Draw a 4x4 array of samples with np.random.randn.
# NOTE(review): no random seed is set in the visible cells, so the values shown
# will differ on every run.
arr = np.random.randn(4,4)
arr

array([[-0.49443894, -0.13046277,  0.61851392, -0.54658335],
       [-0.52188691, -1.32530799, -0.83592973,  0.44704209],
       [ 1.36588911,  0.47763326, -0.170995  ,  0.2513003 ],
       [-0.26673873, -1.24067555,  0.63468353, -0.4797104 ]])

In [124]:
# np.where 支持3个参数，第一个参数是条件
# 第二个参数是条件成立的时候返回的值
# 第三个参数是条件不成立的时候返回的值
result = np.where(arr > 0.2,2,-2)
result

array([[-2, -2,  2, -2],
       [-2, -2, -2,  2],
       [ 2,  2, -2,  2],
       [-2, -2,  2, -2]])

In [125]:
# Replace only the values greater than 0.2 with 2; passing arr as the third
# argument keeps the original value wherever the condition is False
result = np.where(arr > 0.2,2,arr)
result

array([[-0.49443894, -0.13046277,  2.        , -0.54658335],
       [-0.52188691, -1.32530799, -0.83592973,  2.        ],
       [ 2.        ,  2.        , -0.170995  ,  2.        ],
       [-0.26673873, -1.24067555,  2.        , -0.4797104 ]])

# 下一节：时间戳