## NumPy
可以提供高效的数组运算

In [3]:
import numpy as np

In [4]:
## Build the same 100000 integers twice: once as a NumPy array, once as a plain Python list
my_arr = np.arange(100000)
my_list = list(range(100000))

## Time 10 repetitions of doubling every element with a single vectorized array operation
%time for _ in range(10):my_arr2 = my_arr *2

CPU times: user 894 µs, sys: 449 µs, total: 1.34 ms
Wall time: 1.25 ms


In [5]:
## Time 10 repetitions of the same doubling done element by element with a list comprehension
%time for _ in range(10):my_list2 = [x*2 for x in my_list]

CPU times: user 64.2 ms, sys: 12.1 ms, total: 76.2 ms
Wall time: 75 ms


## ndarray 数组运算
把数组中每项都进行计算

In [6]:
## randn draws samples from the standard normal distribution;
## the arguments give the length of each dimension (here a 2x3 array)
data = np.random.randn(2,3)
data

array([[-0.05657265,  0.02131172, -0.83325287],
       [-1.12958607, -0.19666625, -0.43137555]])

In [7]:
data * 10

array([[ -0.56572647,   0.21311719,  -8.33252874],
       [-11.29586069,  -1.96666252,  -4.31375552]])

In [8]:
data + data

array([[-0.11314529,  0.04262344, -1.66650575],
       [-2.25917214, -0.3933325 , -0.8627511 ]])

In [9]:
## * is applied element-wise: each element is multiplied by the element at the same position
data * data

array([[3.20046435e-03, 4.54189382e-04, 6.94310353e-01],
       [1.27596469e+00, 3.86776146e-02, 1.86084867e-01]])

In [10]:
## Comparing two arrays of the same shape yields a boolean array
arr = np.zeros((2,3))
data > arr

array([[False,  True, False],
       [False, False, False]])

### shape属性，记录数组是几x几的

In [11]:
data.shape

(2, 3)

### dtype属性，记录数组数据类型

In [12]:
data.dtype

dtype('float64')

### ndim属性，记录是几维数组

In [13]:
data.ndim

2

## 数组生成
array(data,dtype),zeros(),ones(),empty()

In [14]:
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [15]:
## A nested list of equal-length rows becomes a 2-D array
data2 = [[1,2,3,4],[5,6,7,8]]
## np.array accepts an explicit dtype for the resulting array
arr2 = np.array(data2,dtype = np.float64)
arr2

array([[1., 2., 3., 4.],
       [5., 6., 7., 8.]])

In [16]:
## Pass a tuple to create an all-zeros array with that shape
arr3 = np.zeros((2,3,2))
arr3

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

### arange()  range的数组版

In [17]:
np.arange(3,15)

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### astype()转换数据类型
可使用 np.float64、其他数组的 dtype（如 arr1.dtype），或者类型代码 'u4' 来指定类型

In [18]:
arr = np.array([1,2,3,4])
arr.dtype

dtype('int64')

In [19]:
## astype returns a new array with the requested dtype
float_arr = arr.astype(np.float64)
float_arr.dtype

dtype('float64')

## 索引和切片
### 一维数组索引和切片类似list

In [20]:
arr = np.arange(10)
arr[5]

5

In [21]:
## A slice is a view on the original array, so assigning to it changes the original;
## the scalar 12 is written to every position in the slice
arr[3:5] = 12
arr

array([ 0,  1,  2, 12, 12,  5,  6,  7,  8,  9])

### 多维数组索引
可传入多个数轴上的索引

In [22]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d.shape)
arr2d[2]

(3, 3)


array([7, 8, 9])

In [23]:
arr2d[2,2]

9

In [24]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d.shape)
arr3d[0,1]

(2, 2, 3)


array([4, 5, 6])

### 多维数组切片
可以进行多维度切片

In [25]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [26]:
## Separate the index/slice for each axis with a comma;
## this selects the second row (index 1), columns 0 and 1
arr2d[1,:2]

array([4, 5])

### 布尔索引
使用数组比较产生bool数组，`~` 可以对bool数组取反；布尔索引选出的结果是数据的拷贝（copy），而不是视图

In [27]:
## Assume each name corresponds to one row of data; the goal is to pick out the rows that belong to 'Bob'
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
data

array([[ 0.54599483, -0.28809999, -0.25175556,  2.17376599],
       [-0.76669172, -1.73186146, -0.02494975, -1.56643029],
       [-1.16743507,  0.85230399, -0.63356802, -1.73074852],
       [ 0.23294186,  0.16521651,  0.65413949, -1.47505693],
       [ 0.26468026, -1.22309461, -1.15534695,  1.31905654],
       [ 0.1274056 ,  1.39042493, -1.43190315, -2.04847181],
       [ 0.1131956 , -0.60891906,  0.05195751, -1.17294929]])

In [28]:
## Comparing the array with a scalar yields a boolean array, one entry per element
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [29]:
data[names == 'Bob',2:]

array([[-0.25175556,  2.17376599],
       [ 0.65413949, -1.47505693]])

In [30]:
data[~(names == 'Bob')]

array([[-0.76669172, -1.73186146, -0.02494975, -1.56643029],
       [-1.16743507,  0.85230399, -0.63356802, -1.73074852],
       [ 0.26468026, -1.22309461, -1.15534695,  1.31905654],
       [ 0.1274056 ,  1.39042493, -1.43190315, -2.04847181],
       [ 0.1131956 , -0.60891906,  0.05195751, -1.17294929]])

In [31]:
## Boolean conditions can be combined with & (and) and | (or);
## each comparison is parenthesized before being combined
mask = (names == 'Bob') | (names == 'Will')
data[mask]

array([[ 0.54599483, -0.28809999, -0.25175556,  2.17376599],
       [-1.16743507,  0.85230399, -0.63356802, -1.73074852],
       [ 0.23294186,  0.16521651,  0.65413949, -1.47505693],
       [ 0.26468026, -1.22309461, -1.15534695,  1.31905654]])

### 神奇索引
单个索引数组（注意使用方括号）

In [32]:
arr = np.random.randn(8,4)
arr

array([[ 0.28866948,  0.64414011,  1.77036844, -0.09532878],
       [-0.86034233,  1.64166593, -0.57925021, -0.01380849],
       [-0.16089355, -0.92046637, -1.7264932 ,  2.2814209 ],
       [ 0.91082281, -0.60557597,  0.21723949,  0.34202153],
       [ 1.46541423, -0.42960542,  0.26875151,  1.96524319],
       [ 0.3808915 ,  0.44927998, -0.54545898, -0.55889641],
       [ 0.5371292 , -0.68682898, -0.95841272,  0.54626779],
       [ 1.46587989,  0.97850988,  0.83476143, -0.14992236]])

In [33]:
## Select the listed rows, in the order given by the index list
arr[[4,3,0,6]]

array([[ 1.46541423, -0.42960542,  0.26875151,  1.96524319],
       [ 0.91082281, -0.60557597,  0.21723949,  0.34202153],
       [ 0.28866948,  0.64414011,  1.77036844, -0.09532878],
       [ 0.5371292 , -0.68682898, -0.95841272,  0.54626779]])

In [34]:
## Negative indices select rows counting from the end
arr[[-2,-3,-5]]

array([[ 0.5371292 , -0.68682898, -0.95841272,  0.54626779],
       [ 0.3808915 ,  0.44927998, -0.54545898, -0.55889641],
       [ 0.91082281, -0.60557597,  0.21723949,  0.34202153]])

传入多个索引数组，选出的是(1,0)(5,3)(7,1)(2,2)四个位置的值，并不是矩形区域

In [35]:
arr[[1,5,7,2],[0,3,1,2]]

array([-0.86034233, -0.55889641,  0.97850988, -1.7264932 ])

In [36]:
## One way to select a rectangular region: pick the rows first,
## then pick (and reorder) the columns of that intermediate result
arr[[1,5,7,2]][:,[0,3,1,2]]

array([[-0.86034233, -0.01380849,  1.64166593, -0.57925021],
       [ 0.3808915 , -0.55889641,  0.44927998, -0.54545898],
       [ 1.46587989, -0.14992236,  0.97850988,  0.83476143],
       [-0.16089355,  2.2814209 , -0.92046637, -1.7264932 ]])

## 数组转置
### 二维数组转置T，对应矩阵转置

In [37]:
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [38]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

### transpose(坐标轴编号)，用于多维数组

In [39]:
arr = np.arange(16).reshape(2,2,4)
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [40]:
## Swap the first and second axes (axis 0 and axis 1); axis 2 stays in place
arr.transpose(1,0,2)

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

### swapaxes(一对轴号)
交换轴号，返回视图

In [41]:
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [42]:
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

## 通用函数-快速逐元素数组函数
### 一元通用函数 sqrt()开方， exp() e的次方

In [43]:
arr = np.arange(10)
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [44]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

### 二元通用函数 add()逐项相加， maximum()逐项取大

In [45]:
x = np.random.randn(8)
x

array([ 1.63396345, -1.86205108, -0.11677371,  1.27945869, -0.44200503,
        0.51934117, -1.29314446,  1.08988007])

In [46]:
y = np.random.randn(8)
y

array([-0.04334784,  0.90850878,  0.03992077,  0.0173435 ,  0.91289387,
       -0.52841324,  2.82996429, -1.0357315 ])

In [47]:
np.maximum(x,y)

array([1.63396345, 0.90850878, 0.03992077, 1.27945869, 0.91289387,
       0.51934117, 2.82996429, 1.08988007])

通用函数表

![4-3ufunc.png](attachment:4-3ufunc.png)

![4-3ufunc2.png](attachment:4-3ufunc2.png)

## 4.3 面向数组编程
### 条件逻辑作为数组操作：numpy.where(条件，x，y) x和y可以是数组和标量
numpy.where 是  x if condition else y的向量化版本

In [48]:
## Where cond is True take the value from xarr, otherwise take it from yarr
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

In [50]:
## List-comprehension implementation: walks the three arrays in parallel and yields a plain Python list
result = [x if c else y for x,y,c in zip(xarr,yarr,cond)]
result

[1.1, 2.2, 1.3, 1.4, 2.5]

In [52]:
## np.where implementation: same selection as the list comprehension, but the result is an ndarray
result = np.where(cond,xarr,yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

### 4.3.2 数学和统计方法
sum，mean(平均)，std(标准差)

In [53]:
arr = np.random.randn(5,4)
arr

array([[ 0.40903868, -1.66958906,  1.5607178 , -1.80038858],
       [-0.16233521,  0.16037589, -0.19097162, -0.39716896],
       [-0.1733622 , -1.37845775, -0.43112971,  0.46407822],
       [ 1.44059264, -0.56549783, -0.62003935, -0.40650865],
       [-0.08202249,  1.99164522, -0.2701388 , -1.51922556]])

In [55]:
arr.sum()

-3.640387314936996

In [56]:
np.mean(arr)

-0.1820193657468498

In [57]:
## sum and mean accept an axis argument and compute along that axis;
## axis = 1 on the 5x4 array gives one mean per row (5 values)
arr.mean(axis = 1)

array([-0.37505529, -0.14752498, -0.37971786, -0.0378633 ,  0.03006459])

![4-3-2mean.jpg](attachment:4-3-2mean.jpg)

cumsum，cumprod累计函数，也可指定数轴

In [58]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [59]:
## Each position holds the running total of the elements up to and including it
## (cumprod does the same with a running product)
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28])

In [60]:
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [61]:
## Cumulative product along axis 1, i.e. across the columns within each row
arr.cumprod(axis=1)

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]])

![4-3-2sta.png](attachment:4-3-2sta.png)

### 4.3.3 布尔值数组
数组方法中True被处理为1，False被处理为0，所以可以用sum()统计True的个数；any()检查是否至少有一个True，all()检查是否全部为True

In [63]:
arr = np.random.randn(100)
(arr>0).sum()

59

In [64]:
## any(): is at least one element True?
bools = np.array([False, False, True, False])
bools.any()

True

In [65]:
## all(): is every element True?
bools.all()

False

### 4.3.4 排序 sort()

In [67]:
## ndarray.sort() is analogous to list.sort(); create an unsorted array to try it on
arr = np.random.randn(5)
arr

array([ 1.04493218, -1.29969959, -0.98812305, -0.74004564,  2.10095322])

In [68]:
arr.sort()
arr

array([-1.29969959, -0.98812305, -0.74004564,  1.04493218,  2.10095322])

In [70]:
## For multi-dimensional arrays, sort can be given the axis to sort along
arr = np.random.randn(5,3)
arr

array([[ 0.49155471,  0.51685842, -0.20202667],
       [-1.29935148, -0.01567247,  1.298262  ],
       [-1.86589701, -1.09505662,  1.50146879],
       [ 1.62298654,  0.24441796,  1.56945246],
       [-0.11153859, -0.64031208, -0.26503029]])

In [71]:
## Sort along axis 1: the values within each row are ordered, and arr itself is modified in place
arr.sort(1)
arr

array([[-0.20202667,  0.49155471,  0.51685842],
       [-1.29935148, -0.01567247,  1.298262  ],
       [-1.86589701, -1.09505662,  1.50146879],
       [ 0.24441796,  1.56945246,  1.62298654],
       [-0.64031208, -0.26503029, -0.11153859]])

### 4.3.5 集合处理
对应python中的set进行理解 
np.unique返回数组中排序后的唯一值

In [72]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

np.in1d(x,y) 计算x中的每个元素是否在y中，返回和x同型的bool数组（NumPy 2.0起np.in1d已弃用，推荐使用等价的np.isin）

In [73]:
values = np.array([6, 0, 0, 3, 2, 5, 6])
## np.isin tests, element by element, whether each entry of values occurs in the given list
## and returns a boolean array with the same shape as values.
## It is used instead of np.in1d, which is deprecated since NumPy 2.0.
np.isin(values, [2, 3, 6])

array([ True, False, False,  True,  True, False,  True])

![4-3-5set.png](attachment:4-3-5set.png)

## 4.4 数组文件储存
np.save 和 np.load可以把数组储存在文件中

In [74]:
## Write a single array to disk in NumPy's binary format;
## the file is later read back as 'some_array.npy', i.e. the .npy suffix is added to the given name
arr = np.arange(10)
np.save('some_array',arr)

In [76]:
np.load('some_array.npy')

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

np.savez(文件名，a=arr1，b=arr2)可以保存多个数组,savez_compressed()可以把数组存入压缩文件

In [77]:
## Store several arrays in one archive; each keyword name (a, b) becomes the key
## under which that array can be looked up after loading
arr2 = np.arange(10,20)
np.savez('array_archive.npz',a = arr,b = arr2)

In [78]:
## np.load on an .npz archive returns a dict-like NpzFile that reads arrays on access
## and keeps the underlying file open. The with-block closes it as soon as the wanted
## array has been read, so no file descriptor is leaked.
with np.load('array_archive.npz') as arch:
    arr_b = arch['b']
arr_b

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

## 4.5 线性代数
np.dot(x,y)相当于矩阵乘法