# numpy array
numpy 数组（ndarray）的元素在内存中连续存储，因此能够实现快速的连续访问。

In [7]:
import numpy as np
# 连续对象的内存
my_arr = np.arange(1000000)
print(my_arr)
# 需要反复寻址的内存
my_list = list(range(1000000))
print(my_arr)
# 比较计算耗时
# array 23ms
%time for _ in range(10) : my_arr = my_arr *2
# list 932ms 
%time for _ in range(10) : my_list = [x*2 for x in my_list]   

[     0      1      2 ... 999997 999998 999999]
[     0      1      2 ... 999997 999998 999999]
Wall time: 23 ms
Wall time: 932 ms


## 创建N 维数组 ndarray

In [25]:
# Build a 1-D ndarray from a plain Python list.
data1 = [6.5, 7, 8, 0, 1]
# The input mixes a float with ints; the resulting array stores floats only.
arr1 = np.array(data1)
arr1

array([6.5, 7. , 8. , 0. , 1. ])

In [26]:
# A list of equal-length lists becomes a 2-D array (2 rows x 4 columns).
data2 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

## array ndim shape dtype
数组维度，形状，元素类型

In [27]:
# Number of dimensions (axes) -> 2
print(arr2.ndim)
# Length along each axis -> (2, 4)
print(arr2.shape)
# Element types, one per line: float64 for arr1, then the integer type of arr2
# (int32 in the recorded run; the default integer width depends on the platform).
print(arr1.dtype, arr2.dtype, sep='\n')

2
(2, 4)
float64
int32


In [41]:
# 1-D array holding ten floating-point zeros
print(np.zeros(10))
# For more dimensions pass the shape as a tuple ...
print(np.zeros((3, 6)))
print('--------------')
# ... or as a list: 2 blocks of 3 rows x 4 columns
print(np.zeros([2, 3, 4]))

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
--------------
[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]]


In [42]:
# 1-D integer array 0..14, the ndarray counterpart of range(15).
# np.arange(0, 15) is the same call with the start written out.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

创建 array 的方法总结
- array 将参数转成 ndarray
- asarray 将参数转成 ndarray；如果参数已经是 ndarray（且 dtype 符合要求），则直接返回原对象，不进行复制
- arange 类似range
- ones,ones_like 生成全1数组
- zeros,zeros_like 生成全0数组
- empty, empty_like 分配内存空间，但不赋值（注意赋值前不要参与运算）
- full,full_like 分配内存空间，赋值成指定值
- eye,identity 生成单位矩阵(对角线全为1)

array 的元素类型
- 整数 int8,uint8,int16,uint16,int32,uint32,int64,uint64(i1-i8,u1-u8)
- 浮点数 float16,float32,float64,float128(f2,f4,f8,f16；单字符代码 f/d/g 分别对应 float32/float64/float128)
- 复数 complex64,complex128,complex256(c8-c32)
- bool (?)
- object (O)
- string_ (S)
- unicode_ (U)

In [45]:
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
# astype converts the elements to the requested dtype and hands back the
# result as float_arr; arr itself is printed above with its integer dtype.
float_arr = arr.astype(np.float64)
print(float_arr.dtype)


int32
float64


In [51]:
## Element-wise arithmetic on arrays
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
print(arr)
# Multiplication: element by element, not a matrix product
print(arr * arr)
# Subtraction
print(arr - arr)
# Division: the scalar 1 is divided by every element
print(1 / arr)
# Power: an exponent of 0.5 gives the element-wise square root
print(arr ** 0.5)
# Comparing two arrays of the same shape yields a boolean array
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
res = arr2 > arr
print(res)
print(res.dtype)

[[1. 2. 3.]
 [4. 5. 6.]]
[[ 1.  4.  9.]
 [16. 25. 36.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]]
[[False  True False]
 [ True False  True]]
bool


## array indexing 数组索引
- 与列表一样取数
- 可以赋值

In [57]:
arr = np.arange(10)
print(arr)
# Read a single element; indices start at 0
print(arr[5])
# Read a range: positions 5, 6 and 7 (the stop index 8 is excluded)
print(arr[5:8])
# Assigning a scalar to a slice writes it into every selected position
arr[5:8] = 12
print(arr)

[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]
[ 0  1  2  3  4 12 12 12  8  9]


## array slice 数组切片
- 切片是原数组的视图(view)，不复制数据；修改切片会修改原数组
- 浅复制与深复制 [numpy 中的 copy 问题详解](https://blog.csdn.net/u010099080/article/details/59111207)

In [58]:
# Plain assignment: a second name for the same object, nothing is copied
arr_b = arr
print(arr_b)
print(arr_b is arr)
# Slice: a different ndarray object whose data is still owned by arr
arr_slice = arr[5:8]
print(arr_slice)
print(arr_slice is arr)
# Writing through the slice changes arr ...
arr_slice[:] = 64
print(arr)
# ... and writing to arr changes what the slice shows
arr[5] = 24
print(arr_slice)
# copy(): independent data, so changes no longer propagate in either direction
arr_deepcopy = arr.copy()
arr_deepcopy[:] = 65
print(arr)
arr[5] = 64
print(arr_deepcopy)

[ 0  1  2  3  4 12 12 12  8  9]
True
[12 12 12]
False
[ 0  1  2  3  4 64 64 64  8  9]
[24 64 64]
[ 0  1  2  3  4 24 64 64  8  9]
[65 65 65 65 65 65 65 65 65 65]


## array index 多维索引

In [59]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# A single index on a 2-D array selects a whole row, i.e. a 1-D array
print(arr2d[0])
# Chained indexing and a comma-separated index reach the same element
print(arr2d[0][2])
print(arr2d[0, 2])

arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
# A single index on a 3-D array selects a 2-D array
print(arr3d[0])
print(arr3d[0, 0, 2])

[1 2 3]
3
3
[[1 2 3]
 [4 5 6]]
3


## array slicing 切片后的数组维度
- 矩阵退化成向量
- 注意数组维度，容易引起bug

In [70]:
def show_slice(sub):
    """Print a sub-array followed by its shape and its number of dimensions."""
    print(sub)
    print(sub.shape)
    print(sub.ndim)


arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d)
# Rows 0-1, columns 1-2
print(arr2d[:2, 1:])
# Rows 0-1, column 0 only: slicing on both axes keeps the result 2-D
print(arr2d[:2, :1].shape)

# An integer index drops its axis, a slice keeps it: (3,) versus (1, 3)
# Row 2 selected with a single integer
show_slice(arr2d[2])
# Row 2 selected with an integer plus a full column slice
show_slice(arr2d[2, :])

# Rows from 2 to the end (as a slice), all columns
show_slice(arr2d[2:, :])
# All rows, columns from 2 to the end (as a slice)
show_slice(arr2d[:, 2:])

# 1-D vector (2,) versus 2-D matrix (1, 2)
# Row 1 by integer, columns 0-1
show_slice(arr2d[1, :2])
# Row 1 by slice, columns 0-1
show_slice(arr2d[1:2, :2])

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[2 3]
 [5 6]]
(2, 1)
[7 8 9]
(3,)
1
[7 8 9]
(3,)
1
[[7 8 9]]
(1, 3)
2
[[3]
 [6]
 [9]]
(3, 1)
2
[4 5]
(2,)
1
[[4 5]]
(1, 2)
2


## boolean indexing 
使用布尔向量作为索引

In [72]:
# Build a boolean mask: True wherever the name equals 'Bob'
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
names_bool = (names == 'Bob')
print(names_bool)
print(names_bool.dtype)
# One row of data per name. A locally seeded generator produces the same
# values on every run, so the printed rows stay comparable between executions.
data = np.random.RandomState(0).randn(7,4)
print(data)
# The mask is True at positions 0 and 3, so rows 0 and 3 are selected
print(data[names_bool])

[ True False False  True False False False]
bool
[[ 1.12085796 -0.3753818   0.75534541  0.19234998]
 [ 0.10539093  0.41057297 -2.04288387 -0.24652568]
 [-0.33218234  0.19234378  0.06033334  0.38900337]
 [-0.06307636  0.98194088  0.88782765  1.71489731]
 [-0.36058871 -0.17192994  0.10967564 -0.55793018]
 [-0.31479687 -1.03531499 -0.31328639 -1.13556865]
 [ 0.59565209  0.41933027  0.10175083  0.36983017]]
[[ 1.12085796 -0.3753818   0.75534541  0.19234998]
 [-0.06307636  0.98194088  0.88782765  1.71489731]]


## fancy indexing 
花式索引（fancy indexing）：使用整数数组或列表作为索引，可以按任意顺序选取行、列或单个元素，非常灵活；与切片不同，花式索引返回的是数据的副本而不是视图。

In [83]:
# Pick whole rows in an arbitrary order
arr = np.empty((8, 4))
# Fill row i with the value i so every row is recognisable in the output
for i in range(arr.shape[0]):
    arr[i] = i
print(arr)
print('----------------------')
row_choose = [4, 3, 0, 6]
print(arr[row_choose])
print('----------------------')
# Pick individual elements: the two index lists are paired position by position
arr = np.arange(32).reshape(8, 4)
print(arr)
print('----------------------')
row_column = ([1, 5, 7, 2], [0, 3, 1, 2])
# Selects the elements at (1, 0), (5, 3), (7, 1) and (2, 2)
print(arr[row_column])
print('----------------------')
# Pick rows 1, 5, 7, 2 first, then reorder the columns of that result as 0, 3, 1, 2
print(arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]])

[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]
----------------------
[[4. 4. 4. 4.]
 [3. 3. 3. 3.]
 [0. 0. 0. 0.]
 [6. 6. 6. 6.]]
----------------------
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]
----------------------
[ 4 23 29 10]
----------------------
[[ 4  7  5  6]
 [20 23 21 22]
 [28 31 29 30]
 [ 8 11  9 10]]


## transpose/swapaxes 交换坐标轴
- ndarray.transpose(*axes)
- ndarray.swapaxes(axis1,axis2)

In [91]:
arr = np.arange(16).reshape(2, 2, 4)
print(arr)
# Explicit permutations: entry k of the argument names the original axis
# that becomes axis k of the result.
for axes in ((1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)):
    print('-'*20)
    print(arr.transpose(*axes))
print('-'*20)
# With no arguments the axis order is reversed: (0, 1, 2) -> (2, 1, 0)
print(arr.transpose())
print('-'*20)
# swapaxes exchanges exactly two axes and leaves the others in place
print(arr.swapaxes(0, 1))
print('-'*20)
print(arr.swapaxes(1, 2))

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
--------------------
[[[ 0  1  2  3]
  [ 8  9 10 11]]

 [[ 4  5  6  7]
  [12 13 14 15]]]
--------------------
[[[ 0  8]
  [ 1  9]
  [ 2 10]
  [ 3 11]]

 [[ 4 12]
  [ 5 13]
  [ 6 14]
  [ 7 15]]]
--------------------
[[[ 0  4]
  [ 8 12]]

 [[ 1  5]
  [ 9 13]]

 [[ 2  6]
  [10 14]]

 [[ 3  7]
  [11 15]]]
--------------------
[[[ 0  8]
  [ 4 12]]

 [[ 1  9]
  [ 5 13]]

 [[ 2 10]
  [ 6 14]]

 [[ 3 11]
  [ 7 15]]]
--------------------
[[[ 0  8]
  [ 4 12]]

 [[ 1  9]
  [ 5 13]]

 [[ 2 10]
  [ 6 14]]

 [[ 3 11]
  [ 7 15]]]
--------------------
[[[ 0  1  2  3]
  [ 8  9 10 11]]

 [[ 4  5  6  7]
  [12 13 14 15]]]
--------------------
[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]
