# NumPy
**First install numpy**
```
$ pip install numpy
```

## What is NumPy?
* NumPy is a multi-dimensional array library.
* NumPy arrays are much faster than Python lists for numerical operations.

## Why is NumPy faster? 
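* Fixed type: every element shares one dtype, so NumPy reads fewer bytes per element and skips per-element type checks.
* Contiguous memory
    * A Python list stores only pointers; the objects they point to are scattered around memory.
    * A NumPy array stores its elements in one contiguous block of memory.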

In [60]:
import numpy as np

## Basics

In [61]:
a = np.array([1, 2, 3]) # 将一个列表变为numpy数组 一维
a

array([1, 2, 3])

In [62]:
b = np.array([[9.0, 8.0, 7.0], [6.0, 5.0, 4.0]]) # 将一个列表变为numpy数组 二维，同理更高维度的
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [63]:
# get dimension
a.ndim

1

In [64]:
# get shape
b.shape # 矩阵2*3

(2, 3)

In [65]:
a.shape # 1-D array with 3 elements, not a 3x1 matrix

(3,)

In [66]:
c = np.array([[[1], [2]]])
c.shape

(1, 2, 1)

In [67]:
# get type
a.dtype

dtype('int64')

In [68]:
# 我们也可以指定数据类型
d = np.array([1, 2, 3], dtype="int16")
d.dtype

dtype('int16')

In [69]:
# get size
a.itemsize # a的元素大小，64bit/8=8Byte

8

In [70]:
# get total size
print(c.size) # 元素的个数
print(c.itemsize) # 每个元素的大小
print(c.nbytes) # nbytes = size * itemsize

2
8
16


## Accessing/Changing specific elements, rows, columns, etc

In [71]:
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [72]:
# get a specific element [r, c]
print(a[1, 5])
print(a[1, -2])

13
13


In [73]:
# get a specific row
a[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [74]:
# get a specific column
a[:, 2]

array([ 3, 10])

In [75]:
# 按照步长获取数据 [startindex:endindex:stepsize]
a[0, 1:6:2] # row 0, columns 1 to 5 (end index 6 is excluded), taking every 2nd element

array([2, 4, 6])

In [76]:
# change specific element
a[1, 5] = 66
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 66 14]]


In [77]:
# change specific column
a[:, 2] = 5 # 第2列全变成5
print(a)

a[:, 2] = [1, 6] # 第2列变成指定的数字
print(a)

[[ 1  2  5  4  5  6  7]
 [ 8  9  5 11 12 66 14]]
[[ 1  2  1  4  5  6  7]
 [ 8  9  6 11 12 66 14]]


## Initializing Different Types of Arrays

In [78]:
# All 0s matrix
np.zeros((2, 3)) # 默认数据类型为float64

array([[0., 0., 0.],
       [0., 0., 0.]])

In [79]:
# All 1s matrix
np.ones((4, 2, 2), dtype="int32") # 指定数据类型

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]], dtype=int32)

In [80]:
# Any other number
np.full((2, 2), 99) # 第一个参数是shape，第二个参数是填充的值

array([[99, 99],
       [99, 99]])

In [81]:
# Any other number full_like
e = np.array([[1, 2, 3], [4, 5, 6]])
np.full_like(e, 4) # 将形状与e矩阵相同的矩阵全部填充4

array([[4, 4, 4],
       [4, 4, 4]])

In [82]:
# Random decimal numbers
np.random.rand(4, 2, 3) # 参数为shape

array([[[0.05467677, 0.59504076, 0.54582189],
        [0.21332505, 0.19627247, 0.69312302]],

       [[0.93690575, 0.60072122, 0.38624711],
        [0.85836911, 0.83008956, 0.55707837]],

       [[0.92375594, 0.23267472, 0.59820304],
        [0.23827142, 0.12157275, 0.48853298]],

       [[0.19159851, 0.95180931, 0.09949446],
        [0.11355956, 0.49160934, 0.08703488]]])

In [83]:
# Random Integer values
np.random.randint(2, 9, size=(2, 3)) # first two args are the range [2, 9) (upper bound excluded); size is the shape

array([[6, 7, 4],
       [6, 2, 4]])

In [84]:
# The identity matrix
np.identity(3, dtype="int16") # 单位矩阵

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=int16)

In [85]:
# Repeat an array
arr = np.array([[1, 2, 3]])
r1 = np.repeat(arr, 3, axis=0) # 3 = number of repeats; axis=0 repeats along the first axis, i.e. each row is repeated
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [86]:
output = np.ones((5, 5))
print(output)

z = np.zeros((3, 3))
z[1, 1] = 9
print(z)

output[1:4, 1:4] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


## Be careful when copying arrays

In [87]:
# Plain assignment (b = a) only binds a second name to the same array; no data is copied
a = np.array([1, 2, 3])
b = a
b[0] = 100
print('a =', a)
# 值拷贝需要使用方法 copy()
c = a.copy()
c[1] = 100
print('c =', c)
print('a =', a)

a = [100   2   3]
c = [100 100   3]
a = [100   2   3]


## Mathematics

In [88]:
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


In [89]:
print('a + 2 =', a + 2)
print('a - 2 =', a - 2)
print('a * 2 =', a * 2)
print('a / 2 =', a / 2)

a + 2 = [3 4 5 6]
a - 2 = [-1  0  1  2]
a * 2 = [2 4 6 8]
a / 2 = [0.5 1.  1.5 2. ]


In [90]:
b = np.array([1, 0, 1, 0])
print(a + b)

[2 2 4 4]


In [91]:
a ** 2

array([ 1,  4,  9, 16])

In [92]:
# take the sin
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [93]:
# take the cos
np.cos(a)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

### For more, see https://numpy.org/doc/stable/reference/routines.math.html
(Reference for NumPy's mathematical functions.)

## Linear Algebra

In [94]:
# 矩阵乘法
a = np.ones((2, 3))
print(a)

b = np.full((3, 2), 2)
print(b)

np.matmul(a, b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

In [95]:
# 行列式的值
c = np.identity(3)
np.linalg.det(c)

1.0

### For more, see https://numpy.org/doc/stable/reference/routines.linalg.html
(Reference for NumPy's linear algebra routines.)

## Statistics 统计

In [96]:
stats = np.array([[1, 2, 3], [4, 5, 6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [97]:
print(np.min(stats)) # minimum over all elements
print(np.min(stats, axis=0)) # minimum along axis 0, i.e. one value per column
print(np.min(stats, axis=1)) # minimum along axis 1, i.e. one value per row

1
[1 2 3]
[1 4]


In [98]:
np.max(stats)

6

In [99]:
print(np.sum(stats)) # sum of all elements
print(np.sum(stats, axis=0)) # sum along axis 0, i.e. one total per column

21
[5 7 9]


## Reorganizing Arrays

In [100]:
# 相同的数据，不同的shape
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

after = before.reshape((8, 1))
print(after)

after_1 = before.reshape((2, 2, 2))
print(after_1)

[[1 2 3 4]
 [5 6 7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [101]:
# Vertically stacking vectors 垂直叠堆向量
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])
np.vstack([v1, v2, v2, v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8]])

In [102]:
# Horizontal stack 水平向量叠堆
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))

np.hstack([h1, h2])

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

## Miscellaneous 其他的

### Load Data from File

In [103]:
filedata = np.genfromtxt('data.txt', delimiter=',') # 按照','分割
print(filedata.astype('int32')) # astype returns a converted copy; filedata itself keeps its original float dtype
print(filedata)

[[ 1  2  3  4  5 99 49]
 [ 6  7  8  9 10 98 99]
 [11 12 13 14 15 97 98]]
[[ 1.  2.  3.  4.  5. 99. 49.]
 [ 6.  7.  8.  9. 10. 98. 99.]
 [11. 12. 13. 14. 15. 97. 98.]]


### Boolean Masking and Advanced Indexing 布尔值判断和高级索引

In [104]:
filedata > 50 # 布尔判断

array([[False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True,  True],
       [False, False, False, False, False,  True,  True]])

In [105]:
filedata[filedata > 50] # 高级索引

array([99., 98., 99., 97., 98.])

In [106]:
# you can index with a list in numpy 在numpy中可以使用list作为索引
a = np.array([1,2,3,4,5,6,7,8,9])
a[[7,1,8]]

array([8, 2, 9])

In [107]:
np.any(filedata > 50, axis=0) # per column (along axis 0): is any element greater than 50?

array([False, False, False, False, False,  True,  True])

In [108]:
np.all(filedata > 50, axis=0) # per column (along axis 0): are all elements greater than 50?

array([False, False, False, False, False,  True, False])

In [110]:
(filedata > 50) & (filedata < 100) # element-wise AND of two boolean arrays: True only where both conditions hold

array([[False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True,  True],
       [False, False, False, False, False,  True,  True]])

In [111]:
filedata

array([[ 1.,  2.,  3.,  4.,  5., 99., 49.],
       [ 6.,  7.,  8.,  9., 10., 98., 99.],
       [11., 12., 13., 14., 15., 97., 98.]])

In [114]:
filedata[[0,1,2],[0,1,2]]

array([ 1.,  7., 13.])

In [127]:
filedata[[0,2], 5:7]

array([[99., 49.],
       [97., 98.]])