![``numpy-cheat-sheet_``](numpy-cheat-sheet_.png)

# 惯用导入

In [1]:
import numpy as np

# Create Data

## np.array

In [2]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [3]:
np.array([
    [1,2,3,4], 
    [5,6,7,8]
])

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

## np.arange(start, stop, step)

In [4]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
np.arange(5, 10)

array([5, 6, 7, 8, 9])

In [6]:
np.arange(5, 20, 2)

array([ 5,  7,  9, 11, 13, 15, 17, 19])

## np.linspace(start, stop, num)
##### > 生成从 start 到 stop（默认包含 stop）、共含 num 个数的等差数列

In [7]:
np.arange(5, 20, 2)

array([ 5,  7,  9, 11, 13, 15, 17, 19])

In [8]:
np.linspace(0, 10, 3)

array([ 0.,  5., 10.])

## random.[random( ) / randn( )]

##### > 生成一个 [0,1) 之间的随机数组

In [9]:
np.random.random((2, 4))

array([[0.50967423, 0.97343544, 0.51942337, 0.04955891],
       [0.10205621, 0.43965901, 0.34662033, 0.57756733]])

##### > 生成一个服从正态分布的数组

In [10]:
np.random.randn(2, 4)

array([[ 0.39010333, -0.61270249, -0.1918508 , -1.28536533],
       [-0.37837725, -0.38995107,  0.79527795, -0.91831317]])

##### > seed() 用于指定随机数生成算法所用的起始整数值（种子）
##### > 如果使用相同的 seed() 值， 则每次产生的随机数都相同

In [11]:
np.random.seed(1)
print(np.random.random(1))
print(np.random.random(1))

[0.417022]
[0.72032449]


## np.full(shape, fill_value)

In [12]:
np.full((2,3), 0x400)

array([[1024, 1024, 1024],
       [1024, 1024, 1024]])

## Special Matrix
### ones / zeros, empty / eye, identity

In [13]:
np.ones((3, 4), dtype=int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

In [14]:
np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [15]:
np.empty((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [16]:
np.eye(4,4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [17]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### np.diag( )

##### > diag() : 返回一个矩阵的对角线， 或者是创建一个对角矩阵

In [18]:
np.diag([0,4,8])

array([[0, 0, 0],
       [0, 4, 0],
       [0, 0, 8]])

In [19]:
a = np.arange(9).reshape((3,3))
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [20]:
np.diag(a)

array([0, 4, 8])

# Data Property

In [21]:
a = np.array([[1,2,3,4], [5,6,7,8]])
a

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

## ary.ndim

In [22]:
a.ndim

2

## ary.shape

In [23]:
a.shape

(2, 4)

In [24]:
a.shape[0]

2

In [25]:
a.shape[1]

4

## ary.dtype

In [26]:
a = np.array([2, 23, 3], dtype=np.float64)
a

array([ 2., 23.,  3.])

# reshape( )

In [27]:
a = np.arange(12)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [28]:
a.reshape((3, 4))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

## “-1”：模糊数字

##### > 会根据另一个参数的维度、或者数组长度， 计算出未给出的 shape 属性值

In [29]:
a.reshape(1, -1)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]])

In [30]:
a.reshape(-1, 1)

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11]])

# np.newaxis

##### > 添加新的 {列 or 行}

In [31]:
t = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).reshape((2, 5))
t

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [32]:
print(t[np.newaxis,:])
print(t[np.newaxis,:].shape)

[[[ 1  2  3  4  5]
  [ 6  7  8  9 10]]]
(1, 2, 5)


In [33]:
print(t[:,np.newaxis])
print(t[:,np.newaxis].shape)

[[[ 1  2  3  4  5]]

 [[ 6  7  8  9 10]]]
(2, 1, 5)


In [34]:
print(t[:,:,np.newaxis])
print(t[:,:,np.newaxis].shape)

[[[ 1]
  [ 2]
  [ 3]
  [ 4]
  [ 5]]

 [[ 6]
  [ 7]
  [ 8]
  [ 9]
  [10]]]
(2, 5, 1)


# np.copy( )

In [35]:
a = np.arange(4)
b = a.copy()

In [36]:
b

array([0, 1, 2, 3])

In [37]:
a[3] = 1314
b

array([0, 1, 2, 3])

# Select Data

## 单个元素提取

In [38]:
a = np.array([[1,2,3,4], [5,6,7,8], [10,11,12,13]])

In [39]:
a[1,2]

7

In [40]:
a[1][2]

7

## 按行选取

##### > 与 list 相同，通过 [ ] 来截取

In [41]:
a = np.array([[1,2,3,4], [5,6,7,8], [10,11,12,13]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [10, 11, 12, 13]])

In [42]:
a[1:]

array([[ 5,  6,  7,  8],
       [10, 11, 12, 13]])

In [43]:
a[1, :]

array([5, 6, 7, 8])

In [44]:
a[1, 1:3]

array([6, 7])

## 按条件截取

In [45]:
a = np.array([[1,2,3,4], [5,6,7,8], [10,11,12,13]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [10, 11, 12, 13]])

In [46]:
b = a[a>6]
b

array([ 7,  8, 10, 11, 12, 13])

In [47]:
a[a>6] = 0
a

array([[1, 2, 3, 4],
       [5, 6, 0, 0],
       [0, 0, 0, 0]])

# Math About

## 四则运算

In [48]:
a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])
print(a, b, sep='\n')

[10 20 30 40]
[1 2 3 4]


In [49]:
print('a + b\t', a + b)
print('a - b\t', a - b)
print('a * b\t', a * b)
print('a / b\t', a / b)
print('a ** 2\t', a ** 2)
print('10 * np.sin(a)\t', 10 * np.sin(a))

a + b	 [11 22 33 44]
a - b	 [ 9 18 27 36]
a * b	 [ 10  40  90 160]
a / b	 [10. 10. 10. 10.]
a ** 2	 [ 100  400  900 1600]
10 * np.sin(a)	 [-5.44021111  9.12945251 -9.88031624  7.4511316 ]


## 比较运算

In [50]:
print('b\t', b)
print('b < 3\t', b < 3)

b	 [1 2 3 4]
b < 3	 [ True  True False False]


## Linear Algebra

### T

In [51]:
a = np.arange(12).reshape((3,4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [52]:
a.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

### reshape() & -1
##### > 通过运行 reshape 构建行列向量

In [53]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [54]:
T = a.reshape(1, -1)
print('T\t', T)
print('T.ndim\t', T.ndim)
print('T.shape\t', T.shape)

T	 [[0 1 2 3 4]]
T.ndim	 2
T.shape	 (1, 5)


In [55]:
T = a.reshape(-1, 1)
print('T.ndim\t', T.ndim)
print('T.shape\t', T.shape)

T.ndim	 2
T.shape	 (5, 1)


### dot() 点乘

In [56]:
a = np.arange(6).reshape((2,3))
b = np.arange(12).reshape((3,-1))

In [57]:
a.dot(b)

array([[20, 23, 26, 29],
       [56, 68, 80, 92]])

In [58]:
np.dot(a, b)

array([[20, 23, 26, 29],
       [56, 68, 80, 92]])

In [59]:
a @ b

array([[20, 23, 26, 29],
       [56, 68, 80, 92]])

### np.linalg.pinv( )

##### > A - n*n，需要先导入 numpy.linalg

In [60]:
a = np.array([[1,2,3], [4,5,6], [7,8,9]])
np.linalg.pinv(a)

array([[-6.38888889e-01, -1.66666667e-01,  3.05555556e-01],
       [-5.55555556e-02,  1.38777878e-16,  5.55555556e-02],
       [ 5.27777778e-01,  1.66666667e-01, -1.94444444e-01]])

In [61]:
a = np.eye(3)
np.linalg.pinv(a)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

## 统计信息

In [62]:
np.random.seed(0)
a = np.random.random((2,4))
a

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ]])

### 最值：max / min

In [63]:
a = np.array([[1,2,3], [4,5,6]])
print('max :', a.max())
print('min :', a.min())

max : 6
min : 1


##### > axis=0 提取每列方向最值
##### > axis=1 提取每行方向最值

In [64]:
a = np.array([[1,2,3], [4,5,6]])
print('a : \n', a)

a : 
 [[1 2 3]
 [4 5 6]]


In [65]:
print('a.max(axis=1) - row :', a.max(axis=1))
print('a.max(axis=0) - col :', a.max(axis=0))

a.max(axis=1) - row : [3 6]
a.max(axis=0) - col : [4 5 6]


### sum : 求和

In [66]:
a = np.array([[1,2,3], [4,5,6]])
print('sum :', a.sum())
print('sum(axis=1) - row :', a.sum(axis=1))
print('sum(axis=0) - col :', a.sum(axis=0))

sum : 21
sum(axis=1) - row : [ 6 15]
sum(axis=0) - col : [5 7 9]


### mean : 均值

In [67]:
a = np.array([[1,2,3], [4,5,6]])
print('np.mean : ', np.mean(a))
print('np.average :', np.average(a))
print('ary.mean :', a.mean())

np.mean :  3.5
np.average : 3.5
ary.mean : 3.5


In [68]:
print('mean(a, axis=1) - row :', np.mean(a, axis=1))  # rows
print('mean(a, axis=0) - col :', np.mean(a, axis=0))  # columns

mean(a, axis=1) - row : [2. 5.]
mean(a, axis=0) - col : [2.5 3.5 4.5]


### var : 方差

In [69]:
a = np.array([[1,2,3], [4,5,6]])
print('a.var() : ', a.var())
print('a.var(axis=1) - row : ', a.var(axis=1))
print('a.var(axis=0) - col : ', a.var(axis=0))

a.var() :  2.9166666666666665
a.var(axis=1) - row :  [0.66666667 0.66666667]
a.var(axis=0) - col :  [2.25 2.25 2.25]


### std : 标准差

In [70]:
a = np.array([[1,2,3], [4,5,6]])
print('a.std() : ', a.std())
print('a.std(axis=1) - row : ', a.std(axis=1))
print('a.std(axis=0) - col : ', a.std(axis=0))

a.std() :  1.707825127659933
a.std(axis=1) - row :  [0.81649658 0.81649658]
a.std(axis=0) - col :  [1.5 1.5 1.5]


### median : 中值

In [71]:
a = np.array([[1,2,3], [4,5,6]])
print('np.median(a) : ', np.median(a))
print('np.median(a, axis=1) - row : ', np.median(a, axis=1))
print('np.median(a, axis=0) - col : ', np.median(a, axis=0))

np.median(a) :  3.5
np.median(a, axis=1) - row :  [2. 5.]
np.median(a, axis=0) - col :  [2.5 3.5 4.5]


### cumsum : 累积和

In [72]:
a = np.array([[1,2,3], [4,5,6]])
print('a.cumsum() : ', a.cumsum())
print('a.cumsum(axis=1) - row : \n', a.cumsum(axis=1), sep='')
print('a.cumsum(axis=0) - col : \n', a.cumsum(axis=0), sep='')

a.cumsum() :  [ 1  3  6 10 15 21]
a.cumsum(axis=1) - row : 
[[ 1  3  6]
 [ 4  9 15]]
a.cumsum(axis=0) - col : 
[[1 2 3]
 [5 7 9]]


### 最值索引：argmin( ) / argmax( )

In [73]:
np.random.seed(0)
a = np.random.random((2,4))
a

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ]])

In [74]:
print('np.argmax() : ', np.argmax(a))
print('np.argmax(a, axis=1) : ', np.argmax(a, axis=1))
print('np.argmax(a, axis=0) : ', np.argmax(a, axis=0))

np.argmax() :  7
np.argmax(a, axis=1) :  [1 3]
np.argmax(a, axis=0) :  [0 0 0 1]


### argsort( )

In [75]:
#返回的是数组元素从小到大排序后的索引值

In [76]:
a = np.random.random((2,4))
a

array([[0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [77]:
print('np.argsort(a) : \n', np.argsort(a))

np.argsort(a) : 
 [[1 3 2 0]
 [2 3 0 1]]


In [78]:
print('np.argsort(a, axis = 0) : \n', np.argsort(a, axis = 0))

np.argsort(a, axis = 0) : 
 [[1 0 1 1]
 [0 1 0 0]]


In [79]:
print('np.argsort(a, axis = 1) : \n', np.argsort(a, axis = 1))

np.argsort(a, axis = 1) : 
 [[1 3 2 0]
 [2 3 0 1]]


In [80]:
a[0,np.argsort(a)[0]]

array([0.38344152, 0.52889492, 0.79172504, 0.96366276])

# 矩阵合并

In [81]:
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])

##### > vertical stack

In [82]:
np.vstack((A, B))

array([[1, 1, 1],
       [2, 2, 2]])

##### > horizontal stack

In [83]:
np.hstack((A, B))

array([1, 1, 1, 2, 2, 2])

##### > 一维数组（行向量）转置后 shape 不变，不能通过转置把它变成矩阵

In [84]:
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])
print(A.T, A.T.shape)
print(B.T, B.T.shape)

[1 1 1] (3,)
[2 2 2] (3,)


# 矩阵分割

In [85]:
A = np.arange(12).reshape((3,4))
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [86]:
np.split(A, 2, axis=1)

[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

In [87]:
np.split(A, 3)

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]

In [88]:
np.vsplit(A, 3)

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]

In [89]:
np.hsplit(A, 4)

[array([[0],
        [4],
        [8]]), array([[1],
        [5],
        [9]]), array([[ 2],
        [ 6],
        [10]]), array([[ 3],
        [ 7],
        [11]])]

# 矩阵信息获取

In [90]:
data = np.loadtxt('data.csv', delimiter=',')
data

array([[ 1. ,  3. ],
       [ 1.2,  3. ],
       [ 1.2,  4. ],
       [ 1.5,  4.5],
       [ 1.6,  4.3],
       [ 6.5, 12. ],
       [ 3.6,  7.1],
       [ 2.5,  9. ],
       [ 5.7, 14. ],
       [ 6. , 11. ],
       [ 9. , 17. ],
       [ 8.9, 17. ],
       [ 7.1, 15. ],
       [ 7. , 14. ],
       [ 2.5,  4. ],
       [ 0.8,  2. ],
       [ 0.5,  2. ],
       [ 3.4,  7. ],
       [ 3.6,  9. ],
       [ 5.6, 12. ],
       [ 6.7, 15. ],
       [ 6.9, 15. ],
       [ 7.1, 14. ],
       [ 7.5, 17. ],
       [ 7.8, 16. ],
       [ 8.1, 15. ],
       [ 8.3, 15. ],
       [ 8.5, 15. ],
       [ 8.7, 16. ],
       [ 8.7, 17. ],
       [ 8.8, 18. ],
       [ 8.8, 20. ],
       [ 8. , 16. ],
       [ 9. , 19. ],
       [ 9.2, 18. ],
       [10.1, 20. ],
       [ 1.1,  3.2],
       [ 1.6,  4.2],
       [ 4. ,  9. ],
       [12. , 25. ],
       [ 9.5, 20. ]])

# 快速构建参数矩阵

In [91]:
data = np.loadtxt('data.csv', delimiter=',')

# Vector Feature
feature_0 = np.ones(len(data))
feature_1 = data[:,0].reshape((1,len(data)))
X = np.vstack((feature_0, feature_1))

In [92]:
X

array([[ 1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,
         1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,
         1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,
         1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ],
       [ 1. ,  1.2,  1.2,  1.5,  1.6,  6.5,  3.6,  2.5,  5.7,  6. ,  9. ,
         8.9,  7.1,  7. ,  2.5,  0.8,  0.5,  3.4,  3.6,  5.6,  6.7,  6.9,
         7.1,  7.5,  7.8,  8.1,  8.3,  8.5,  8.7,  8.7,  8.8,  8.8,  8. ,
         9. ,  9.2, 10.1,  1.1,  1.6,  4. , 12. ,  9.5]])

###### 数组降维 - np.ravel() & ndarray.flatten()

+ **`将多维数组降为一维 - 数组平摊`**
+ `ndarray.flatten()` : 返回一份**拷贝**，对拷贝的修改不会影响原始矩阵
+ `np.ravel()`   : 尽可能返回**`视图/view`**（无法返回视图时才返回拷贝），对视图的修改会影响`原始`矩阵

In [93]:
arr = np.arange(0, 12).reshape((3,4))

In [94]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [95]:
arr.flatten()[1] = -12345
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [96]:
arr.ravel()[1] = -12345
arr

array([[     0, -12345,      2,      3],
       [     4,      5,      6,      7],
       [     8,      9,     10,     11]])

###### 打乱数据 -  np.random.shuffle()

+ **`修改原始数组`**

In [97]:
row = np.arange(10)
np.random.shuffle(row)
row

array([4, 1, 6, 7, 2, 8, 5, 9, 0, 3])

+ Multi-dimensional arrays are only shuffled along the first axis

In [98]:
arr = np.arange(12).reshape((3,4))
np.random.shuffle(arr)
arr

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11],
       [ 4,  5,  6,  7]])

###### np.random.permutation()

+ randomly permute a sequence, or return a permuted range

In [99]:
np.random.permutation(10)

array([2, 4, 8, 9, 5, 6, 1, 0, 7, 3])

In [100]:
row = np.arange(12)
np.random.permutation(row)

array([ 1,  9,  0,  5, 11,  2,  8,  6,  3,  7,  4, 10])

In [101]:
np.random.permutation(row).reshape((-1, 4))

array([[ 3,  7,  2,  9],
       [ 8,  1, 11,  0],
       [ 6, 10,  5,  4]])

###### 重复数组 - np.tile(A, reps)

+ construct an array by repeating A the number of times given by reps

In [102]:
row = np.arange(2)

In [103]:
np.tile(row, 2)

array([0, 1, 0, 1])

In [104]:
np.tile(row, (2,2))

array([[0, 1, 0, 1],
       [0, 1, 0, 1]])

In [105]:
np.tile(row, (4,1))

array([[0, 1],
       [0, 1],
       [0, 1],
       [0, 1]])

###### np.random.__generateData()

+ rand(d1, d2, ..., dn):a uniform distribution over [0,1) / [0,1) 均匀分布
+ randn(d1, d2, ..., dn):a univariate "normal" (Gaussian) distribution of mean 0 and variance 1 / 标准正态分布
+ random(size):return random floats in the half-open interval [0.0, 1.0)
+ randint(low, high=None, size=None):[low,high) / 半开，随机整数

In [106]:
np.random.rand(1, 2)

array([[0.61306346, 0.90234858]])

In [107]:
np.random.randn(2, 4)

array([[-1.02617878,  0.47752547,  1.29269823, -0.73145824],
       [-1.60540226,  0.98947618,  0.11081461, -0.38093141]])

In [108]:
np.random.random((3,4))

array([[0.95894927, 0.65279032, 0.63505887, 0.99529957],
       [0.58185033, 0.41436859, 0.4746975 , 0.6235101 ],
       [0.33800761, 0.67475232, 0.31720174, 0.77834548]])

In [109]:
np.random.randint(0, 10, (3,4))

array([[3, 0, 5, 0],
       [1, 2, 4, 2],
       [0, 3, 2, 0]])

In [110]:
np.random.randint(10, size=(3,4))

array([[7, 5, 9, 0],
       [2, 7, 2, 9],
       [2, 3, 3, 2]])

###### np.random.__distribution()

**`np.random.normal(loc, scale, size)`**:random samples from a normal (Gaussian) distribution
+ loc : Mean of the distribution
+ scale : standard deviation of the distribution

In [111]:
mean = 0.0
std = 1.0

np.random.normal(mean, std, size=(3,4))

array([[ 1.07612104,  0.19214083,  0.85292596,  0.01835718],
       [ 0.42830357,  0.99627783, -0.49114966,  0.71267817],
       [ 1.11334035, -2.15367459, -0.41611148, -1.07089699]])

**`np.random.multivariate_normal(mean, cov, size)`**:random samples from a multivariate normal distribution

In [112]:
mean = [0, 0]
cov = [[1,0], [0,100]]

np.random.multivariate_normal(mean, cov, (3, 4))

array([[[ -1.12305712,   2.21138805],
        [  1.01207905, -10.50757957],
        [ -0.40211489,  15.4371643 ],
        [ -0.22686923,   8.64749101]],

       [[  0.28779461,   8.1116027 ],
        [  1.9084621 ,  -5.3846816 ],
        [  1.44073112,  -4.75776001],
        [  0.80571455, -11.49075681]],

       [[  0.97297991,  17.5610703 ],
        [  0.19114348, -15.51119112],
        [ -1.42002593,  16.04776071],
        [ -2.05633855, -24.43782684]]])

[``numpy Save & Load``](https://www.cnblogs.com/wushaogui/p/9142019.html)

**`np.loadtxt`**(fname, delimiter=None, skiprows=0, usecols=None, unpack=False)
+ fname : file, str, or pathlib.Path
+ delimiter : str, optional
    + 分隔符，默认为任意空白字符
+ skiprows : int, optional
    + skip the first skiprows lines; default 0
+ usecols : int or sequence, optional
    + 要读取哪些列，0 为第一列；默认读取所有列
    + usecols=(1,4,5), 将读取 "第 2、5、6 列"
+ unpack : bool, optional
    + 如果为 True, 返回的数组将被 '转置'

In [113]:
datafile = 'dataset/ex1data1.txt'

In [114]:
data = np.loadtxt(datafile, delimiter=',', usecols=(0,1), unpack=True)
data.shape

(2, 97)

In [115]:
data = np.loadtxt(datafile, delimiter=',')
data.shape

(97, 2)

**`np.savetxt`**(fname, fmt='%.18e', delimiter=' ', header='')

In [116]:
np.random.seed(1004)
arr = np.random.randn(3,4)
arr

array([[ 0.59440307,  0.40260871, -0.80516223,  0.1151257 ],
       [-0.75306522, -0.7841178 ,  1.46157577,  1.57607553],
       [-0.17131776, -0.91448182,  0.86013945,  0.35880192]])

In [117]:
np.savetxt('dataset/test0.txt', arr)
np.savetxt('dataset/test1.txt', arr, delimiter=',', header="Test Save")

In [118]:
np.loadtxt('dataset/test1.txt', delimiter=',')

array([[ 0.59440307,  0.40260871, -0.80516223,  0.1151257 ],
       [-0.75306522, -0.7841178 ,  1.46157577,  1.57607553],
       [-0.17131776, -0.91448182,  0.86013945,  0.35880192]])

**`np.unique(ary)`**
+ find the unique elements of an array
+ return_index=True : 返回原位置
+ return_inverse=True : 返回旧元素在新列表的位置
+ return_counts=True : 返回每个元素在 ary 中出现的次数

In [119]:
ary = np.random.randint(10, size=(4,6))
ary

array([[6, 7, 5, 3, 5, 8],
       [6, 0, 2, 1, 4, 0],
       [9, 2, 7, 1, 9, 9],
       [3, 2, 9, 2, 3, 5]])

In [120]:
np.unique(ary)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [121]:
arr, old, new, cou = np.unique(ary, return_index=True, return_inverse=True, return_counts=True)
print(arr)
print(old)
print(new)
print(cou)

[0 1 2 3 4 5 6 7 8 9]
[ 7  9  8  3 10  2  0  1  5 12]
[6 7 5 3 5 8 6 0 2 1 4 0 9 2 7 1 9 9 3 2 9 2 3 5]
[2 2 4 3 1 3 2 2 1 4]


**`np.nan & np.inf`**

+ 当运算的数据中出现了 `np.nan & np.inf` 时，它们会在后续计算中不断传播，导致结果不可用；常见原因是除法过程中分母出现了 0
+ np.nan : Not A Number

In [122]:
a = np.array([[np.nan, np.inf], [-np.nan, -np.inf]])
a

array([[ nan,  inf],
       [ nan, -inf]])

**`处理 / 判断`**
+ np.isnan(X) : Test element-wise for NaN and return result as a boolean array
+ np.isinf(X) : Test element-wise for positive or negative infinity, return a boolean array of the same shape as x, True where `x == +/-inf`, otherwise False
+ np.nan_to_num() : Replace NaN with zero and infinity with large finite numbers
+ np.isneginf() / np.isposinf()

In [123]:
np.nan_to_num(a)

array([[ 0.00000000e+000,  1.79769313e+308],
       [ 0.00000000e+000, -1.79769313e+308]])

In [124]:
np.isinf(a)

array([[False,  True],
       [False,  True]])

In [125]:
np.isnan(a)

array([[ True, False],
       [ True, False]])

**`RuntimeWarning : divide by zero encountered in log`**
+ **`np.count_nonzero(a, axis=None)`** : counts the number of non-zero values in the array a
+ **`np.nonzero(a)`** : return the indices of the elements that are non-zero

**`Application`**
+ cost function : hyp = 0

In [126]:
def replace_zeros(data, eps=1e-12):
    """Replace every zero in ``data`` with a small non-zero value, in place.

    Intended as a guard before operations that break on zero, such as
    taking the logarithm in a cost function.

    Parameters
    ----------
    data : numpy.ndarray
        Array to patch. It is modified in place.
    eps : float, optional
        Replacement used only when ``data`` contains no non-zero element
        at all. Defaults to ``1e-12``.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in, with its zeros replaced
        by the smallest non-zero element of ``data`` (or by ``eps`` when
        every element is zero).

    Notes
    -----
    The replacement is the *minimum* non-zero element, so if ``data``
    holds negative numbers the zeros become negative as well. The value
    is cast to ``data.dtype`` on assignment, so with an integer array a
    fractional ``eps`` is truncated.
    """
    nonzero_values = data[np.nonzero(data)]
    # With no non-zero entry there is no minimum to borrow; fall back to eps.
    fill_value = np.min(nonzero_values) if nonzero_values.size else eps
    data[data == 0] = fill_value

    return data

In [127]:
ary = np.array([[0, 0.01, 1], [0, 0.001, 0]])
replace_zeros(ary)

array([[0.001, 0.01 , 1.   ],
       [0.001, 0.001, 0.001]])

In [128]:
ary = np.zeros_like(ary)
replace_zeros(ary)

array([[1.e-12, 1.e-12, 1.e-12],
       [1.e-12, 1.e-12, 1.e-12]])

**`np.where(condition)`**
+ `Application` : Logistic Regression split sample to `pos & neg`

In [129]:
ary = np.array([[1, 0, 1, 0, 1, 1, 0, 1]])
pos_ind = np.where(ary == 1)
neg_ind = np.where(ary == 0)
print('Positive Index :', pos_ind)
print('Positive :', ary[pos_ind])

Positive Index : (array([0, 0, 0, 0, 0], dtype=int64), array([0, 2, 4, 5, 7], dtype=int64))
Positive : [1 1 1 1 1]


**`np.argwhere(a / condition)`**
+ find the indices of array element that are non-zero, grouped by element

In [130]:
ary = np.array([[1, 0, 1, 0, 1, 1, 0, 1, 3]])

In [131]:
ind = np.argwhere(ary[0])
ary[0, ind]

array([[1],
       [1],
       [1],
       [1],
       [1],
       [3]])

In [132]:
np.argwhere(ary > 1)

array([[0, 8]], dtype=int64)

**`合并`**
+ np.concatenate()
+ np.vstack() / np.r_[]
+ np.hstack() / np.c_[]

In [133]:
a = np.arange(0,4).reshape(1,-1)
b = np.arange(5,9).reshape(1,-1)

+ **`axis`**
    + 一般默认 : axis=0
    + axis=0 : 第一个维度，row
    + axis=1 : 第二个维度，col

**`np.concatenate()`**
+ axis=0 : 沿着`第一个维度`连接，即：沿着行连接，增加行维度
+ axis=1 : 沿着`第二个维度`连接，即：沿着列连接，增加列维度

In [134]:
np.concatenate([a, b])

array([[0, 1, 2, 3],
       [5, 6, 7, 8]])

In [135]:
np.concatenate([a, b], axis=1)

array([[0, 1, 2, 3, 5, 6, 7, 8]])

In [136]:
np.hstack([a,b])

array([[0, 1, 2, 3, 5, 6, 7, 8]])

In [137]:
np.c_[a,b]

array([[0, 1, 2, 3, 5, 6, 7, 8]])

In [138]:
np.vstack([a,b])

array([[0, 1, 2, 3],
       [5, 6, 7, 8]])

In [139]:
np.r_[a,b]

array([[0, 1, 2, 3],
       [5, 6, 7, 8]])

**`Add one column to matrix`**
+ np.insert(arr, obj, values, axis=None)
+ np.c_[] / np.r_[]
+ np.hstack() / np.vstack()

In [140]:
ori = np.arange(12.).reshape(3,4)
ones = np.ones((ori.shape[0],1))
ori

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

**`np.insert`**(arr, obj, values, axis=None)
+ obj : int, slice or sequence of ints
    + Object that defines the index or indices before which values is inserted

In [141]:
np.insert(ori, 0, 1., axis=1)

array([[ 1.,  0.,  1.,  2.,  3.],
       [ 1.,  4.,  5.,  6.,  7.],
       [ 1.,  8.,  9., 10., 11.]])

In [142]:
np.c_[ones, ori]

array([[ 1.,  0.,  1.,  2.,  3.],
       [ 1.,  4.,  5.,  6.,  7.],
       [ 1.,  8.,  9., 10., 11.]])

In [143]:
np.hstack((ones, ori))

array([[ 1.,  0.,  1.,  2.,  3.],
       [ 1.,  4.,  5.,  6.,  7.],
       [ 1.,  8.,  9., 10., 11.]])

**`np.delete`**(arr, obj, axis=None)
    + Return a new array with sub-arrays along an axis deleted
    + obj : slice, int or arrays of ints
        + indicate which sub-arrays to remove

In [144]:
ori = np.arange(12).reshape(3,4)
np.delete(ori, 0, axis=1)

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11]])

**`np.append`**(arr, values, axis=None)
    + append values to the end of an array

In [145]:
ori = np.arange(12).reshape(3,4)
ones = np.ones((ori.shape[0],1))
np.append(ori, ones, axis=1)

array([[ 0.,  1.,  2.,  3.,  1.],
       [ 4.,  5.,  6.,  7.,  1.],
       [ 8.,  9., 10., 11.,  1.]])

**`np.meshgrid`**(*xi, **kwargs)
    + Return coordinate matrices from coordinate vectors
    + x1, x2, ..., xn : array_like
        + 1-D arrays representing the coordinates of a grid

+ **`Application`**
    + hyperparam choose
    + draw contour

![``numpy-cheat-sheet_``](meshgrid.jpeg)

In [146]:
x = np.arange(-3, 3, 1)
y = np.arange(-5, 5, 1)
xx, yy = np.meshgrid(x, y)

In [147]:
xx

array([[-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2]])

In [148]:
yy

array([[-5, -5, -5, -5, -5, -5],
       [-4, -4, -4, -4, -4, -4],
       [-3, -3, -3, -3, -3, -3],
       [-2, -2, -2, -2, -2, -2],
       [-1, -1, -1, -1, -1, -1],
       [ 0,  0,  0,  0,  0,  0],
       [ 1,  1,  1,  1,  1,  1],
       [ 2,  2,  2,  2,  2,  2],
       [ 3,  3,  3,  3,  3,  3],
       [ 4,  4,  4,  4,  4,  4]])

In [149]:
import matplotlib.pyplot as plt
plt.figure()
plt.scatter(xx, yy)

<matplotlib.collections.PathCollection at 0x198ea6a87f0>

**``**

**``**

**``**

**``**

**``**

**``**

**``**

**``**

**``**

**``**

**``**

**``**