![``numpy-cheat-sheet_``](numpy-cheat-sheet_.png)

In [1]:
import numpy as np

# Create Arrays

+ **`np.array()`**
+ **`np.arange() / np.linspace()`**
+ **`np.zeros / np.ones() / np.empty()`**
+ **`np.zeros_like() / np.ones_like()`**
+ **`np.eye() / np.identity() / np.diag()`**
+ **`np.full() / np.tile()`**
+ **`np.random.__generateData()`**
    + **`np.random.RandomState()`**
    + **`np.random.rand()`**
    + **`np.random.randn()`**
    + **`np.random.random()`**
    + **`np.random.randint()`**

**`np.array()`**

In [2]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [3]:
np.array([
    [1,2,3,4], 
    [5,6,7,8]
])

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

**`np.arange`**(start, stop, step)
+ [start, stop)
+ python range 不可以传入'浮点数'
    + [Wrong] : range(0, 1, 0.2)

In [4]:
np.arange(0, 2, 0.3)

array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])

In [5]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
np.arange(10, step=1)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

**`np.linspace`**(start, stop, num)
+ [start, stop] 等间隔取 num 个点

In [8]:
np.linspace(0, 8, 5)

array([0., 2., 4., 6., 8.])

**`np.zeros`**(shape)

In [9]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

**`np.ones`**(shape)

In [10]:
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

**`np.empty`**(shape)
+ Return a new array of given shape and type, without initializing entries
+ The contents are arbitrary leftover memory; the ones shown below are not guaranteed

In [11]:
np.empty((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

**`np.zeros_like`**(arr)

In [12]:
arr = np.arange(12).reshape(3,4)
np.zeros_like(arr)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

**`np.ones_like`**(arr)

In [13]:
arr = np.arange(12).reshape(3,4)
np.ones_like(arr)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

**`np.eye`**(N, M=None)
+ Return a 2-D array with ones on the diagonal and zeros elsewhere
+ N : int, Number of rows in the output
+ M : int, Number of columns in the output; default equal to 'N'

In [14]:
np.eye(4,4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

**`np.identity`**(n)
+ Return the identity array.
+ n : int; number of rows (and columns) in "n * n" output

In [15]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

**`np.diag`**(v)
+ 返回一个矩阵的对角线，或者是创建一个对角矩阵

In [16]:
np.diag([0,4,8])

array([[0, 0, 0],
       [0, 4, 0],
       [0, 0, 8]])

In [17]:
arr = np.arange(9).reshape(3,3)
np.diag(arr)

array([0, 4, 8])

**`np.full`**(shape, fill_value)
+ Return a new array of given shape and type, filled with `fill_value`

In [18]:
np.full((2,3), 0x400)

array([[1024, 1024, 1024],
       [1024, 1024, 1024]])

In [19]:
np.full((3,5), fill_value=1)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

**`np.tile`**()

In [20]:
arr = np.arange(2)

In [21]:
np.tile(arr, 2)

array([0, 1, 0, 1])

In [22]:
np.tile(arr, (2,2))

array([[0, 1, 0, 1],
       [0, 1, 0, 1]])

In [23]:
np.tile(arr, (4,1))

array([[0, 1],
       [0, 1],
       [0, 1],
       [0, 1]])

**`np.random.__generateData`**()
+ `rand`(d1, d2, ..., dn) : a uniform distribution over [0,1) / [0,1) 均匀分布
+ `randn`(d1, d2, ..., dn) : a univariate "normal" (Gaussian) distribution of mean 0 and variance 1 / 标准正态分布
+ `random`(size) : return random floats in the half-open interval [0.0, 1.0)
+ `randint`(low, high=None, size=None) : [low,high) / 半开，随机整数

+ **`np.random.RandomState(seed=None)`** : 为随机数产生器固定生成种子
    +  随机数种子seed只有一次有效，在下一次调用产生随机数函数前没有设置seed，则还是产生随机数

In [24]:
# Rebuild the generator from the same seed before every draw: each fresh
# RandomState starts from an identical internal state, so all three draws
# print the same numbers.
for attempt in range(3):
    Rand = np.random.RandomState(seed=1004)
    print(Rand.rand(1, 2))

[[0.03895164 0.85649586]]
[[0.03895164 0.85649586]]
[[0.03895164 0.85649586]]


+ **`np.random.seed()`** : 固定产生伪随机数的种子

In [25]:
np.random.rand(1, 2)

array([[6.11163949e-01, 1.33155875e-04]])

In [26]:
np.random.randn(2, 4)

array([[ 1.49576599,  0.511621  ,  0.62134189,  0.58343225],
       [ 1.19483349, -1.06691569, -0.06300797, -0.41381171]])

In [27]:
np.random.random((3,4))

array([[0.38102046, 0.44050048, 0.14543644, 0.54998303],
       [0.70598476, 0.11780589, 0.76796544, 0.74921105],
       [0.11214836, 0.91685614, 0.9743161 , 0.49601294]])

In [28]:
np.random.randint(0, 10, (3,4))

array([[5, 6, 7, 8],
       [5, 9, 8, 2],
       [9, 1, 0, 7]])

In [29]:
np.random.randint(10, size=(3,4))

array([[6, 8, 8, 1],
       [5, 2, 7, 1],
       [1, 6, 3, 7]])

# Inspecting Property

+ **`arr.size`** : return number of elements in arr
+ **`arr.shape / arr.ndim`**
+ **`arr.dtype`**
+ **`arr.astype(dtype)`** : Convert arr elements to type `dtype`
+ **`arr.tolist()`** : Convert arr to a Python list

In [30]:
arr = np.arange(12).reshape(3,4)

In [31]:
arr.size

12

In [32]:
arr.shape

(3, 4)

In [33]:
arr.ndim

2

In [34]:
arr.dtype

dtype('int64')

In [35]:
# `np.float` was only an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin (same as np.float64).
arr.astype(float)

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [36]:
arr.tolist()

[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]

# Copy / Sort / Reshape

**`np.copy(arr)`** : Return a copy of 'arr'

In [37]:
arr = np.arange(4)
new = np.copy(arr)
new[3] = -1
print('arr :', arr)
print('new :', new)

arr : [0 1 2 3]
new : [ 0  1  2 -1]


**`np.sort`**(a, axis=-1)
+ return a sorted copy of an array

In [38]:
np.random.seed(1004)
arr = np.random.randint(0, 10, 5)
np.sort(arr)

array([2, 3, 4, 5, 7])

In [39]:
np.random.seed(1004)
arr = np.random.randint(0, 10, 12).reshape(3,4)
arr

array([[2, 5, 3, 7],
       [4, 1, 2, 5],
       [5, 8, 8, 4]])

In [40]:
np.sort(arr, axis=0)

array([[2, 1, 2, 4],
       [4, 5, 3, 5],
       [5, 8, 8, 7]])

In [41]:
np.sort(arr, axis=1)

array([[2, 3, 5, 7],
       [1, 2, 4, 5],
       [4, 5, 8, 8]])

**`np.argsort`**(a, axis=-1)
+ Returns the indices that would sort an array
+ axis
    + Axis along which to sort
    + Default is -1 (the last axis)
    + If None, the flattened array is used

In [42]:
np.random.seed(1004)
arr = np.random.randint(0, 10, 12).reshape(3,4)
print(">> arr:\n", arr)

>> arr:
 [[2 5 3 7]
 [4 1 2 5]
 [5 8 8 4]]


In [43]:
np.argsort(arr)

array([[0, 2, 1, 3],
       [1, 2, 0, 3],
       [3, 0, 1, 2]])

In [44]:
np.argsort(arr, axis=0)

array([[0, 1, 1, 2],
       [1, 0, 0, 1],
       [2, 2, 2, 0]])

**`arr.reshape`**(shape)
+ `-1` : Auto calc size

In [45]:
np.arange(12).reshape(-1, 4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

# Add / Remove Element

**`np.append`**(arr, values, axis=None)
+ append values to the end of an array

In [46]:
ori = np.arange(12).reshape(3,4)
ones = np.ones((ori.shape[0],1))
np.append(ori, ones, axis=1)

array([[ 0.,  1.,  2.,  3.,  1.],
       [ 4.,  5.,  6.,  7.,  1.],
       [ 8.,  9., 10., 11.,  1.]])

**`np.insert`**(arr, obj, values, axis=None)
+ obj : int, slice or sequence of ints
    + Object that defines the index or indices before which `values` is inserted

In [47]:
np.insert(ori, 0, 1., axis=1)

array([[ 1,  0,  1,  2,  3],
       [ 1,  4,  5,  6,  7],
       [ 1,  8,  9, 10, 11]])

**`np.delete`**(arr, obj, axis=None)
+ Return a new array with sub-arrays along an axis deleted
+ obj : slice, int or array of ints
    + indicate which sub-arrays to remove

In [48]:
ori = np.arange(12).reshape(3,4)
np.delete(ori, 0, axis=1)

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11]])

# Assemble / Split Data

+ **`axis`**
    + 一般默认 : axis=0
    + axis=0 : 第一个维度，row
    + axis=1 : 第二个维度，col

**`>>>>> Assemble`**
+ np.concatenate( )
+ np.vstack( ) / np.r_[ ]
+ np.hstack( ) / np.c_[ ]

In [49]:
a = np.arange(0,4).reshape(1,-1)
b = np.arange(5,9).reshape(1,-1)

**`np.concatenate()`**
+ axis=0 : 沿着`第一个维度`连接，即：沿着行连接，增加行维度
+ axis=1 : 沿着`第二个维度`连接，即：沿着列连接，增加列维度

In [50]:
np.concatenate([a, b], axis=1)

array([[0, 1, 2, 3, 5, 6, 7, 8]])

In [51]:
np.hstack([a,b])

array([[0, 1, 2, 3, 5, 6, 7, 8]])

In [52]:
np.c_[a,b]

array([[0, 1, 2, 3, 5, 6, 7, 8]])

---------------------

In [53]:
np.concatenate([a, b])

array([[0, 1, 2, 3],
       [5, 6, 7, 8]])

In [54]:
np.vstack([a,b])

array([[0, 1, 2, 3],
       [5, 6, 7, 8]])

In [55]:
np.r_[a,b]

array([[0, 1, 2, 3],
       [5, 6, 7, 8]])

**`>>>>> Split`**
+ np.split( )
+ np.vsplit( )
+ np.hsplit( )

**`np.split(ary, indices_or_sections)`** : 分割点，n-dim对应第一维度

In [56]:
ary = np.arange(10)
ary

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [57]:
np.split(ary, [2,5,8])

[array([0, 1]), array([2, 3, 4]), array([5, 6, 7]), array([8, 9])]

In [58]:
arr = np.arange(32).reshape(8,-1)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [59]:
np.split(arr, [2,5,6])

[array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]]), array([[20, 21, 22, 23]]), array([[24, 25, 26, 27],
        [28, 29, 30, 31]])]

In [60]:
np.vsplit(arr, [2,5,6])

[array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]]), array([[20, 21, 22, 23]]), array([[24, 25, 26, 27],
        [28, 29, 30, 31]])]

In [61]:
ary = np.arange(10)
np.hsplit(ary, [2,5,8])

[array([0, 1]), array([2, 3, 4]), array([5, 6, 7]), array([8, 9])]

# Access Array : Index / Slice / Subset

**`Index`**
+ Same as in Python
+ ! **Boolean** Index

**`Slice`** : [a,b)
+ X[a:b:c] : c - 步长
+ **`多层索引 or 切片`**
    + X[2][2] / X[:2][:2] : 不建议
    + X[2,2] / X[:2, 2] : Recommended

In [62]:
arr = np.arange(12).reshape(3,4)

###### Single Element

In [63]:
arr[1,2]

6

###### Row

In [64]:
arr[1]

array([4, 5, 6, 7])

In [65]:
arr[1:]

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [66]:
arr[0:2]

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [67]:
arr[[0,2]]

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11]])

**`Columns`**

In [68]:
arr[:, 1]

array([1, 5, 9])

In [69]:
arr[:, [1, 3]]

array([[ 1,  3],
       [ 5,  7],
       [ 9, 11]])

In [70]:
arr[:, 1:3]

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10]])

In [71]:
arr[1,1:3]

array([5, 6])

**`Boolean Index`**

In [72]:
arr = np.arange(12).reshape(3,4)

In [73]:
arr > 6

array([[False, False, False, False],
       [False, False, False,  True],
       [ True,  True,  True,  True]])

In [74]:
arr[arr>6]

array([ 7,  8,  9, 10, 11])

In [75]:
(arr>6) & (arr<10)

array([[False, False, False, False],
       [False, False, False,  True],
       [ True,  True, False, False]])

In [76]:
arr[(arr>6) & (arr<10)]

array([7, 8, 9])

**`Subset`**
+ numpy 中修改`子矩阵`，`原矩阵`也会跟着修改，`反之相同`
+ **`np.copy()`** : 对拷贝矩阵修改，不会影响`原矩阵`

In [77]:
arr = np.arange(12).reshape(3,4)
sub = arr[:2, :2]

sub[0,0] = 100
print(">> arr\n", arr)
print("\n>> sub\n", sub)

>> arr
 [[100   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]

>> sub
 [[100   1]
 [  4   5]]


In [78]:
arr = np.arange(12).reshape(3,4)
sub = arr[:2, :2]

arr[0,0] = 100
print(">> Arr\n", arr)
print("\n>> sub\n", sub)

>> Arr
 [[100   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]

>> sub
 [[100   1]
 [  4   5]]


# Simple Process Data

**`np.allclose`**(a, b, rtol=1e-05, atol=1e-08, equal_nan=False)
+ Returns True if two arrays are element-wise equal within a tolerance.
+ True : absolute(a - b) <= (atol + rtol * absolute(b))


**`np.isclose`**(a, b, rtol=1e-05, atol=1e-08, equal_nan=False)
+ Returns a boolean array where two arrays are element-wise equal within a tolerance.
+ True : absolute(a - b) <= (atol + rtol * absolute(b))

In [79]:
np.allclose([1e10,1e-7], [1.00001e10,1e-8])

False

In [80]:
np.allclose([1e10,1e-8], [1.00001e10,1e-9])

True

In [81]:
np.isclose([1e10,1e-7], [1.00001e10,1e-8])

array([ True, False])

In [82]:
np.isclose([1e10,1e-8], [1.00001e10,1e-9])

array([ True,  True])

**`np.unique(ary)`**
+ find the unique elements of an array
+ return_index=True : 返回原位置
+ return_inverse=True : 返回旧元素在新列表的位置
+ return_counts=True : 返回每个元素在 ary 中出现的次数

In [83]:
ary = np.random.randint(10, size=(4,6))
ary

array([[8, 7, 7, 3, 3, 0],
       [6, 7, 5, 3, 5, 8],
       [6, 0, 2, 1, 4, 0],
       [9, 2, 7, 1, 9, 9]])

In [84]:
np.unique(ary)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [85]:
# Request every optional output of np.unique in one call:
#   old -> index (in the flattened input) of the first occurrence of each unique value
#   new -> for every input element, its position inside the unique array
#   cou -> how many times each unique value occurs
arr, old, new, cou = np.unique(
    ary,
    return_index=True,
    return_inverse=True,
    return_counts=True,
)
print(arr, old, new, cou, sep='\n')

[0 1 2 3 4 5 6 7 8 9]
[ 5 15 14  3 16  8  6  1  0 18]
[8 7 7 3 3 0 6 7 5 3 5 8 6 0 2 1 4 0 9 2 7 1 9 9]
[3 2 2 3 1 2 2 4 2 3]


------------------

###### 数组降维 - np.ravel() & arr.flatten()
+ **`将多维数组降为一维 - 数组平摊`**
+ `arr.flatten()` : ndarray 的方法（没有 `np.flatten` 函数），返回一份**拷贝**，对拷贝的修改不会影响原始矩阵
+ `np.ravel()` / `arr.ravel()` : 尽可能返回**`视图/view`**（内存不连续时返回拷贝），对视图的修改会影响`原始`矩阵

In [86]:
arr = np.arange(0, 12).reshape((3,4))

In [87]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [88]:
arr.flatten()[1] = -12345
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [89]:
arr.ravel()[1] = -12345
arr

array([[     0, -12345,      2,      3],
       [     4,      5,      6,      7],
       [     8,      9,     10,     11]])

--------------------------

**`np.any() / np.all()`**

In [90]:
X = np.array([0,1,2,3,4])

In [91]:
np.any(X)

True

In [92]:
np.all(X)

False

------------------------

**`np.nan & np.inf`**

+ 运算中出现 `np.nan` / `np.inf` 并不会让程序终止，但它们会在后续计算中传播，使结果失去意义；常见来源是除法中分母为 0（或对 0 取对数）
+ np.nan : Not A Number

In [93]:
a = np.array([[np.nan, np.inf], [-np.nan, -np.inf]])
a

array([[ nan,  inf],
       [ nan, -inf]])

**`处理 / 判断`**
+ np.`nan_to_num`() : Replace NaN with zero and infinity with large finite numbers


+ np.`isnan`(X) : Test element-wise for NaN and return result as a boolean array


+ np.`isinf`(X) : Test element-wise for positive or negative infinity, return a boolean array of the same shape as x, True where `x == +/-inf`, otherwise False
+ np.`isneginf`() / np.isposinf()

In [94]:
a = np.array([[np.nan, np.inf], [-np.nan, -np.inf]])
a

array([[ nan,  inf],
       [ nan, -inf]])

In [95]:
np.nan_to_num(a)

array([[ 0.00000000e+000,  1.79769313e+308],
       [ 0.00000000e+000, -1.79769313e+308]])

In [96]:
np.isinf(a)

array([[False,  True],
       [False,  True]])

In [97]:
np.isnan(a)

array([[ True, False],
       [ True, False]])

-------------------

###### np.random.____

**`np.random.shuffle()`** - 打乱数据
+ **`修改原始数组`**

In [98]:
row = np.arange(10)
np.random.shuffle(row)
row

array([4, 6, 0, 5, 7, 9, 8, 1, 2, 3])

+ Multi-dimensional arrays are only `shuffled along the first axis`

In [99]:
arr = np.arange(12).reshape((3,4))
np.random.shuffle(arr)
arr

array([[ 8,  9, 10, 11],
       [ 4,  5,  6,  7],
       [ 0,  1,  2,  3]])

**`np.random.permutation`**()
+ randomly permute a sequence, or return a permuted range

In [100]:
np.random.permutation(10)

array([0, 5, 1, 3, 8, 4, 6, 2, 7, 9])

In [101]:
row = np.arange(12)
np.random.permutation(row)

array([ 8, 11,  1,  4,  9,  0,  6,  5,  2,  3,  7, 10])

In [102]:
np.random.permutation(row).reshape((-1, 4))

array([[11,  6,  8,  5],
       [ 3,  9,  2,  4],
       [ 0, 10,  1,  7]])

###### np.random.`__distribution`( )

**`np.random.normal(loc, scale, size)`**
+ random samples from a normal (Gaussian) distribution
+ loc : Mean of the distribution
+ scale : standard deviation of the distribution

In [103]:
mean = 0.0
std = 1.0

np.random.normal(mean, std, size=(3,4))

array([[ 0.04496968,  0.04993487, -0.02104528, -1.76086752],
       [-0.86077044, -0.79770853, -0.53229158,  1.8377797 ],
       [-0.08706854, -0.17493977, -0.18110561, -1.0728916 ]])

**`np.random.multivariate_normal(mean, cov, size)`**
+ random samples from a multivariate normal distribution

In [104]:
mean = [0, 0]
cov = [[1,0], [0,100]]

np.random.multivariate_normal(mean, cov, (3, 4))

array([[[ -1.06532908,  -5.91280312],
        [ -1.44892625, -11.70236786],
        [  0.12741299,  17.67977598],
        [ -0.69773622, -22.64721827]],

       [[  1.319054  ,   6.31007949],
        [  1.10513583,   0.78003418],
        [  0.29553146,   3.73077154],
        [ -0.04974849,  -6.97925455]],

       [[  0.73199511,  -7.38080047],
        [  0.5796213 ,  -4.94921757],
        [ -0.60162965,   9.57642523],
        [  1.73662905,   1.67355526]]])

-----------------

**`np.where(condition)`**
+ `Application` : Logistic Regression split sample to `pos & neg`

In [105]:
ary = np.array([[1, 0, 1, 0, 1, 1, 0, 1]])
pos_ind = np.where(ary == 1)
neg_ind = np.where(ary == 0)
print('Positive Index :', pos_ind)
print('Positive :', ary[pos_ind])

Positive Index : (array([0, 0, 0, 0, 0]), array([0, 2, 4, 5, 7]))
Positive : [1 1 1 1 1]


**`np.argwhere(a / condition)`**
+ find the indices of array element that are non-zero, grouped by element

In [106]:
ary = np.array([[1, 0, 1, 0, 1, 1, 0, 1, 3]])

In [107]:
ind = np.argwhere(ary[0])
ary[0, ind]

array([[1],
       [1],
       [1],
       [1],
       [1],
       [3]])

In [108]:
np.argwhere(ary > 1)

array([[0, 8]])

--------------------

**`np.meshgrid`**(*xi, **kwargs)
+ Return coordinate matrices from coordinate vectors
+ x1, x2, ..., xn : array_like
    + 1-D arrays representing the coordinates of a grid

+ **`Application`**
    + hyperparam choose
    + draw contour

![``numpy-cheat-sheet_``](meshgrid.jpeg)

In [109]:
x = np.arange(-3, 3, 1)
y = np.arange(-5, 5, 1)
xx, yy = np.meshgrid(x, y)

In [110]:
xx

array([[-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2],
       [-3, -2, -1,  0,  1,  2]])

In [111]:
yy

array([[-5, -5, -5, -5, -5, -5],
       [-4, -4, -4, -4, -4, -4],
       [-3, -3, -3, -3, -3, -3],
       [-2, -2, -2, -2, -2, -2],
       [-1, -1, -1, -1, -1, -1],
       [ 0,  0,  0,  0,  0,  0],
       [ 1,  1,  1,  1,  1,  1],
       [ 2,  2,  2,  2,  2,  2],
       [ 3,  3,  3,  3,  3,  3],
       [ 4,  4,  4,  4,  4,  4]])

In [112]:
# NOTE(review): imports conventionally live in the first cell next to
# `import numpy as np`; this one is kept here so the cell runs on its own.
import matplotlib.pyplot as plt
plt.figure()
# Plot one marker per (xx[i, j], yy[i, j]) pair, i.e. every node of the grid
# produced by np.meshgrid(x, y) above.
plt.scatter(xx, yy)

<matplotlib.collections.PathCollection at 0x7fc7f0ef0da0>

# Basic Math Method

**`+-*/`**

In [113]:
a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])
print(a, b, sep='\n')

[10 20 30 40]
[1 2 3 4]


In [114]:
print('a + b\t', a + b)
print('a - b\t', a - b)
print('a * b\t', a * b)
print('a / b\t', a / b)

a + b	 [11 22 33 44]
a - b	 [ 9 18 27 36]
a * b	 [ 10  40  90 160]
a / b	 [10. 10. 10. 10.]


**`Math Method`**

In [115]:
a = np.array([10, 20, 30, 40])
np.power(a, 2)

array([ 100,  400,  900, 1600])

In [116]:
a = np.array([10, 20, 30, 40])
np.log(a)

array([2.30258509, 2.99573227, 3.40119738, 3.68887945])

In [117]:
a = np.array([10, 20, 30, 40])
np.exp(a)

array([2.20264658e+04, 4.85165195e+08, 1.06864746e+13, 2.35385267e+17])

In [118]:
a = np.array([10, 20, 30, 40])
np.sign(a)

array([1, 1, 1, 1])

In [119]:
a = np.array([10, 20, 30, 40])
np.sqrt(a)

array([3.16227766, 4.47213595, 5.47722558, 6.32455532])

**`Compare / Logical`**

In [120]:
b = np.array([1, 2, 3, 4])
print('b\t', b)
print('b < 3\t', b < 3)

b	 [1 2 3 4]
b < 3	 [ True  True False False]


In [121]:
print('b\t\t', b)
print('2.1 < b < 29\t', (b>2.1) & (b<29))

b		 [1 2 3 4]
2.1 < b < 29	 [False False  True  True]


**`Linear Algebra`**
+ Transpose : T
+ Dot Product : np.dot() / @
+ Inverse : np.linalg.inv() / np.linalg.pinv()
    + 逆：要求`方阵`
    + 伪逆：不要求形状
+ Determinant : 行列式
    + np.linalg.det(a)
+ SVD / Singular Value Decomposition / 奇异值分解
    + np.linalg.svd()

###### Transpose

In [122]:
arr = np.arange(12).reshape(3,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [123]:
arr.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

###### Dot Product

In [124]:
a = np.arange(6).reshape((2,3))
b = np.arange(12).reshape((3,-1))

In [125]:
np.dot(a, b)

array([[20, 23, 26, 29],
       [56, 68, 80, 92]])

In [126]:
a @ b

array([[20, 23, 26, 29],
       [56, 68, 80, 92]])

###### Inverse

In [127]:
arr = np.array([[1,5],[9,19]])

In [128]:
invArr = np.linalg.inv(arr)

In [129]:
invArr @ arr

array([[ 1.00000000e+00, -4.44089210e-16],
       [ 5.55111512e-17,  1.00000000e+00]])

In [130]:
arr @ invArr

array([[1., 0.],
       [0., 1.]])

---------

In [131]:
arr = np.arange(0, 9).reshape(3,3)

In [132]:
invArr = np.linalg.pinv(arr)

In [133]:
arr @ invArr

array([[ 0.83333333,  0.33333333, -0.16666667],
       [ 0.33333333,  0.33333333,  0.33333333],
       [-0.16666667,  0.33333333,  0.83333333]])

**`np.linalg.det`**(a)
+ Compute the determinant of an array
+ a : (…, M, M) array_like / 要求方阵

In [134]:
arr = np.arange(4,13).reshape(-1,3)
det = np.linalg.det(arr)

In [135]:
det

3.197442310920453e-15

**`SVD / Singular Value Decomposition`**

In [136]:
arr = np.arange(12).reshape(3,4)
# np.cov treats each row of its argument as one variable, so passing arr.T
# (shape 4x3) gives the 4x4 covariance matrix of arr's columns.
cov = np.cov(arr.T)
# NOTE: the third value returned by np.linalg.svd is V^H (the conjugate
# transpose of V), not V itself. With the names used here,
# cov equals U @ np.diag(S) @ V up to rounding.
U, S, V = np.linalg.svd(cov)

# Statistics Method

###### np.max( ) / np.min( )

In [137]:
a = np.array([[1,2,3], [4,5,6]])
print('max :', a.max())
print('min :', a.min())

max : 6
min : 1


In [138]:
a = np.array([[1,2,3], [4,5,6]])
print('a : \n', a)
print('a.max(axis=1) - row :', a.max(axis=1))
print('a.max(axis=0) - col :', a.max(axis=0))

a : 
 [[1 2 3]
 [4 5 6]]
a.max(axis=1) - row : [3 6]
a.max(axis=0) - col : [4 5 6]


###### np.argmax( ) /  np.argmin( )

In [139]:
np.random.seed(0)
a = np.random.random((2,4))
a

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ]])

In [140]:
print('np.argmax() : ', np.argmax(a))
print('np.argmax(a, axis=1) : ', np.argmax(a, axis=1))
print('np.argmax(a, axis=0) : ', np.argmax(a, axis=0))

np.argmax() :  7
np.argmax(a, axis=1) :  [1 3]
np.argmax(a, axis=0) :  [0 0 0 1]


######  np.sum( ) / cumsum( )

In [141]:
a = np.array([[1,2,3], [4,5,6]])
print('sum :', a.sum())
print('sum(axis=1) - row :', a.sum(axis=1))
print('sum(axis=0) - col :', a.sum(axis=0))

sum : 21
sum(axis=1) - row : [ 6 15]
sum(axis=0) - col : [5 7 9]


In [142]:
a = np.array([[1,2,3], [4,5,6]])
print('a.cumsum() : ', a.cumsum())
print('a.cumsum(axis=1) - row : \n', a.cumsum(axis=1), sep='')
print('a.cumsum(axis=0) - col : \n', a.cumsum(axis=0), sep='')

a.cumsum() :  [ 1  3  6 10 15 21]
a.cumsum(axis=1) - row : 
[[ 1  3  6]
 [ 4  9 15]]
a.cumsum(axis=0) - col : 
[[1 2 3]
 [5 7 9]]


###### np.mean( ) / np.var( ) / np.std( ) / np.cov( )

In [143]:
a = np.array([[1,2,3], [4,5,6]])
print('np.mean : ', np.mean(a))
print('np.average :', np.average(a))
print('ary.mean :', a.mean())

np.mean :  3.5
np.average : 3.5
ary.mean : 3.5


In [144]:
print('mean(a, axis=1) - row :', np.mean(a, axis=1))  # rows
print('mean(a, axis=0) - col :', np.mean(a, axis=0))  # columns

mean(a, axis=1) - row : [2. 5.]
mean(a, axis=0) - col : [2.5 3.5 4.5]


In [145]:
a = np.array([[1,2,3], [4,5,6]])
print('a.var() : ', a.var())
print('a.var(axis=1) - row : ', a.var(axis=1))
print('a.var(axis=0) - col : ', a.var(axis=0))

a.var() :  2.9166666666666665
a.var(axis=1) - row :  [0.66666667 0.66666667]
a.var(axis=0) - col :  [2.25 2.25 2.25]


In [146]:
a = np.array([[1,2,3], [4,5,6]])
print('a.std() : ', a.std())
print('a.std(axis=1) - row : ', a.std(axis=1))
print('a.std(axis=0) - col : ', a.std(axis=0))

a.std() :  1.707825127659933
a.std(axis=1) - row :  [0.81649658 0.81649658]
a.std(axis=0) - col :  [1.5 1.5 1.5]


In [147]:
np.cov(a.T)

array([[4.5, 4.5, 4.5],
       [4.5, 4.5, 4.5],
       [4.5, 4.5, 4.5]])

# Import / Export File

**`np.loadtxt`**(fname, delimiter=None, skiprows=0, usecols=None, unpack=False)
+ fname : file, str, or pathlib.Path
+ delimiter : str, optional
    + 分隔符、默认为 '空格'
+ skiprows : int, optional
    + skip the first `skiprows` lines; default 0
+ usecols : int or sequence, optional
    + 要读取哪些列，0 为第一列；默认读取所有列
    + usecols=(1,4,5), 将读取 "第 2、5、6 列"
+ unpack : bool, optional
    + 如果为 True, 返回的数组将被 '转置'

In [148]:
datafile = 'dataset/ex1data1.txt'

In [149]:
data = np.loadtxt(datafile, delimiter=',', usecols=(0,1), unpack=True)
data.shape

(2, 97)

In [150]:
data = np.loadtxt(datafile, delimiter=',')
data.shape

(97, 2)

**`np.savetxt`**(fname, fmt='%.18e', delimiter=' ', header='')

In [151]:
np.random.seed(1004)
arr = np.random.randn(3,4)
arr

array([[ 0.59440307,  0.40260871, -0.80516223,  0.1151257 ],
       [-0.75306522, -0.7841178 ,  1.46157577,  1.57607553],
       [-0.17131776, -0.91448182,  0.86013945,  0.35880192]])

In [152]:
np.savetxt('dataset/test0.txt', arr)
np.savetxt('dataset/test1.txt', arr, delimiter=',', header="Test Save")

In [153]:
np.loadtxt('dataset/test1.txt', delimiter=',')

array([[ 0.59440307,  0.40260871, -0.80516223,  0.1151257 ],
       [-0.75306522, -0.7841178 ,  1.46157577,  1.57607553],
       [-0.17131776, -0.91448182,  0.86013945,  0.35880192]])

# Broadcasting

**`广播 / 向量&矩阵运算`**

In [154]:
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])

x + v

array([[ 2,  2,  4],
       [ 5,  5,  7],
       [ 8,  8, 10],
       [11, 11, 13]])

----------------

In [155]:
v = np.array([1,2,3])
w = np.array([4,5])

v.reshape(3,1) * w

array([[ 4,  5],
       [ 8, 10],
       [12, 15]])

In [156]:
x = np.array([[1,2,3], [4,5,6]])


x + v

array([[2, 4, 6],
       [5, 7, 9]])

# Application

**`Add one column to matrix`**
+ np.insert(arr, obj, values, axis=None)
+ np.c_[ ] / np.r_[ ]
+ np.hstack( ) / np.vstack( )

In [157]:
ori = np.arange(12.).reshape(3,4)
ones = np.ones((ori.shape[0],1))
ori

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

**`np.insert`**(arr, obj, values, axis=None)
+ obj : int, slice or sequence of ints
    + Object that defines the index or indices before which `values` is inserted

In [158]:
np.insert(ori, 0, 1., axis=1)

array([[ 1.,  0.,  1.,  2.,  3.],
       [ 1.,  4.,  5.,  6.,  7.],
       [ 1.,  8.,  9., 10., 11.]])

In [159]:
np.c_[ones, ori]

array([[ 1.,  0.,  1.,  2.,  3.],
       [ 1.,  4.,  5.,  6.,  7.],
       [ 1.,  8.,  9., 10., 11.]])

In [160]:
np.hstack((ones, ori))

array([[ 1.,  0.,  1.,  2.,  3.],
       [ 1.,  4.,  5.,  6.,  7.],
       [ 1.,  8.,  9., 10., 11.]])

---------------------

**`RuntimeWarning : divide by zero encountered in log`**
+ **`np.count_nonzero(a, axis=None)`** : counts the number of non-zero values in the array a
+ **`np.nonzero(a)`** : return the indices of the elements that are non-zero

**`Application`**
+ cost function : hyp = 0

In [161]:
def replace_zeros(data, eps=1e-12):
    """Return a floating-point copy of ``data`` with every exact zero replaced.

    Useful for guarding ``np.log`` (e.g. inside a cost function) against
    "RuntimeWarning: divide by zero encountered in log".

    Parameters
    ----------
    data : array_like
        Input values. The input itself is left untouched.
    eps : float, optional
        Replacement value used when ``data`` contains no non-zero entry.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``. Each zero is replaced by the
        smallest non-zero entry of ``data``, or by ``eps`` when every entry
        is zero. Integer and boolean inputs are converted to float.
    """
    # Always work on a copy so the caller's array is not modified behind its back.
    result = np.array(data)
    # Assigning 1e-12 into an integer array would silently truncate it back to
    # 0, leaving the zeros in place; promote such inputs to float first.
    if not np.issubdtype(result.dtype, np.inexact):
        result = result.astype(float)

    zero_mask = result == 0
    if zero_mask.all():
        # No non-zero entry to borrow from (this also covers empty input).
        fill_value = eps
    else:
        fill_value = result[~zero_mask].min()
    result[zero_mask] = fill_value

    return result

In [162]:
ary = np.array([[0, 0.01, 1], [0, 0.001, 0]])
replace_zeros(ary)

array([[0.001, 0.01 , 1.   ],
       [0.001, 0.001, 0.001]])

In [163]:
ary = np.zeros_like(ary)
replace_zeros(ary)

array([[1.e-12, 1.e-12, 1.e-12],
       [1.e-12, 1.e-12, 1.e-12]])

---------------------

**`SVD / Singular Value Decomposition`**
+ https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.svd.html#numpy.linalg.svd

In [164]:
arr = np.arange(12).reshape(3,4)
# np.cov treats each row of its argument as one variable, so passing arr.T
# (shape 4x3) gives the 4x4 covariance matrix of arr's columns.
cov = np.cov(arr.T)
# NOTE: the third value returned by np.linalg.svd is V^H (the conjugate
# transpose of V), not V itself. With the names used here,
# cov equals U @ np.diag(S) @ V up to rounding.
U, S, V = np.linalg.svd(cov)