## Numpy Tutorial

### Summary
1. 认识数组
1. 创建数组
2. 数组的保存
4. 数组索引, 赋值
2. 数组运算
5. 数组形状操作
6. 数组排序
7. 其它

In [3]:
# import package
import numpy as np

### 1. 认识数组

In [6]:
data = np.random.randint(0, 10, (10, 100))
print('data \n',data[:10, :10])
# basic
print('shape ', data.shape)
print('dim ', data.ndim)
print('size', data.size)
print()
# dtype, astype
newdata = np.array([3, 2, 1])
newdata.itemsize
print(newdata.astype(float).dtype)
print(newdata[0].dtype)
print()
# length of array
print('Length of one array element in bytes', data.itemsize)
print('Total bytes consumed by array', data.nbytes)
print()
# one-dim array
print('one dim array', data.flat[:20])
# ravel
print('ravel \n', data.ravel())
print()
# complex number
newdata = np.array([[1 + 1j, 2 + 3j], [2 + 1j, 1 + 3j]])
print('real \n', newdata.real)
print('imag \n', newdata.imag)

data 
 [[9 3 3 1 0 5 7 1 4 2]
 [6 1 5 3 6 1 3 1 9 5]
 [5 6 3 8 1 4 0 3 8 2]
 [2 9 4 5 5 5 5 1 7 5]
 [6 2 4 0 8 9 7 5 7 2]
 [4 8 5 0 9 0 6 4 8 5]
 [1 6 8 4 7 7 5 7 1 6]
 [3 4 5 5 8 7 4 8 2 1]
 [1 0 2 8 2 5 9 3 5 9]
 [8 0 9 4 4 9 0 8 7 8]]
shape  (10, 100)
dim  2
size 1000

float64
int32

Length of one array element in bytes 4
Total bytes consumed by array 4000

one dim array [9 3 3 1 0 5 7 1 4 2 4 5 0 9 8 8 3 5 7 9]
ravel 
 [9 3 3 1 0 5 7 1 4 2 4 5 0 9 8 8 3 5 7 9 3 9 0 0 9 0 8 2 6 4 5 3 2 4 7 3 9
 3 5 4 9 2 2 7 4 2 7 5 3 5 6 5 8 4 2 6 1 3 0 9 0 9 1 9 3 6 4 7 7 0 2 5 4 4
 7 2 1 9 3 5 4 0 8 5 2 1 8 4 8 5 4 4 4 8 9 2 4 0 1 6 6 1 5 3 6 1 3 1 9 5 7
 6 8 3 4 3 5 4 6 4 8 8 3 7 2 2 9 8 1 7 3 0 6 9 1 0 1 4 0 9 8 8 4 7 0 8 3 5
 7 1 7 6 8 4 2 5 5 8 1 2 2 4 3 7 9 8 8 4 5 9 5 3 5 7 3 8 4 0 2 8 0 2 5 7 4
 3 9 1 1 5 1 1 9 1 4 6 8 8 8 1 5 6 3 8 1 4 0 3 8 2 5 5 0 5 4 9 4 1 1 2 5 9
 9 6 6 8 5 7 2 0 7 0 6 4 2 5 7 2 3 1 0 7 2 9 6 3 8 5 9 5 6 2 3 3 0 8 2 3 4
 3 2 4 7 1 7 0 0 1 0 8 9 5 4 0 3 7 4 4 2 9 3 7 0

### 2. 创建数组

In [9]:
print('np.zeros str \n', np.zeros((2,3), str))
print('np.zeros float \n', np.zeros((2,3), float))
print('np.full \n', np.full((2, 3), 5))
print('np.ones \n', np.ones((2,3), str))
print('np.eye \n', np.eye(5))
# diag
newdata = np.random.randint(0, 10, (2,2))
print('np.diag \n', np.diag([1, 2, 3]))
print('diag, k = 1, k = -1 ', np.diag(newdata), np.diag(newdata, k = 1), np.diag(newdata, k = -1))
# unique
print('np.unique ', np.unique(data))
print('np.arange \n', np.arange(0, 10, 2))
print('np.linspace \n', np.linspace(0, 10, 21))
# uniform distribution
print('np.random.rand \n', np.random.rand(2, 3, 2))
print('np.random.random \n', np.random.random((2, 3, 2)))
print('np.random.randint \n', np.random.randint(0, 10, (2,3)))
# standard normal distribution
print('np.random.randn \n', np.random.randn(2,3))
# normal distribution
print('np.random.normal \n', np.random.normal(0, 2, (2,3)))
# generate with function
print('np.fromfunction \n', np.fromfunction(lambda i, j, k: i + j + k, (3, 3, 3)))

np.zeros str 
 [['' '' '']
 ['' '' '']]
np.zeros float 
 [[0. 0. 0.]
 [0. 0. 0.]]
np.full 
 [[5 5 5]
 [5 5 5]]
np.ones 
 [['1' '1' '1']
 ['1' '1' '1']]
np.eye 
 [[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
np.diag 
 [[1 0 0]
 [0 2 0]
 [0 0 3]]
diag, k = 1, k = -1  [4 9] [2] [6]
np.unique  [0 1 2 3 4 5 6 7 8 9]
np.arange 
 [0 2 4 6 8]
np.linspace 
 [ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5
  7.   7.5  8.   8.5  9.   9.5 10. ]
np.random.rand 
 [[[0.11967231 0.12045295]
  [0.40694603 0.13285921]
  [0.27158961 0.82424992]]

 [[0.84424185 0.01438801]
  [0.46050032 0.97300762]
  [0.27233836 0.72515415]]]
np.random.random 
 [[[0.21731645 0.23423666]
  [0.96093664 0.47125953]
  [0.12353198 0.74236195]]

 [[0.43235339 0.94964614]
  [0.98325011 0.5571717 ]
  [0.14372721 0.37232143]]]
np.random.randint 
 [[5 9 8]
 [0 0 4]]
np.random.randn 
 [[-0.35399624  0.30583987  1.1543685 ]
 [-0.27362513 -1.11429576  0.52179209]]
np.r

### 3. 数组的保存

In [188]:
# saved_array.npy
np.save('saved_array', data)
testdata = np.load('saved_array.npy')

### 4. 数组索引, 赋值

In [10]:
# slice
data = np.random.randint(0, 10, (10, 100))
print('10 rows and 10 cols from data \n', data[:5, :5])
data[:5, :5] = 0
print('10 rows and 10 cols from data \n', data[:5, :5])

# delete
newdata = np.random.randint(0, 10, (2,2))
print('Original array \n', newdata)
print('General \n', np.delete(newdata, [0], axis = 1))
print('Special \n', np.delete(newdata, [0, 1]))

# append
print('original \n', newdata)
print('append axis = 0 \n', np.append(newdata, [[1, 2]], axis = 0))
print('append axis = 1 \n', np.append(newdata, [[1], [2]], axis = 1))
# insert
print('insert between first col and second col \n', np.insert(newdata, 0, [1, 2], axis = 1))
print('insert between first row and second row \n', np.insert(newdata, 1, [1, 2], axis = 0))

# vstack, hstack
test1 = np.random.randint(0, 10, (2, 2))
test2 = np.random.randint(0, 10, (2, 2))
print('np.vstack(test1, test2) \n', np.vstack((test1, test2)))
print('np.hstack(test1, test2) \n', np.hstack((test1, test2)))
# concatenate
Z1 = np.arange(3)
Z2 = np.arange(3,7)
Z3 = np.arange(7,10)
Z = np.array([Z1, Z2, Z3])
print('np.concatenate \n', np.concatenate(Z))
# c_, r_
M1 = np.array([1, 2, 3])
M2 = np.array([4, 5, 6])
print('np.c_ \n', np.c_[M1, M2])
print('np.r_ \n', np.r_[M1, M2])

# conditional index
print('newdata[newdata>3] \n', newdata[newdata>3])
newdata[newdata>3] = 0
print(newdata)

# split
A = np.random.randint(0, 10, (5, 5))
print('np.array_split \n', np.array_split(A, 2, axis = 1))

10 rows and 10 cols from data 
 [[3 6 5 4 3]
 [7 2 8 7 3]
 [7 8 2 5 7]
 [6 3 0 9 3]
 [2 1 4 3 9]]
10 rows and 10 cols from data 
 [[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Original array 
 [[3 4]
 [6 5]]
General 
 [[4]
 [5]]
Special 
 [6 5]
original 
 [[3 4]
 [6 5]]
append axis = 0 
 [[3 4]
 [6 5]
 [1 2]]
append axis = 1 
 [[3 4 1]
 [6 5 2]]
insert between first col and second col 
 [[1 3 4]
 [2 6 5]]
insert between first row and second row 
 [[3 4]
 [1 2]
 [6 5]]
np.vstack(test1, test2) 
 [[2 6]
 [9 4]
 [5 4]
 [5 3]]
np.hstack(test1, test2) 
 [[2 6 5 4]
 [9 4 5 3]]
np.concatenate 
 [0 1 2 3 4 5 6 7 8 9]
np.c_ 
 [[1 4]
 [2 5]
 [3 6]]
np.r_ 
 [1 2 3 4 5 6]
newdata[newdata>3] 
 [4 6 5]
[[3 0]
 [0 0]]
np.array_split 
 [array([[0, 2, 2],
       [0, 3, 3],
       [5, 0, 1],
       [4, 1, 2],
       [5, 9, 9]]), array([[4, 8],
       [5, 9],
       [3, 9],
       [1, 5],
       [4, 1]])]


### 5. 数组运算

In [400]:
# set operators
x = np.array([1, 2, 3, 4])
y = np.array([2, 4, 7, 9])
print('The elements that are both in x and y:', np.intersect1d(x,y))
print('The elements that are in x that are not in y:', np.setdiff1d(x,y))
print('All the elements of x and y:',np.union1d(x,y))

# array + - * /
# array + 3
# array + array same shape

# dot
A = np.array([[1, 2, 3], [2, 3, 4]])
B = np.array([[3, 2], [3, 4], [1, 3]])
print('np.dot \n', np.dot(A, B))
print('np.mat \n', np.mat(A) * np.mat(B))

# .T .inv
print('transpose matrix \n', A.T)
print('inverse matrix \n', np.linalg.inv(np.array([[2,1], [3,4]])))

# sin, exp, sqrt, power, median, mean, average, var, std
np.sin(A)
np.exp(A)
np.sqrt(A)
np.power(A, 2)
np.sum(A)
np.median(A)
np.mean(A)
np.average(A)
np.var(A)
np.std(A, axis = 0)

# add different size
B = np.array([1, 2, 3])
C = np.array([1, 2])
print('A + B \n', np.add(A, B))
print('A + C \n', np.add(A.T, C).T)

# min, max, argmin, argmax
np.max(A, axis = 0)
np.max(A, axis = 1)
np.min(A, axis = 0)
np.min(A, axis = 1)
np.argmax(A, axis = 0)
np.argmax(A, axis = 1)
np.argmin(A, axis = 0)
np.argmin(A, axis = 1)

# transform to int: ceil floor int
D = np.random.randn(3,3)
print('np.ceil \n', np.ceil(D))
print('np.floor \n', np.floor(D))
print('np.trunc \n', np.trunc(D)) # = floor

# bincount
A = np.random.randint(0, 10, 50)
print('original \n', A)
print('np.bincount \n', np.bincount(A))

# np.percentile
print('np.percentile \n', np.percentile(A, q = [10, 50, 80]))

# eignvalue and eignvector
M = np.matrix([[1,2,3], [4,5,6], [7,8,9]])
w, v = np.linalg.eig(M)
print('eignvalue \n', w)
print('eignvector \n', v)

# distance Euler
a = np.array([2,3,4])
b = np.array([1,2,3])
print('Euler distaance \n', np.linalg.norm(b-a, 2))

# correlation matrix
print('correlation matrix \n', np.corrcoef(D))

# diff
print('np.diff \n', np.diff(D))

# cum sum
print('np.cumsum \n', np.cumsum(D))

The elements that are both in x and y: [2 4]
The elements that are in x that are not in y: [1 3]
All the elements of x and y: [1 2 3 4 7 9]
np.dot 
 [[12 19]
 [19 28]]
np.mat 
 [[12 19]
 [19 28]]
transpose matrix 
 [[1 2]
 [2 3]
 [3 4]]
inverse matrix 
 [[ 0.8 -0.2]
 [-0.6  0.4]]
A + B 
 [[2 4 6]
 [3 5 7]]
A + C 
 [[2 3 4]
 [4 5 6]]
np.ceil 
 [[ 3. -0. -1.]
 [ 1.  1. -1.]
 [ 1. -0. -1.]]
np.floor 
 [[ 2. -1. -2.]
 [ 0.  0. -2.]
 [ 0. -1. -2.]]
np.trunc 
 [[ 2. -0. -1.]
 [ 0.  0. -1.]
 [ 0. -0. -1.]]
original 
 [7 7 2 9 3 5 9 5 0 3 2 7 2 6 3 6 2 7 9 1 4 6 9 4 0 8 0 7 0 6 9 8 4 8 5 0 6
 0 1 7 5 1 0 0 1 8 5 4 2 0]
np.bincount 
 [9 4 5 3 4 5 5 6 4 5]
np.percentile 
 [0.  4.5 7. ]
eignvalue 
 [ 1.61168440e+01 -1.11684397e+00 -9.75918483e-16]
eignvector 
 [[-0.23197069 -0.78583024  0.40824829]
 [-0.52532209 -0.08675134 -0.81649658]
 [-0.8186735   0.61232756  0.40824829]]
Euler distaance 
 1.7320508075688772
correlation matrix 
 [[1.         0.69646468 0.99553028]
 [0.69646468 1.         0.62

### 6. 数组形状操作

In [376]:
print('np.reshape \n', np.array([[1, 2, 3], [2, 3, 4]]), '\n',\
                       np.array([[1, 2, 3], [2, 3, 4]]).reshape(3,2))
A = np.array([[1,2,3], [2,3,4]])
print('np.resize \n', np.resize(A, (3,2)))

np.reshape 
 [[1 2 3]
 [2 3 4]] 
 [[1 2]
 [3 2]
 [3 4]]
np.resize 
 [[1 2]
 [3 2]
 [3 4]]


### 7. 数组排序

In [464]:
# sort by the thrid col
Z = np.random.randint(0,10,(5,5))
print("before sort：\n",Z)
print('after sort: \n', Z[Z[:,2].argsort()])
# Z[[1,2,3,0,4]]

# sort
np.sort(Z, axis = 0)

before sort：
 [[9 5 9 9 7]
 [3 8 7 1 7]
 [8 7 4 3 0]
 [6 5 9 4 1]
 [2 7 1 1 6]]
after sort: 
 [[2 7 1 1 6]
 [8 7 4 3 0]
 [3 8 7 1 7]
 [9 5 9 9 7]
 [6 5 9 4 1]]


array([[2, 5, 1, 1, 0],
       [3, 5, 4, 1, 1],
       [6, 7, 7, 3, 6],
       [8, 7, 9, 4, 7],
       [9, 8, 9, 9, 7]])

### 8. 其它

In [467]:
# date
yesterday = np.datetime64('today', 'D') - np.timedelta64(1, 'D')
today = np.datetime64('today', 'D')
tomorrow  = np.datetime64('today', 'D') + np.timedelta64(1, 'D')
print("yesterday: ", yesterday)
print("today: ", today)
print("tomorrow: ", tomorrow)

# set precision
A = np.random.rand(10)
np.set_printoptions(precision=4)
print(A)
print(A/1e4)

# nonzero, return non zero index
A = np.random.randint(0,10, (2,3))
print(A)
print('np.nonzero \n', np.nonzero(A))

# np.random.choice
print('np.random.choice \n', np.random.choice(np.array([1,2,3,4,5,6]), 3, replace = False))

# np.isnan, np.where
A = np.array([np.random.rand(10) for i in range(5)])
A[np.random.randint(0,5,4), np.random.randint(0,5,4)] = np.nan
# judge wether nan exist
print('np.isnan \n', np.isnan(A))
# find index of nan
print('np.where \n', np.where(np.isnan(A)))
# drop nan value based on row
print('original A \n', A)
print('drop based on row \n', A[np.sum(np.isnan(A), axis=1) == 0])
print('drop based on column \n', A.T[np.sum(np.isnan(A), axis=0) == 0])

yesterday:  2018-09-03
today:  2018-09-04
tomorrow:  2018-09-05
[0.9327 0.0715 0.4973 0.7787 0.4929 0.8227 0.1785 0.4569 0.007  0.764 ]
[9.3272e-05 7.1473e-06 4.9733e-05 7.7868e-05 4.9286e-05 8.2265e-05
 1.7852e-05 4.5691e-05 7.0221e-07 7.6400e-05]
[[0 5 9]
 [9 6 1]]
np.nonzero 
 (array([0, 0, 1, 1, 1], dtype=int64), array([1, 2, 0, 1, 2], dtype=int64))
np.random.choice 
 [6 3 5]
np.isnan 
 [[False  True  True False False False False False False False]
 [False False False False False False False False False False]
 [False False False False False False False False False False]
 [ True False False False False False False False False False]
 [False  True False False False False False False False False]]
np.where 
 (array([0, 0, 3, 4], dtype=int64), array([1, 2, 0, 1], dtype=int64))
original A 
 [[0.5801    nan    nan 0.7655 0.4668 0.0767 0.5195 0.5405 0.407  0.2675]
 [0.4175 0.0645 0.3869 0.6315 0.0686 0.2524 0.2088 0.7066 0.0272 0.4488]
 [0.8253 0.5084 0.5157 0.9629 0.4903 0.6371 0.6657 