In [117]:
import numpy as np

## 一、生成数组

### 1.1 特殊数组zeros，ones

In [118]:
# Create a 2x3 array filled with zeros and show it together with its dtype
zeros = np.zeros((2, 3))
print(zeros, zeros.dtype)

[[0. 0. 0.]
 [0. 0. 0.]] float64


In [119]:
# Create a 2x3 array filled with ones
np.ones(shape=(2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

### 1.2 从现有数组生成

In [120]:
# Build a new int64 array from the existing float64 array `zeros`
np.array(zeros, dtype=np.int64)

array([[0, 0, 0],
       [0, 0, 0]], dtype=int64)

### 1.3 生成固定范围数组

In [121]:
# 11 evenly spaced samples over the closed interval [0, 100]
np.linspace(start=0, stop=100, num=11)

array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

In [122]:
# Evenly spaced values over the half-open interval [0, 100) with a step of 10
np.arange(start=0, stop=100, step=10)

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [123]:
# n values spaced evenly on a log scale between 10**a and 10**b
# (here a=1, b=2, n=10, i.e. from 10 to 100 inclusive)
np.logspace(start=1, stop=2, num=10)

array([ 10.        ,  12.91549665,  16.68100537,  21.5443469 ,
        27.82559402,  35.93813664,  46.41588834,  59.94842503,
        77.42636827, 100.        ])

### 1.4 生成随机数组

In [124]:
# Uniformly distributed random array; the positional arguments give the
# output shape, here (2, 3, 4)
np.random.rand(2, 3, 4)

array([[[0.08052241, 0.02009055, 0.85071628, 0.84482042],
        [0.01079023, 0.94052156, 0.45566119, 0.7560331 ],
        [0.94862898, 0.16520921, 0.54569901, 0.72253383]],

       [[0.19835942, 0.86603597, 0.44017238, 0.9414448 ],
        [0.22920832, 0.33260915, 0.00469896, 0.47900346],
        [0.5410741 , 0.25824274, 0.6493158 , 0.85803708]]])

In [125]:
# 10 uniformly distributed floats drawn from [0, 100)
np.random.uniform(low=0, high=100, size=10)

array([53.20402061, 19.53977013, 64.88862532, 45.60208579, 41.68328674,
        6.91259004, 33.87250389, 60.30470611, 37.26257506, 38.33334222])

In [126]:
# 2 random integers drawn from [10, 100)
np.random.randint(low=10, high=100, size=2)

array([18, 57])

In [127]:
# Normally distributed array: mean 0, standard deviation 1, 10 samples
np.random.normal(loc=0, scale=1, size=10)

array([-0.90490762,  0.11248097,  0.0226503 ,  0.40466903, -1.38314331,
        0.37593409, -0.47440301, -0.2806921 , -1.00344565,  0.43578118])

In [128]:
# Seeded generator used below to draw reproducible random numbers in [0, 1)
rdm = np.random.RandomState(1)

In [129]:
# Draw a single float in [0, 1) from the seeded generator
rdm.rand()

0.417022004702574

In [130]:
# Draw a 2x3 array of floats in [0, 1) from the same seeded generator
rdm.rand(2, 3)

array([[7.20324493e-01, 1.14374817e-04, 3.02332573e-01],
       [1.46755891e-01, 9.23385948e-02, 1.86260211e-01]])

### 1.5 生成网格数组

In [131]:
# Dense coordinate grid: first axis runs 1..3 (step 1), second axis 2..4 (step 0.5);
# both stop values are exclusive
x, y = np.mgrid[1:3:1, 2:4:0.5]
print(x, y, sep="\n")

[[1. 1. 1. 1.]
 [2. 2. 2. 2.]]
[[2.  2.5 3.  3.5]
 [2.  2.5 3.  3.5]]


In [132]:
# ravel() flattens each grid to 1-D; np.c_ then pairs them up column-wise,
# giving one (x, y) coordinate per row
np.c_[x.ravel(), y.ravel()]

array([[1. , 2. ],
       [1. , 2.5],
       [1. , 3. ],
       [1. , 3.5],
       [2. , 2. ],
       [2. , 2.5],
       [2. , 3. ],
       [2. , 3.5]])

## 二、数组变换

### 2.1 形状变换

In [133]:
# 4x5 sample from the standard normal distribution
arr = np.random.normal(loc=0, scale=1, size=(4, 5))
# reshape leaves `arr` itself untouched; -1 lets NumPy work out the row count
arr.reshape(-1, 4)

array([[ 0.28578889, -0.43104108, -0.38223576,  0.0071116 ],
       [ 2.23689499,  0.8348834 ,  0.68272799, -0.3778029 ],
       [-0.60641203, -0.86903814,  0.29040874,  1.26916403],
       [-0.59789322,  0.01515112, -0.14824806,  0.35687042],
       [-0.84608462, -0.87653932,  1.34843016, -1.46461537]])

In [134]:
# resize() changes the array in place instead of returning a new one.
# NOTE(review): if `arr` is still the (4, 5) array created in the previous
# cell, the requested shape equals the current one, so shape and contents
# stay the same here.
arr.resize(4, 5)
arr

array([[ 0.28578889, -0.43104108, -0.38223576,  0.0071116 ,  2.23689499],
       [ 0.8348834 ,  0.68272799, -0.3778029 , -0.60641203, -0.86903814],
       [ 0.29040874,  1.26916403, -0.59789322,  0.01515112, -0.14824806],
       [ 0.35687042, -0.84608462, -0.87653932,  1.34843016, -1.46461537]])

In [135]:
# Transpose: rows and columns are swapped
arr.T

array([[ 0.28578889,  0.8348834 ,  0.29040874,  0.35687042],
       [-0.43104108,  0.68272799,  1.26916403, -0.84608462],
       [-0.38223576, -0.3778029 , -0.59789322, -0.87653932],
       [ 0.0071116 , -0.60641203,  0.01515112,  1.34843016],
       [ 2.23689499, -0.86903814, -0.14824806, -1.46461537]])

### 2.2 数据类型转换

In [136]:
# Build a 2x2x3 integer array, then display it converted to float64
# (astype returns a new array; `arr` keeps its integer dtype)
arr = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[12, 3, 34], [5, 6, 7]],
    ]
)
arr.astype(np.float64)

array([[[ 1.,  2.,  3.],
        [ 4.,  5.,  6.]],

       [[12.,  3., 34.],
        [ 5.,  6.,  7.]]])

In [137]:
# Serialize the array's raw data to a Python bytes object.
# tobytes() replaces tostring(), which was deprecated in NumPy 1.19 and
# removed in NumPy 2.0; the returned bytes are the same.
arr.tobytes()

b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x0c\x00\x00\x00\x03\x00\x00\x00"\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00'

### 2.3 数组去重

In [138]:
# Two rows that share the values 3 and 4
arr = np.array(
    [
        [1, 2, 3, 4],
        [3, 4, 5, 6],
    ]
)
# unique() returns the sorted distinct values of the flattened array
np.unique(arr)

array([1, 2, 3, 4, 5, 6])

## 三、数学运算

### 3.1 逻辑运算

In [139]:
# Draw an 8x10 standard-normal sample and keep only its top-left 5x5 corner
stock_change = np.random.normal(loc=0, scale=1, size=(8, 10))[:5, :5]
stock_change

array([[ 3.39270649e-01,  1.54009397e+00,  1.26151018e+00,
        -1.29458786e-01, -1.17601766e+00],
       [-7.73742215e-01,  2.59260555e+00, -3.14999542e-01,
        -7.52894919e-04, -5.12227638e-01],
       [-4.14478967e-01,  2.75267720e-01, -5.59002112e-02,
        -3.41206729e-01, -4.39246179e-01],
       [ 1.61431533e+00, -1.04786586e+00, -6.17690922e-02,
        -7.19016683e-02,  1.84075354e-01],
       [ 1.85577040e+00, -9.28138519e-01,  4.15884815e-01,
        -4.92845431e-01,  1.10811129e-01]])

In [140]:
# Element-wise comparison: produces a boolean array of the same shape
stock_change > 0.5

array([[False,  True,  True, False, False],
       [False,  True, False, False, False],
       [False, False, False, False, False],
       [ True, False, False, False, False],
       [ True, False, False, False, False]])

In [141]:
# Boolean-mask indexing: returns the elements greater than 0.5 as a 1-D array
stock_change[stock_change > 0.5]

array([1.54009397, 1.26151018, 2.59260555, 1.61431533, 1.8557704 ])

In [142]:
# Assignment through the boolean mask: every element greater than 0.5 is
# overwritten with 1, modifying `stock_change` in place
stock_change[stock_change > 0.5] = 1
stock_change

array([[ 3.39270649e-01,  1.00000000e+00,  1.00000000e+00,
        -1.29458786e-01, -1.17601766e+00],
       [-7.73742215e-01,  1.00000000e+00, -3.14999542e-01,
        -7.52894919e-04, -5.12227638e-01],
       [-4.14478967e-01,  2.75267720e-01, -5.59002112e-02,
        -3.41206729e-01, -4.39246179e-01],
       [ 1.00000000e+00, -1.04786586e+00, -6.17690922e-02,
        -7.19016683e-02,  1.84075354e-01],
       [ 1.00000000e+00, -9.28138519e-01,  4.15884815e-01,
        -4.92845431e-01,  1.10811129e-01]])

In [143]:
# True only if every value in the first two rows (columns 0-4) is positive
np.all(stock_change[:2, :5] > 0)

False

In [144]:
# True if at least one value in the first five rows is positive
np.any(stock_change[:5] > 0)

True

In [145]:
# Element-wise ternary: "good" where 0.5 < value < 2, otherwise "bad"
np.where(
    np.logical_and(stock_change > 0.5, stock_change < 2),
    "good",
    "bad",
)

array([['bad', 'good', 'good', 'bad', 'bad'],
       ['bad', 'good', 'bad', 'bad', 'bad'],
       ['bad', 'bad', 'bad', 'bad', 'bad'],
       ['good', 'bad', 'bad', 'bad', 'bad'],
       ['good', 'bad', 'bad', 'bad', 'bad']], dtype='<U4')

In [146]:
# Element-wise ternary: 1 where value > 0.5 or value < -0.5, otherwise 0
np.where(
    np.logical_or(stock_change > 0.5, stock_change < -0.5),
    1,
    0,
)

array([[0, 1, 1, 0, 1],
       [1, 1, 0, 0, 1],
       [0, 0, 0, 0, 0],
       [1, 1, 0, 0, 0],
       [1, 1, 0, 0, 0]])

### 3.2 统计运算

In [147]:
# Fresh 8x10 standard-normal sample; `temp` is its top-left 4x4 corner
stock_change = np.random.normal(loc=0, scale=1, size=(8, 10))
temp = stock_change[0:4, 0:4]

In [148]:
# Display the 4x4 slice used for the statistics below
temp

array([[-3.09643454,  0.28447727,  0.4966012 ,  0.18039866],
       [ 1.5022984 ,  0.53526588,  0.3060306 ,  0.68626739],
       [ 0.33835937, -0.62087479,  1.02503322,  0.16236985],
       [ 0.86367342, -0.35877443, -0.6808558 , -1.60928772]])

In [149]:
# Each entry: (output template, reduction function, axis to reduce over).
# axis=1 reduces across the columns of each row; axis=0 reduces down each column.
stats = (
    ("前四只股票前四天的最大涨幅{}", np.max, 1),
    ("前四只股票前四天的最大跌幅{}", np.min, 1),
    ("前四只股票前四天的波动程度{}", np.std, 1),
    ("前四只股票前四天的平均涨跌幅{}", np.mean, 1),
    ("前四只股票前四天内涨幅最大{}", np.argmax, 1),
    ("前四天一天内涨幅最大的股票{}", np.argmax, 0),
)
for template, reducer, axis in stats:
    print(template.format(reducer(temp, axis=axis)))

前四只股票前四天的最大涨幅[0.4966012  1.5022984  1.02503322 0.86367342]
前四只股票前四天的最大跌幅[-3.09643454  0.3060306  -0.62087479 -1.60928772]
前四只股票前四天的波动程度[1.48395416 0.45083571 0.58573121 0.88476663]
前四只股票前四天的平均涨跌幅[-0.53373935  0.75746557  0.22622191 -0.44631113]
前四只股票前四天内涨幅最大[2 0 2 0]
前四天一天内涨幅最大的股票[1 1 2 1]


### 3.3 数组间运算

In [150]:
# Broadcasting: axes of length 1 are stretched (repeated) until both operands
# line up element by element, so (1, 3) + (3, 1) yields a (3, 3) result
2 * np.ones((1, 3)) + np.ones((3, 1))

array([[3., 3., 3.],
       [3., 3., 3.],
       [3., 3., 3.]])

In [151]:
# Operands for matrix multiplication: a is 2x3, b is 3x2
a = np.array(((1, 2, 3), (4, 5, 6)))
b = np.array(((1, 2), (3, 4), (5, 6)))

In [152]:
# Matrix product of a and b
np.matmul(a, b)

array([[22, 28],
       [49, 64]])

In [153]:
# For 2-D inputs np.dot gives the same matrix product as np.matmul
np.dot(a, b)

array([[22, 28],
       [49, 64]])

In [154]:
# Inputs for the joining examples: a is 2x2, b is a single 1x2 row
a = np.array(((1, 2), (3, 4)))
b = np.array(((5, 6),))

In [155]:
# Join along axis 0: b is appended as an extra row
np.concatenate([a, b], axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [156]:
# Vertical stack: same result as concatenating along axis 0
np.vstack([a, b])

array([[1, 2],
       [3, 4],
       [5, 6]])

In [157]:
# Join along axis 1: b is transposed to a column and appended on the right
np.concatenate([a, b.T], axis=1)

array([[1, 2, 5],
       [3, 4, 6]])

In [158]:
# Horizontal stack: same result as concatenating along axis 1
np.hstack([a, b.T])

array([[1, 2, 5],
       [3, 4, 6]])

In [159]:
# Inputs for the splitting examples: x holds the floats 0.0 .. 8.0, a is 2x2
x = np.arange(0.0, 9.0)
a = np.array(((1, 2), (3, 4)))

In [160]:
# An integer asks for that many equal-sized pieces
np.split(x, indices_or_sections=3)

[array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])]

In [161]:
# Splitting a 2-D array works along axis 0 by default: one row per piece here
np.split(a, indices_or_sections=2)

[array([[1, 2]]), array([[3, 4]])]

In [162]:
# A list gives the indices at which to cut; an index past the end of the
# array (10 here) produces an empty trailing piece
np.split(x, indices_or_sections=[3, 5, 6, 10])

[array([0., 1., 2.]),
 array([3., 4.]),
 array([5.]),
 array([6., 7., 8.]),
 array([], dtype=float64)]