## 3.1并行化思想与基础操作

### 3.1.1并行化思想

In [1]:
# 官方推荐引用numpy
import numpy as np

In [2]:
# Baseline: square 10,000 integers with a pure-Python list comprehension.
normal_list = range(10000)
# Time the element-by-element Python loop.
%timeit [i**2 for i in normal_list]

5.3 ms ± 334 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [3]:
# Same workload on a NumPy array: the square is applied to the whole array in one expression.
np_list = np.arange(10000)
# Time the vectorized expression for comparison with the list comprehension above.
%timeit np_list**2

8.97 µs ± 397 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [4]:
# On a NumPy array, `* 3` is applied to every element.
np_list = np.ones(5) * 3
print(np_list)
# On a plain Python list, `* 3` acts on the list as a whole: the list is repeated three times.
normal_list = [1, 1, 1, 1, 1] * 3
print(normal_list, len(normal_list))

[3. 3. 3. 3. 3.]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 15


### 3.1.2初始化操作

In [5]:
# A 1-D array of 100 zeros.
np.zeros(100)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
# Shape (3, 2): 3 rows by 2 columns, all zeros.
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [7]:
# Shape (3, 2): 3 rows by 2 columns, all ones.
np.ones((3, 2))

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

In [8]:
# Shape (2, 3, 3). np.empty() only allocates memory and does not initialize it,
# so the values are arbitrary leftovers (not random numbers) and must not be relied on.
np.empty((2, 3, 3))

array([[[6.23042070e-307, 4.67296746e-307, 1.69121096e-306],
        [9.79101081e-307, 2.22522868e-306, 1.42418987e-306],
        [1.37961641e-306, 1.60220528e-306, 1.24611266e-306]],

       [[9.34598925e-307, 1.24612081e-306, 1.11260755e-306],
        [1.60220393e-306, 1.51320640e-306, 9.34609790e-307],
        [1.86921279e-306, 1.24610723e-306, 2.56765117e-312]]])

In [9]:
# Create an array with the same shape as np_list, filled with ones.
np.ones_like(np_list)

array([1., 1., 1., 1., 1.])

In [10]:
# Create an array with the same shape as np_list, filled with zeros.
np.zeros_like(np_list)

array([0., 0., 0., 0., 0.])

In [11]:
# eye(3) builds the 3x3 identity matrix: ones on the main diagonal, zeros elsewhere.
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [12]:
# A (nested) Python list can be passed to np.array() to build an ndarray.
data = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr_np = np.array(data)
arr_np

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [13]:
# linspace() generates evenly spaced values: 11 points from 0 to 1 inclusive (10 equal intervals).
np.linspace(0, 1, 11)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [14]:
# Fixed seed so that Restart & Run All regenerates exactly the same simulated data.
# NOTE: outputs saved before the seed was introduced will differ until the notebook is re-run.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
# Number of stocks (rows).
stock_cnt = 200
# Number of trading days (columns).
view_days = 504
# Simulated daily changes drawn from the standard normal distribution (mean 0, std 1).
stock_day_change = np.random.standard_normal((stock_cnt, view_days))
# Shape is (200, 504): 200 rows by 504 columns.
print(stock_day_change.shape)
# Changes of the first stock over the first 5 trading days.
print(stock_day_change[0:1, :5])


(200, 504)
[[ 0.22353348  0.62078822  0.43637755 -0.57670196 -0.51569826]]


### 3.1.3索引选取和切片选择

In [15]:
# Rows 0:2 -> first and second stock; columns 0:5 -> changes on the first 5 trading days.
stock_day_change[0:2, 0:5]

array([[ 0.22353348,  0.62078822,  0.43637755, -0.57670196, -0.51569826],
       [ 0.62478544,  0.0755934 , -0.13657937, -0.69076956, -0.9256024 ]])

In [16]:
# Rows -2: -> the last two stocks; columns -5: -> changes on the last 5 trading days.
stock_day_change[-2:, -5:]

array([[-0.01455101,  0.67674347, -0.87916471,  0.15115891,  0.23403297],
       [-1.51872267,  1.43092102,  2.51425444,  0.51147235,  0.32968961]])

In [17]:
# Swap the top-left 2x5 block with the bottom-right 2x5 block (classic tmp / a / b swap).
# copy() is required: a plain slice is a view and would be overwritten by the next assignment.
# NOTE: re-running this cell swaps the blocks back, so the cell is not idempotent.
# tmp = a
tmp = stock_day_change[:2, :5].copy()
# a = b
stock_day_change[:2, :5] = stock_day_change[-2:, -5:]
# b = tmp
stock_day_change[-2:, -5:] = tmp
# Display both blocks after the swap.
stock_day_change[:2, :5], stock_day_change[-2:, -5:]

(array([[-0.01455101,  0.67674347, -0.87916471,  0.15115891,  0.23403297],
        [-1.51872267,  1.43092102,  2.51425444,  0.51147235,  0.32968961]]),
 array([[ 0.22353348,  0.62078822,  0.43637755, -0.57670196, -0.51569826],
        [ 0.62478544,  0.0755934 , -0.13657937, -0.69076956, -0.9256024 ]]))

### 3.1.4数据转换与规整

In [18]:
# Show the original floats, then cast them to int.
# astype(int) drops the fractional part (truncates toward zero), e.g. -0.88 -> 0 and 2.51 -> 2.
print(stock_day_change[:2, :5])
stock_day_change[:2, :5].astype(int)

[[-0.01455101  0.67674347 -0.87916471  0.15115891  0.23403297]
 [-1.51872267  1.43092102  2.51425444  0.51147235  0.32968961]]


array([[ 0,  0,  0,  0,  0],
       [-1,  1,  2,  0,  0]])

In [19]:
# Tidy the float data by rounding to 2 decimal places.
np.round(stock_day_change[:2, :5], decimals=2)

array([[-0.01,  0.68, -0.88,  0.15,  0.23],
       [-1.52,  1.43,  2.51,  0.51,  0.33]])

In [26]:
# Work on a copy so that the original array is left untouched.
tmp_test = stock_day_change[:2, :5].copy()
# Overwrite the element at row 0, column 0 with NaN.
tmp_test[0, 0] = np.nan
tmp_test

array([[        nan,  0.67674347, -0.87916471,  0.15115891,  0.23403297],
       [-1.51872267,  1.43092102,  2.51425444,  0.51147235,  0.32968961]])

In [27]:
# np.nan_to_num() returns an array in which NaN is replaced by 0
# (with default arguments, infinities are also replaced by large finite numbers).
tmp_test = np.nan_to_num(tmp_test)
tmp_test

array([[ 0.        ,  0.67674347, -0.87916471,  0.15115891,  0.23403297],
       [-1.51872267,  1.43092102,  2.51425444,  0.51147235,  0.32968961]])

### 3.1.5逻辑条件进行数据筛选

In [30]:
# Comparing an array with a scalar yields a boolean array of the same shape.
mask = stock_day_change[:2, :5] > 0.5
mask

array([[False,  True, False, False, False],
       [False,  True,  True,  True, False]])

In [31]:
# Fresh copy of the top-left 2x5 block to filter.
tmp_test = stock_day_change[:2, :5].copy()
# Boolean indexing: keep only the elements whose position in mask is True (the result is 1-D).
tmp_test[mask]

array([0.67674347, 1.43092102, 2.51425444, 0.51147235])

In [32]:
# Assign 1 to every element greater than 0.5; this modifies tmp_test in place.
tmp_test[tmp_test > 0.5] = 1
tmp_test

array([[-0.01455101,  1.        , -0.87916471,  0.15115891,  0.23403297],
       [-1.51872267,  1.        ,  1.        ,  1.        ,  0.32968961]])

In [37]:
# Multiple conditions: combine boolean arrays with | (or) / & (and).
# Each comparison needs its own parentheses because | and & bind more tightly than > and <.
# copy() keeps tmp_test independent of stock_day_change: a bare slice is a view, and other
# cells assign into tmp_test in place, which would silently modify the source data.
tmp_test = stock_day_change[-2:, -5:].copy()
print(tmp_test)
# Select the values greater than 0.5 or less than -0.5.
tmp_test[(tmp_test > 0.5) | (tmp_test < -0.5)]

[[ 0.22353348  0.62078822  0.43637755 -0.57670196 -0.51569826]
 [ 0.62478544  0.0755934  -0.13657937 -0.69076956 -0.9256024 ]]


array([ 0.62078822, -0.57670196, -0.51569826,  0.62478544, -0.69076956,
       -0.9256024 ])

### 3.1.6通用序列函数

#### 1 np.all()函数

In [38]:
# np.all() checks whether every element is True, i.e. a logical AND over the boolean array.
# Here: did every value in stock_day_change[:2, :5] go up (> 0)?
np.all(stock_day_change[:2, :5] > 0)

False

#### 2 np.any()函数

In [39]:
# np.any() checks whether at least one element is True, i.e. a logical OR over the boolean array.
# Here: did at least one value in stock_day_change[:2, :5] go up (> 0)?
np.any(stock_day_change[:2, :5] > 0)

True

#### 3 maximum()与minimum()函数

In [40]:
# Element-wise comparison of two arrays: maximum() keeps the larger value of each pair.
np.maximum(stock_day_change[:2, :5], stock_day_change[-2:, -5:])

array([[0.22353348, 0.67674347, 0.43637755, 0.15115891, 0.23403297],
       [0.62478544, 1.43092102, 2.51425444, 0.51147235, 0.32968961]])

In [41]:
# Element-wise comparison of two arrays: minimum() keeps the smaller value of each pair.
np.minimum(stock_day_change[:2, :5], stock_day_change[-2:, -5:])

array([[-0.01455101,  0.62078822, -0.87916471, -0.57670196, -0.51569826],
       [-1.51872267,  0.0755934 , -0.13657937, -0.69076956, -0.9256024 ]])

#### 4 np.unique()函数

In [42]:
# Integer version of the top-left 2x5 block, used as input for np.unique().
change_int = stock_day_change[:2, :5].astype(int)
print(change_int)
# np.unique() returns the distinct values of the array, sorted, as a new 1-D array.
np.unique(change_int)

[[ 0  0  0  0  0]
 [-1  1  2  0  0]]


array([-1,  0,  1,  2])

#### 5 np.diff()函数

In [44]:
# Differences along axis 1 (within each row): each element minus the one before it.
print(stock_day_change[:2, :5])
np.diff(stock_day_change[:2, :5], axis=1)

[[-0.01455101  0.67674347 -0.87916471  0.15115891  0.23403297]
 [-1.51872267  1.43092102  2.51425444  0.51147235  0.32968961]]


array([[ 0.69129448, -1.55590818,  1.03032362,  0.08287406],
       [ 2.94964368,  1.08333343, -2.00278209, -0.18178275]])

In [45]:
# Differences along axis 0 (down each column): each row minus the row above it.
print(stock_day_change[:2, :5])
np.diff(stock_day_change[:2, :5], axis=0)

[[-0.01455101  0.67674347 -0.87916471  0.15115891  0.23403297]
 [-1.51872267  1.43092102  2.51425444  0.51147235  0.32968961]]


array([[-1.50417165,  0.75417755,  3.39341916,  0.36031344,  0.09565663]])

#### 6 np.where()函数

In [47]:
# copy() keeps tmp_test independent of stock_day_change: a bare slice is a view, and other
# cells assign into tmp_test in place, which would silently modify the source data.
tmp_test = stock_day_change[-2:, -5:].copy()
print(tmp_test)
# Flag changes greater than 0.5 as 1 and everything else as 0.
print(np.where(tmp_test > 0.5, 1, 0))

[[ 0.22353348  0.62078822  0.43637755 -0.57670196 -0.51569826]
 [ 0.62478544  0.0755934  -0.13657937 -0.69076956 -0.9256024 ]]
[[0 1 0 0 0]
 [1 0 0 0 0]]


In [48]:
# copy() keeps tmp_test independent of stock_day_change: a bare slice is a view, and other
# cells assign into tmp_test in place, which would silently modify the source data.
tmp_test = stock_day_change[-2:, -5:].copy()
print(tmp_test)
# Replace changes greater than 0.5 with 1 and keep every other value unchanged.
print(np.where(tmp_test > 0.5, 1, tmp_test))

[[ 0.22353348  0.62078822  0.43637755 -0.57670196 -0.51569826]
 [ 0.62478544  0.0755934  -0.13657937 -0.69076956 -0.9256024 ]]
[[ 0.22353348  1.          0.43637755 -0.57670196 -0.51569826]
 [ 1.          0.0755934  -0.13657937 -0.69076956 -0.9256024 ]]


In [49]:
# For compound conditions, combine the boolean arrays with np.logical_and() / np.logical_or().
# Values greater than 0.5 and less than 1 become 1; everything else becomes 0.
np.where(np.logical_and(tmp_test > 0.5, tmp_test < 1), 1, 0)

array([[0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0]])

In [50]:
# Values greater than 0.5 or less than -0.5 become 1; everything else becomes 0.
np.where(np.logical_or(tmp_test > 0.5, tmp_test < -0.5), 1, 0)

array([[0, 1, 0, 1, 1],
       [1, 0, 0, 1, 1]])

### 3.1.7数据本地序列化操作

In [51]:
# Persist the array to disk in NumPy's binary format; np.save() appends the .npy extension
# to the file name, which is why the load step reads 'stock_day_change.npy'.
np.save('stock_day_change', stock_day_change)

In [56]:
# Read the array back from the .npy file and rebind stock_day_change to the loaded data.
stock_day_change = np.load('stock_day_change.npy')
stock_day_change

array([[-0.01455101,  0.67674347, -0.87916471, ...,  0.14683054,
        -0.99878825,  0.40920057],
       [-1.51872267,  1.43092102,  2.51425444, ...,  0.08747479,
        -0.47514721,  0.61993226],
       [-1.42679913,  0.30253676, -1.62453837, ..., -1.82044916,
         0.34573823,  0.92095842],
       ...,
       [ 1.1677692 ,  0.01068707, -0.06158697, ...,  1.67657967,
        -0.79150391,  1.94680936],
       [-0.28358916,  1.45284617, -0.09683089, ...,  0.43637755,
        -0.57670196, -0.51569826],
       [-0.81589322,  0.98807332, -2.11475845, ..., -0.13657937,
        -0.69076956, -0.9256024 ]])