numpy与list运算效率比较

In [7]:
import random
import time
import numpy as np

# Build a plain Python list of 100,000 random floats drawn from [0.0, 1.0).
# A list comprehension replaces the append loop: it is the idiomatic form and
# does not leave a stray loop index behind in the notebook namespace.
a = [random.random() for _ in range(100000)]

# %time is an IPython line magic (available in Jupyter Notebook, JupyterLab and the IPython terminal)
# that reports CPU and wall-clock time for a single run of the statement that follows it.
# A single run is noisy; %timeit repeats the statement and averages if a steadier figure is needed.
# Built-in sum() over the Python list.
%time sum1 = sum(a)

# Same values held in a NumPy array, summed with np.sum for comparison.
b = np.array(a)
%time sum2 = np.sum(b)


CPU times: user 513 μs, sys: 120 μs, total: 633 μs
Wall time: 623 μs
CPU times: user 171 μs, sys: 0 ns, total: 171 μs
Wall time: 173 μs


In [8]:
import numpy as np
# 1-D array of ten ones; the default dtype is float64, hence the "1." in the repr.
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [9]:
import numpy as np
# Passing a shape [rows, cols] gives a 2-D array: here 2 rows by 3 columns of ones.
np.ones([2,3])

array([[1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# zeros_like builds an array of zeros with the same shape and dtype as its argument (here 2x3 floats).
np.zeros_like(np.ones([2,3]))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
arr = np.arange(12).reshape(3, 4)  # 3x4 array: [[0,1,2,3],[4,5,6,7],[8,9,10,11]]

# Indexing: [row, column], both zero-based
print(arr[1, 2])  # row index 1, column index 2 (2nd row, 3rd column): 6

# Slicing: rows 0-1 and columns 1-2 (the stop index is exclusive)
print(arr[0:2, 1:3])  # [[1,2], [5,6]]

# Ellipsis (...) stands for full slices over all the dimensions not written out
print(arr[..., 0])  # first column of every row: [0,4,8]

6
[[1 2]
 [5 6]]
[0 4 8]


In [15]:
# Compare np.array (copies by default) with np.asarray / copy=False (reuse the input when possible).
arr = np.arange(12).reshape(3, 4)
arr1 = np.array(arr)
arr2 = np.asarray(arr)
print(arr1 is arr)  # False: np.array copies by default, so arr1 is a new array
print(arr2 is arr)  # True: np.asarray hands back an existing ndarray unchanged, no copy is made
# NOTE: in NumPy 2.x, copy=False raises ValueError when a copy cannot be avoided;
# np.asarray is the "copy only if needed" spelling. Here the input is already an ndarray, so no copy is needed.
arr3 = np.array(arr, copy=False)
print(arr3 is arr)  # True: arr3 is the very same object as arr (not a separate view)
arr4  = np.array(arr, copy=True)
print(arr4 is arr)  # False: copy=True always produces a new array
arr1[0,0] = 100
print(arr)  # arr is unchanged: arr1 owns an independent copy of the data
arr3[0,0] = 200
print(arr)  # arr changed: arr3 is arr itself
arr2[0,0] = 300
print(arr)  # arr changed: arr2 is arr itself

False
True
True
False
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[200   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]
[[300   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]


In [16]:
np.linspace(1,10,10)  # 10 evenly spaced values from 1 to 10, both endpoints included

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [17]:
np.logspace(1,10,10)  # 10 values from 10**1 to 10**10, evenly spaced on a log scale (base 10 by default)

array([1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06, 1.e+07, 1.e+08,
       1.e+09, 1.e+10])

In [18]:
import numpy as np
# Draw 4 samples from a normal distribution with mean 5 (loc) and standard deviation 2 (scale).
# No seed is set, so the values differ on every run.
normal_arr = np.random.normal(loc=5, scale=2, size=4)
print(normal_arr)

[2.6797792  7.98416271 7.0771127  8.32607526]


In [19]:
import numpy as np
# Standard-normal samples of shape (1000, 4), used as stand-in daily returns for 4 stocks over 1000 days.
stock_change = np.random.normal(0, 1, (1000, 4))
print(stock_change)

stock_change.shape


[[-0.68186349  0.96285016 -0.97716143  0.27482748]
 [-0.4105086  -0.5018176   1.00377308 -0.27671748]
 [-0.45163915 -0.62258613 -1.10279151 -0.33362462]
 ...
 [ 0.17807797  0.04992982  1.14168834  0.37146182]
 [-0.72488774 -0.07312264 -0.27670554  0.28275177]
 [-1.68246556  1.40629388  0.12986788  0.59357479]]


(1000, 4)

In [20]:
# Random standard-normal data of shape (10, 5): 10 stocks (rows) by 5 days (columns).
# The later cells label the rows with stock names and the columns with dates.
import numpy as np
stock_change = np.random.normal(0,1,(10,5))
print(stock_change.shape)
stock_change

(10, 5)


array([[-0.33051159, -2.46751076, -1.18696929, -1.13115961, -0.9230149 ],
       [-0.59990997,  0.55610692,  0.59209611,  0.15967857, -0.16485961],
       [-0.0058781 ,  0.61064277, -0.65578406,  1.12249027,  2.33574126],
       [ 0.63316302, -1.32979489,  1.25886026, -1.93172988,  1.79518945],
       [ 0.11252806, -2.5921023 , -1.27545539, -0.61501819,  1.57399862],
       [-0.68245444, -1.58440172,  1.07022404, -0.8874486 ,  0.83596035],
       [-1.12985808,  0.32002793,  1.82387478,  1.86366966,  0.26769768],
       [-0.84470199, -0.12623352, -0.08254795, -1.68601415, -0.75908092],
       [-0.21116471,  0.74931308,  0.25386686, -1.09531586,  1.28457245],
       [ 0.23163421,  0.58341525,  0.10899315, -0.07465003, -1.73604939]])

In [21]:
# Wrap the NumPy array in a pandas DataFrame; with no labels supplied,
# rows and columns get default integer labels starting at 0.
import pandas as pd
pd.DataFrame(stock_change)

Unnamed: 0,0,1,2,3,4
0,-0.330512,-2.467511,-1.186969,-1.13116,-0.923015
1,-0.59991,0.556107,0.592096,0.159679,-0.16486
2,-0.005878,0.610643,-0.655784,1.12249,2.335741
3,0.633163,-1.329795,1.25886,-1.93173,1.795189
4,0.112528,-2.592102,-1.275455,-0.615018,1.573999
5,-0.682454,-1.584402,1.070224,-0.887449,0.83596
6,-1.129858,0.320028,1.823875,1.86367,0.267698
7,-0.844702,-0.126234,-0.082548,-1.686014,-0.759081
8,-0.211165,0.749313,0.253867,-1.095316,1.284572
9,0.231634,0.583415,0.108993,-0.07465,-1.736049


In [22]:
# Add row labels: one name per row of stock_change, numbered from 1.
stock_code = [f'股票{i+1}' for i in range(stock_change.shape[0])]
print(stock_code)

# index= sets the row labels; the columns keep their default integer labels here.
pd.DataFrame(stock_change,index=stock_code)

['股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9', '股票10']


Unnamed: 0,0,1,2,3,4
股票1,-0.330512,-2.467511,-1.186969,-1.13116,-0.923015
股票2,-0.59991,0.556107,0.592096,0.159679,-0.16486
股票3,-0.005878,0.610643,-0.655784,1.12249,2.335741
股票4,0.633163,-1.329795,1.25886,-1.93173,1.795189
股票5,0.112528,-2.592102,-1.275455,-0.615018,1.573999
股票6,-0.682454,-1.584402,1.070224,-0.887449,0.83596
股票7,-1.129858,0.320028,1.823875,1.86367,0.267698
股票8,-0.844702,-0.126234,-0.082548,-1.686014,-0.759081
股票9,-0.211165,0.749313,0.253867,-1.095316,1.284572
股票10,0.231634,0.583415,0.108993,-0.07465,-1.736049


In [23]:
# Column labels: pd.date_range with freq='B' (business-day frequency) yields consecutive dates
# that skip weekends. One date is generated per column of stock_change.
date = pd.date_range(start='20190403',periods=stock_change.shape[1],freq='B')
date

DatetimeIndex(['2019-04-03', '2019-04-04', '2019-04-05', '2019-04-08',
               '2019-04-09'],
              dtype='datetime64[ns]', freq='B')

In [24]:
# Final labelled frame: rows are stock names, columns are business dates.
stock_c = pd.DataFrame(stock_change,index=stock_code, columns=date)

In [25]:
stock_c.head()   # show the first five rows (head() defaults to n=5)

Unnamed: 0,2019-04-03,2019-04-04,2019-04-05,2019-04-08,2019-04-09
股票1,-0.330512,-2.467511,-1.186969,-1.13116,-0.923015
股票2,-0.59991,0.556107,0.592096,0.159679,-0.16486
股票3,-0.005878,0.610643,-0.655784,1.12249,2.335741
股票4,0.633163,-1.329795,1.25886,-1.93173,1.795189
股票5,0.112528,-2.592102,-1.275455,-0.615018,1.573999


In [26]:
stock_c.tail()   # show the last five rows (tail() defaults to n=5)

Unnamed: 0,2019-04-03,2019-04-04,2019-04-05,2019-04-08,2019-04-09
股票6,-0.682454,-1.584402,1.070224,-0.887449,0.83596
股票7,-1.129858,0.320028,1.823875,1.86367,0.267698
股票8,-0.844702,-0.126234,-0.082548,-1.686014,-0.759081
股票9,-0.211165,0.749313,0.253867,-1.095316,1.284572
股票10,0.231634,0.583415,0.108993,-0.07465,-1.736049


In [27]:
# Plain [] indexing on a DataFrame selects a column, so stock_c['股票6'] raises KeyError;
# chained [] lookups would therefore have to go column first, then row.
# .loc takes (row label, column label) in a single step and avoids chained indexing altogether.
stock_c.loc['股票6', '2019-04-03']

np.float64(-0.6824544435034859)

In [29]:
# Per-column summary statistics: count, mean, std, min, quartiles (25%/50%/75%) and max.
stock_c.describe()

Unnamed: 0,2019-04-03,2019-04-04,2019-04-05,2019-04-08,2019-04-09
count,10.0,10.0,10.0,10.0,10.0
mean,-0.282715,-0.528054,0.190716,-0.42755,0.451015
std,0.541349,1.333342,1.032384,1.209022,1.333814
min,-1.129858,-2.592102,-1.275455,-1.93173,-1.736049
25%,-0.661818,-1.52075,-0.512475,-1.122199,-0.610526
50%,-0.270838,0.096897,0.18143,-0.751233,0.551829
75%,0.082927,0.576588,0.950692,0.101096,1.501642
max,0.633163,0.749313,1.823875,1.86367,2.335741


In [37]:
# Range (max - min) of the 2019-04-03 column. The double brackets keep a one-column DataFrame,
# so apply runs the lambda once on that column and the result is a one-element Series.
stock_c[['2019-04-03']].apply(lambda x: x.max()-x.min())

2019-04-03    1.763021
Freq: B, dtype: float64

In [41]:
# Element-wise boolean mask for the 2019-04-03 column: True where 0 < value < 0.2.
# The comparisons are vectorised, so the lambda returns a boolean column of the same length.
stock_c[['2019-04-03']].apply(lambda x: (x > 0) & (x < 0.2))

Unnamed: 0,2019-04-03
股票1,False
股票2,False
股票3,False
股票4,False
股票5,True
股票6,False
股票7,False
股票8,False
股票9,False
股票10,False


In [45]:
# Save to CSV in the current working directory; index=True writes the row labels as the first column.
stock_c.to_csv("output_index.csv", index=True)