numpy与list运算效率比较

In [None]:
import random
import time
import numpy as np

a = []
for i in range(100000000):
    a.append(random.random())

# %time 是 IPython 环境（如 Jupyter Notebook、JupyterLab 或 IPython 终端）中的魔术命令，用于测量代码执行时间。
%time sum1 = sum(a)

b = np.array(a)
%time sum2 = np.sum(b)


In [2]:
import numpy as np
# 1-D array holding ten ones (floating-point values)
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [3]:
import numpy as np
# 2 x 3 array of ones; the shape is passed as a list
np.ones([2,3])

array([[1., 1., 1.],
       [1., 1., 1.]])

In [4]:
# Array of zeros with the same shape and dtype as the given 2 x 3 integer array
np.zeros_like(np.array([[1,2,3],[4,5,6]]))

array([[0, 0, 0],
       [0, 0, 0]])

In [5]:
arr = np.arange(12).reshape(3, 4)  # 3 x 4 array: [[0,1,2,3],[4,5,6,7],[8,9,10,11]]

# Indexing
print(arr[1, 2])  # row index 1, column index 2 (2nd row, 3rd column): 6

# Slicing (1st-2nd rows, 2nd-3rd columns; the stop index is exclusive)
print(arr[0:2, 1:3])  # [[1,2], [5,6]]

# Ellipsis (...) selects everything along the remaining dimensions
print(arr[..., 0])  # first column of every row: [0,4,8]

6
[[1 2]
 [5 6]]
[0 4 8]


In [6]:
arr = np.arange(12).reshape(3, 4)
arr1 = np.array(arr)  # np.array copies its input by default
arr2 = np.asarray(arr)  # np.asarray hands back the input itself when it is already a suitable ndarray
print(arr1 is arr)  # False: arr1 is a new array
print(arr2 is arr)  # True: arr2 is the very same object as arr, nothing was copied
# NOTE: since NumPy 2.0, copy=False raises ValueError when a copy cannot be
# avoided; no copy is needed here, so arr itself is returned.
arr3 = np.array(arr, copy=False)
print(arr3 is arr)  # True: arr3 is the same object as arr (neither a copy nor a separate view object)
arr4  = np.array(arr, copy=True)
print(arr4 is arr)  # False: arr4 is a new array
arr1[0,0] = 100
print(arr)  # arr is unchanged: arr1 owns an independent copy of the data
arr3[0,0] = 200
print(arr)  # arr changed: arr3 and arr are the same array
arr2[0,0] = 300
print(arr)  # arr changed: arr2 and arr are the same array

False
True
True
False
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[200   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]
[[300   1   2   3]
 [  4   5   6   7]
 [  8   9  10  11]]


In [7]:
np.linspace(1,10,10)  # 10 evenly spaced values from 1 to 10, both endpoints included

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [8]:
np.logspace(1,10,10)  # 10 values from 10**1 to 10**10, evenly spaced on a log scale (the exponents, not the values, are evenly spaced)

array([1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06, 1.e+07, 1.e+08,
       1.e+09, 1.e+10])

In [9]:
import numpy as np
# Draw 4 random samples from a normal distribution with mean 5 and standard deviation 2
normal_arr = np.random.normal(loc=5, scale=2, size=4)
print(normal_arr)

[5.34355335 4.57936675 3.10478078 3.59145075]


In [10]:
import numpy as np
stock_change = np.random.normal(0, 1, (1000, 4))  # standard-normal samples of shape (1000, 4): daily returns for 1000 days x 4 stocks
print(stock_change)

# Last expression of the cell, so the notebook displays the shape tuple
stock_change.shape


[[-0.63350751  0.91204643  1.27833243 -0.80431442]
 [ 0.13567564 -0.30295933  0.92114988 -0.2019134 ]
 [-1.2825885   0.20066806  0.06189313 -1.11798961]
 ...
 [ 1.7030109   0.63406156  0.67981411  0.33277854]
 [-1.01427916 -0.96020508 -0.04346708 -1.29565318]
 [-1.30243049 -0.3337324  -0.41125527 -1.7373276 ]]


(1000, 4)

In [11]:
# Standard-normal random data of shape (10, 5).
# The cells that build the DataFrame label the 10 rows as stocks and the 5 columns as dates.
import numpy as np
stock_change = np.random.normal(0,1,(10,5))
print(stock_change.shape)
stock_change

(10, 5)


array([[ 0.04536047, -0.87932512, -0.09725504, -0.3075453 , -1.02723929],
       [-0.2175281 , -1.96118885, -1.05759124,  1.35287499,  2.31949323],
       [ 0.6278956 ,  0.3899473 ,  0.27175606,  0.8715988 , -0.88641377],
       [ 0.71499227,  1.00830608, -1.33074912, -1.36366635, -2.22911781],
       [ 0.58421362, -1.18933533,  0.8336049 , -0.99810914,  1.56959525],
       [ 0.86107819, -0.2601732 , -0.27175824,  0.82039003, -1.46000756],
       [-1.51579835,  1.1585687 ,  1.09383734, -2.26001689, -1.6001209 ],
       [ 0.19525833,  1.10020344,  0.04566768, -0.07027456, -1.68459137],
       [ 1.21164901, -1.09778492,  0.65076268, -0.02402788,  2.13768039],
       [ 0.4719082 ,  0.97167663, -0.15022226, -0.30679351, -0.40171845]])

In [13]:
# pandas: wrap the NumPy array in a DataFrame; with no labels given,
# rows and columns get default integer labels starting at 0
import pandas as pd
pd.DataFrame(stock_change)

Unnamed: 0,0,1,2,3,4
0,0.04536,-0.879325,-0.097255,-0.307545,-1.027239
1,-0.217528,-1.961189,-1.057591,1.352875,2.319493
2,0.627896,0.389947,0.271756,0.871599,-0.886414
3,0.714992,1.008306,-1.330749,-1.363666,-2.229118
4,0.584214,-1.189335,0.833605,-0.998109,1.569595
5,0.861078,-0.260173,-0.271758,0.82039,-1.460008
6,-1.515798,1.158569,1.093837,-2.260017,-1.600121
7,0.195258,1.100203,0.045668,-0.070275,-1.684591
8,1.211649,-1.097785,0.650763,-0.024028,2.13768
9,0.471908,0.971677,-0.150222,-0.306794,-0.401718


In [14]:
# Add row labels: one name per row of stock_change, numbered from 1
stock_code = [
    f'股票{number}'
    for number in range(1, stock_change.shape[0] + 1)
]
print(stock_code)

# Same data as before, now with the stock names as the row index
pd.DataFrame(stock_change, index=stock_code)

['股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9', '股票10']


Unnamed: 0,0,1,2,3,4
股票1,0.04536,-0.879325,-0.097255,-0.307545,-1.027239
股票2,-0.217528,-1.961189,-1.057591,1.352875,2.319493
股票3,0.627896,0.389947,0.271756,0.871599,-0.886414
股票4,0.714992,1.008306,-1.330749,-1.363666,-2.229118
股票5,0.584214,-1.189335,0.833605,-0.998109,1.569595
股票6,0.861078,-0.260173,-0.271758,0.82039,-1.460008
股票7,-1.515798,1.158569,1.093837,-2.260017,-1.600121
股票8,0.195258,1.100203,0.045668,-0.070275,-1.684591
股票9,1.211649,-1.097785,0.650763,-0.024028,2.13768
股票10,0.471908,0.971677,-0.150222,-0.306794,-0.401718


In [15]:
# Column labels: let pd.date_range() generate one date per column of stock_change.
# freq='B' steps by business day, so weekends are skipped.
date = pd.date_range(start='20190403',periods=stock_change.shape[1],freq='B')
date

DatetimeIndex(['2019-04-03', '2019-04-04', '2019-04-05', '2019-04-08',
               '2019-04-09'],
              dtype='datetime64[ns]', freq='B')

In [17]:
# DataFrame with the stock names as the row index and the business dates as the column labels
stock_c = pd.DataFrame(stock_change,index=stock_code, columns=date)

In [19]:
stock_c.head()   # show the first five rows (head() returns 5 rows when called without arguments)

Unnamed: 0,2019-04-03,2019-04-04,2019-04-05,2019-04-08,2019-04-09
股票1,0.04536,-0.879325,-0.097255,-0.307545,-1.027239
股票2,-0.217528,-1.961189,-1.057591,1.352875,2.319493
股票3,0.627896,0.389947,0.271756,0.871599,-0.886414
股票4,0.714992,1.008306,-1.330749,-1.363666,-2.229118
股票5,0.584214,-1.189335,0.833605,-0.998109,1.569595


In [22]:
stock_c.tail()   # show the last five rows (tail() returns 5 rows when called without arguments)

Unnamed: 0,2019-04-03,2019-04-04,2019-04-05,2019-04-08,2019-04-09
股票6,0.861078,-0.260173,-0.271758,0.82039,-1.460008
股票7,-1.515798,1.158569,1.093837,-2.260017,-1.600121
股票8,0.195258,1.100203,0.045668,-0.070275,-1.684591
股票9,1.211649,-1.097785,0.650763,-0.024028,2.13768
股票10,0.471908,0.971677,-0.150222,-0.306794,-0.401718


In [25]:
# Plain [] indexing on a DataFrame selects a column first, so the order is
# [column][row]. Row-first indexing does not work this way:
# stock_c['股票6']['2019-04-03']
# (label-based row/column selection is what the .loc indexer is for)
stock_c['2019-04-03']['股票6']

np.float64(0.8610781860717629)

In [26]:
# Per-column summary statistics: count, mean, std, min, quartiles (25%/50%/75%) and max
stock_c.describe()

Unnamed: 0,2019-04-03,2019-04-04,2019-04-05,2019-04-08,2019-04-09
count,10.0,10.0,10.0,10.0,10.0
mean,0.297903,-0.075911,-0.001195,-0.228557,-0.326244
std,0.758412,1.150595,0.773596,1.10042,1.694768
min,-1.515798,-1.961189,-1.330749,-2.260017,-2.229118
25%,0.082835,-1.04317,-0.241374,-0.825468,-1.565093
50%,0.528061,0.064887,-0.025794,-0.188534,-0.956827
75%,0.693218,0.999149,0.556011,0.609286,1.076767
max,1.211649,1.158569,1.093837,1.352875,2.319493
