## numpy的运行效率巨高

In [3]:
# Baseline: square 10000 integers one by one with a pure-Python list comprehension.
normal_list = range(10000)
%timeit [i**2 for i in normal_list]

2.66 ms ± 58.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
import numpy as np
# Same computation expressed as a single vectorized operation on an ndarray.
numpy_list = np.arange(10000)
%timeit numpy_list**2

6.72 µs ± 202 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## numpy的运算是逐元素进行的，而普通list的乘法是把整个列表重复

In [5]:
# Multiplying an ndarray by a scalar scales every element.
numpy_list = 3 * np.ones(5)
print(numpy_list)
# Multiplying a list by an integer repeats the whole list instead.
normal_list = 3 * [1, 1, 1, 1, 1]
print(normal_list)

[3. 3. 3. 3. 3.]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


## 生成Numpy矩阵

In [6]:
# Matrix layout: one row per stock, one column per trading day.
stock_count, view_days = 200, 504
# Standard-normal samples; no seed is set in this cell, so values differ between runs.
stock_day_change = np.random.standard_normal(size=(stock_count, view_days))
print(stock_day_change.shape)
# Peek at the first ten days of the first stock (kept 2-D by slicing the row).
print(stock_day_change[:1, :10])

(200, 504)
[[ 1.912149   -1.52042359  0.65424242  0.30663493 -0.45930093  0.31043448
   1.59412837 -0.74795386 -0.91766078  1.20069572]]


# np.around(nplist, decimals)
将数值舍入到指定的小数位数，例如 np.around(nplist, 2) 保留小数点后两位

In [7]:
# Round the first five daily changes of the first two stocks to two decimals.
np.around(stock_day_change[:2, :5], decimals=2)

array([[ 1.91, -1.52,  0.65,  0.31, -0.46],
       [ 1.32,  1.06, -0.48, -1.05,  0.18]])

In [8]:
# Work on a copy so the source matrix is left untouched.
temp_list = stock_day_change[:2, :5].copy()
# Blank out the first two entries of row 0 to simulate missing data.
temp_list[0, :2] = np.nan
print(temp_list)

[[        nan         nan  0.65424242  0.30663493 -0.45930093]
 [ 1.32365634  1.06084251 -0.48198367 -1.05260576  0.175686  ]]


# 用nan_to_num处理nan缺失值（默认把nan替换为0）

In [9]:
# With its default arguments nan_to_num returns a new array in which NaN becomes 0.0.
temp_list = np.nan_to_num(temp_list, copy=True)
print(temp_list)

[[ 0.          0.          0.65424242  0.30663493 -0.45930093]
 [ 1.32365634  1.06084251 -0.48198367 -1.05260576  0.175686  ]]


# 按照逻辑条件进行数据筛选

In [10]:
# Boolean mask: True wherever the change is at least 0.5
# (first three stocks, first five days).
mask = np.greater_equal(stock_day_change[:3, :5], 0.5)
print(mask)

[[ True False  True False False]
 [ True  True False False False]
 [False  True False  True False]]


In [11]:
# Copy of the same 3x5 window the mask was computed from.
temp_list = stock_day_change[:3, :5].copy()
print(temp_list)
# Boolean indexing yields a flat 1-D array of the elements where mask is True.
temp_list[mask]

[[ 1.912149   -1.52042359  0.65424242  0.30663493 -0.45930093]
 [ 1.32365634  1.06084251 -0.48198367 -1.05260576  0.175686  ]
 [-0.66121932  0.72434783 -1.88774381  1.13566786  0.22625727]]


array([1.912149  , 0.65424242, 1.32365634, 1.06084251, 0.72434783,
       1.13566786])

In [12]:
# Overwrite, in place, every element strictly greater than 0.5 with 1.
above_half = temp_list > 0.5
temp_list[above_half] = 1
print(temp_list)

[[ 1.         -1.52042359  1.          0.30663493 -0.45930093]
 [ 1.          1.         -0.48198367 -1.05260576  0.175686  ]
 [-0.66121932  1.         -1.88774381  1.          0.22625727]]


# np.all()
判断序列中的所有元素是否为true

In [13]:
# True only if every element of the 2x5 window is positive.
(stock_day_change[:2, :5] > 0).all()

False

# np.any()
判断序列中是否至少有一个元素为true

In [14]:
# True as soon as at least one element of the 2x5 window is positive.
(stock_day_change[:2, :5] > 0).any()

True

# np.maximum() 和 np.minimum()
对两个序列的对应元素做比较，取最大（小）值

In [15]:
head_block = stock_day_change[:2, :5]    # first two stocks, first five days
tail_block = stock_day_change[-2:, -5:]  # last two stocks, last five days
print(head_block)
print(tail_block)
# Element-wise maximum of the two equally shaped blocks.
np.maximum(head_block, tail_block)

[[ 1.912149   -1.52042359  0.65424242  0.30663493 -0.45930093]
 [ 1.32365634  1.06084251 -0.48198367 -1.05260576  0.175686  ]]
[[-1.19666818 -0.94863388  1.55984786 -0.87562957 -0.58636868]
 [ 0.92410725 -0.33076116 -2.73541386 -0.97049025  0.40270536]]


array([[ 1.912149  , -0.94863388,  1.55984786,  0.30663493, -0.45930093],
       [ 1.32365634,  1.06084251, -0.48198367, -0.97049025,  0.40270536]])

In [16]:
# Element-wise minimum of the first-two-stocks/first-five-days block and the
# last-two-stocks/last-five-days block.
np.minimum(
    stock_day_change[:2, :5],
    stock_day_change[-2:, -5:],
)

array([[-1.19666818, -1.52042359,  0.65424242, -0.87562957, -0.58636868],
       [ 0.92410725, -0.33076116, -2.73541386, -1.05260576,  0.175686  ]])

# np.unique(nplist1)
由序列中不重复的值组成新的（已排序的）序列

In [17]:
# Casting float to int drops the fractional part (e.g. -0.48 -> 0, -1.52 -> -1).
change_int = stock_day_change[:2, :5].astype(int)
print(change_int)
# Distinct values that occur anywhere in the window.
np.unique(change_int)

[[ 1 -1  0  0  0]
 [ 1  1  0 -1  0]]


array([-1,  0,  1])

## np.diff(nplist, n=1, axis=-1)
将同一序列中相邻的两个数值做减法（后一个减前一个），默认axis=-1即最后一个轴，对二维数组相当于axis=1，按照x轴方向

In [18]:
# Difference between consecutive days within each stock (along the last axis).
np.diff(stock_day_change[:2, :5], axis=-1)

array([[-3.43257259,  2.17466601, -0.34760749, -0.76593586],
       [-0.26281383, -1.54282618, -0.57062209,  1.22829175]])

In [19]:
# Difference between consecutive rows (second stock minus first) for each day.
np.diff(stock_day_change[:2, :5], n=1, axis=0)

array([[-0.58849266,  2.5812661 , -1.13622609, -1.35924068,  0.63498692]])

## np.where(表达式, 值1， 值2)
如果表达式成立，赋值值1，否则赋值值2

In [20]:
# Plain slice without .copy(): temp_list is a view onto stock_day_change.
temp_list = stock_day_change[:2, :5]

In [24]:
# 1 where the change exceeds 0.5, otherwise 0.
np.where(temp_list > 0.5, 1, 0)

array([[1, 0, 1, 0, 0],
       [1, 1, 0, 0, 0]])

In [25]:
# 1 only where the change lies strictly between 0.5 and 1, otherwise 0.
np.where((temp_list > 0.5) & (temp_list < 1), 1, 0)

array([[0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0]])

## 基础统计函数

In [36]:
# 4x4 window: first four stocks, first four days.
stock_day_change_four = stock_day_change[:4, :4]
# Largest change of each stock (reduce along the day axis).
t = stock_day_change_four.max(axis=1)
print(t)
# Column index (day) at which each stock's largest change occurs.
t = stock_day_change_four.argmax(axis=1)
print(t)

[1.912149   1.32365634 1.13566786 1.44441551]
[0 0 3 0]


## 伯努利案例

In [58]:
# Number of independent gamblers simulated by the driver code below.
gamblers = 10
def casino(win_rate, win_once=1, lose_once=1, commission=.01,
           start_money=1000000, play_count=1000000):
    """Simulate one gambler playing a series of independent win/lose rounds.

    Every round is a single Bernoulli trial. The gambler gains ``win_once``
    on a win or loses ``lose_once`` on a loss, then pays ``commission``.
    Play stops early as soon as the bankroll is no longer positive.

    Parameters
    ----------
    win_rate : float
        Probability of winning a single round, in [0, 1].
    win_once : number, optional
        Amount gained on a winning round.
    lose_once : number, optional
        Amount lost on a losing round.
    commission : number, optional
        Fee paid after every round, win or lose.
    start_money : number, optional
        Bankroll before the first round.
    play_count : int, optional
        Maximum number of rounds to play.

    Returns
    -------
    number
        Bankroll after the last round played.
    """
    my_money = start_money
    # Draw all outcomes in one vectorized call instead of hitting the RNG
    # once per loop iteration.
    outcomes = np.random.binomial(1, win_rate, size=play_count)
    for won in outcomes:
        if won:
            my_money += win_once
        else:
            my_money -= lose_once
        my_money -= commission
        if my_money <= 0:
            # Bankrupt: no money left to keep playing.
            break
    return my_money

# Fair game with no commission; the result is stored in `t` but not displayed.
t = casino(win_rate=0.5, win_once=1, lose_once=1, commission=0)

# Final bankroll of each gambler with a 60% win rate and a 0.01 fee per round.
final_balances = [
    round(casino(win_rate=0.6, win_once=1, lose_once=1, commission=0.01), 2)
    for _ in range(gamblers)
]
print(final_balances)

[1189690.0, 1189410.0, 1188558.0, 1190008.0, 1189916.0, 1189346.0, 1188400.0, 1191104.0, 1188288.0, 1190010.0]
