In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 5x4 demo frame of standard-normal samples shifted down by 1.
# The generator is seeded so the frame is identical on every
# "Restart Kernel -> Run All" instead of changing with each execution.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((5, 4)) - 1)
df

Unnamed: 0,0,1,2,3
0,0.397356,-1.050005,-1.656011,-0.65148
1,-1.559109,0.864164,-2.094334,-0.309744
2,0.157133,-0.303514,-0.533385,-1.052288
3,-1.123657,-1.346386,-0.969837,0.239719
4,-2.824093,-0.098515,-1.966907,-0.371212


# 一、apply和applymap

In [3]:
# DataFrame.apply hands the function one whole row or one whole column at a time.
# axis defaults to 0, which means the function is called once per column.
df.apply(lambda column: column.max(), axis=0)

0    0.397356
1    0.864164
2   -0.533385
3    0.239719
dtype: float64

In [4]:
# 2. Understanding axis

# np.nan is used instead of np.NAN: the upper-case alias was removed in NumPy 2.0.
a = np.array([
    [1, np.nan, 3],
    [4, 5, 6],
    [7, 8, 9]
])

# axis=0: the operation runs over the 1st (outermost) level of the array.
# axis=1: the operation runs over the 2nd level of the array.

# Why does axis=0 mean "columns" for apply?
# Reducing along axis=0 compares the rows a1, a2, a3 with one another position
# by position, so the result holds one value per column.
a1 = [1, 2, 3]
a2 = [4, 8, 6]
a3 = [7, 3, 5]

# Without axis=, np.max flattens its input and returns the single scalar 8;
# axis=0 is what yields the per-column maxima.
column_max = np.max([a1, a2, a3], axis=0)

# Displayed as a plain list: [7, 8, 6]
column_max.tolist()

# Why does axis=0 mean "rows" for df.dropna?
# axis=0 is the axis that carries the row labels: dropna removes labels along
# it (whole rows), whereas apply collapses along it (one value per column).

[7, 8, 6]

In [5]:
# 3. applymap: transform every individual element of the DataFrame.
# NOTE(review): pandas 2.1 deprecated DataFrame.applymap in favour of
# DataFrame.map; switch once the minimum supported pandas version allows it.
df.applymap(lambda element: element * 10)

Unnamed: 0,0,1,2,3
0,3.973559,-10.500048,-16.560106,-6.514803
1,-15.591087,8.641637,-20.943342,-3.097438
2,1.571329,-3.035144,-5.33385,-10.522882
3,-11.236571,-13.463859,-9.698373,2.39719
4,-28.240934,-0.985145,-19.669068,-3.712122


# 二、排序

In [6]:
# 1. sort_values: order the rows by the values of one column (ascending by default).
# Pass ascending=False to sort from largest to smallest instead.
df.sort_values(by=0, ascending=False)

Unnamed: 0,0,1,2,3
0,0.397356,-1.050005,-1.656011,-0.65148
2,0.157133,-0.303514,-0.533385,-1.052288
3,-1.123657,-1.346386,-0.969837,0.239719
1,-1.559109,0.864164,-2.094334,-0.309744
4,-2.824093,-0.098515,-1.966907,-0.371212


In [7]:
# 2. sort_index: order the rows by their index labels, here from largest to smallest.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,0,1,2,3
4,-2.824093,-0.098515,-1.966907,-0.371212
3,-1.123657,-1.346386,-0.969837,0.239719
2,0.157133,-0.303514,-0.533385,-1.052288
1,-1.559109,0.864164,-2.094334,-0.309744
0,0.397356,-1.050005,-1.656011,-0.65148


# 三、算术和逻辑运算

In [8]:
# 1. Arithmetic: the scalar is applied to every element of the frame.
df.mul(10)

Unnamed: 0,0,1,2,3
0,3.973559,-10.500048,-16.560106,-6.514803
1,-15.591087,8.641637,-20.943342,-3.097438
2,1.571329,-3.035144,-5.33385,-10.522882
3,-11.236571,-13.463859,-9.698373,2.39719
4,-28.240934,-0.985145,-19.669068,-3.712122


In [9]:
# 2. Logical operations: build one boolean mask per condition and combine them with &.
# From here on df is the stock data read from CSV, replacing the random demo frame.
df = pd.read_csv("data/stock_day.csv")
open_above_15 = df['open'] > 15
p_change_above_2 = df['p_change'] > 2
df[open_above_15 & p_change_above_2]

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2018-02-26,22.80,23.78,23.53,22.80,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.50,1.53
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58
2018-02-12,20.70,21.40,21.19,20.63,32445.39,0.82,4.03,21.504,22.338,23.533,44645.16,45679.94,68686.33,0.81
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-04-13,19.60,21.30,21.13,19.50,171822.69,1.70,8.75,19.228,17.812,16.563,149620.34,114456.84,111752.31,5.88
2015-04-09,18.28,19.89,19.62,18.02,183119.05,1.20,6.51,17.736,16.826,15.964,124323.21,106501.34,104829.10,6.27
2015-04-08,17.60,18.53,18.42,17.60,157725.97,0.88,5.02,17.070,16.394,15.698,101421.29,97906.88,101658.57,5.40
2015-04-07,16.54,17.98,17.54,16.50,122471.85,0.88,5.28,16.620,16.120,15.510,86769.62,97473.29,98832.94,4.19


In [10]:
# Same row filter as df[(df['open'] > 15) & (df['p_change'] > 2)],
# written as a query expression string.
row_filter = "p_change>2 & open > 15"
df.query(row_filter)

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2018-02-26,22.80,23.78,23.53,22.80,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.50,1.53
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58
2018-02-12,20.70,21.40,21.19,20.63,32445.39,0.82,4.03,21.504,22.338,23.533,44645.16,45679.94,68686.33,0.81
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-04-13,19.60,21.30,21.13,19.50,171822.69,1.70,8.75,19.228,17.812,16.563,149620.34,114456.84,111752.31,5.88
2015-04-09,18.28,19.89,19.62,18.02,183119.05,1.20,6.51,17.736,16.826,15.964,124323.21,106501.34,104829.10,6.27
2015-04-08,17.60,18.53,18.42,17.60,157725.97,0.88,5.02,17.070,16.394,15.698,101421.29,97906.88,101658.57,5.40
2015-04-07,16.54,17.98,17.54,16.50,122471.85,0.88,5.28,16.620,16.120,15.510,86769.62,97473.29,98832.94,4.19


In [11]:
# Rows whose 'high' equals either listed value. The boolean-mask equivalent is
#     df[df['high'].isin([25.88, 17.98])]
high_filter = 'high == 25.88 | high == 17.98'
df.query(high_filter)

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2017-10-17,25.3,25.88,25.43,25.23,67649.41,0.24,0.95,25.908,26.734,26.542,89964.65,147619.06,159389.25,1.69
2017-08-31,24.16,25.88,25.62,23.78,259406.52,1.47,6.09,23.94,23.485,22.554,159122.44,189711.82,126717.47,6.49
2015-08-06,16.94,17.98,17.35,16.81,60609.0,-0.15,-0.86,17.098,18.587,20.783,84605.36,94500.89,116331.12,2.07
2015-04-07,16.54,17.98,17.54,16.5,122471.85,0.88,5.28,16.62,16.12,15.51,86769.62,97473.29,98832.94,4.19


# 四、统计和累计函数

In [17]:
# Total of the 'volume' column.
df.loc[:, 'volume'].sum()

64239248.78999999

In [22]:
# Running maximum of 'volume': each row holds the largest value seen from the
# first row down to that row.
volume = df['volume']
volume.cummax()

2018-02-27     95578.03
2018-02-26     95578.03
2018-02-23     95578.03
2018-02-22     95578.03
2018-02-14     95578.03
                ...    
2015-03-06    501915.41
2015-03-05    501915.41
2015-03-04    501915.41
2015-03-03    501915.41
2015-03-02    501915.41
Name: volume, Length: 643, dtype: float64