In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small demo frame for the crosstab examples below.
# Column C holds a single missing value (row 2), so it is stored as float.
df = pd.DataFrame(
    dict(
        A=[1, 2, 2, 2, 2],
        B=[3, 3, 4, 4, 4],
        C=[1, 1, np.nan, 1, 1],
    )
)
df

Unnamed: 0,A,B,C
0,1,3,1.0
1,2,3,1.0
2,2,4,
3,2,4,1.0
4,2,4,1.0


# 一、理解交叉表：

In [3]:
# Basic crosstab: count how many rows fall into each (A, B) combination.
# Rows of the result are the distinct values of A, columns the distinct values of B.
pd.crosstab(index=df['A'], columns=df['B'])

B,3,4
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,0
2,1,3


In [4]:
# Use `values` together with `aggfunc` to change what each cell holds:
# instead of counting rows, sum column C within every (A, B) group.
# Combinations with no rows show up as NaN; NaN entries in C are skipped by the sum.
# The aggregation is passed by name ('sum') rather than as np.sum, because handing
# a NumPy callable to aggfunc triggers a FutureWarning on pandas >= 2.1.
pd.crosstab(df['A'], df['B'], values=df['C'], aggfunc='sum')

B,3,4
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.0,
2,1.0,2.0


# 二、交叉表实战

In [5]:
# Load the daily stock data from a path relative to the notebook's working directory.
# Later cells parse this frame's index as dates.
stock_df = pd.read_csv(filepath_or_buffer="data/stock_day.csv")
# Preview the first five rows.
stock_df.head(n=5)

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39
2018-02-26,22.8,23.78,23.53,22.8,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.5,1.53
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32
2018-02-22,22.25,22.76,22.28,22.02,36105.01,0.36,1.64,21.446,21.909,23.137,35397.58,39904.78,60149.6,0.9
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58


## 求股票涨跌与星期几的关系

In [8]:
# Derive the up/down flag for each row:
# 1 when the close is at or above the open (close - open >= 0), otherwise 0.
# NOTE: this adds the 'positive' column to stock_df in place.
stock_df['positive'] = np.where(
    stock_df['close'] - stock_df['open'] >= 0,  # subtraction binds tighter than >=
    1,
    0,
)
stock_df.head()

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover,positive
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39,1
2018-02-26,22.8,23.78,23.53,22.8,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.5,1.53,1
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32,0
2018-02-22,22.25,22.76,22.28,22.02,36105.01,0.36,1.64,21.446,21.909,23.137,35397.58,39904.78,60149.6,0.9,1
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58,1


In [12]:
# Derive the day of the week from the index, which is parsed as dates here.
# pandas numbers weekdays from Monday=0 through Sunday=6.
# NOTE: this adds the 'weekday' column to stock_df in place.
stock_df['weekday'] = pd.to_datetime(arg=stock_df.index).weekday
stock_df.head()

Unnamed: 0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover,positive,weekday
2018-02-27,23.53,25.88,24.16,23.53,95578.03,0.63,2.68,22.942,22.142,22.875,53782.64,46738.65,55576.11,2.39,1,1
2018-02-26,22.8,23.78,23.53,22.8,60985.11,0.69,3.02,22.406,21.955,22.942,40827.52,42736.34,56007.5,1.53,1,0
2018-02-23,22.88,23.37,22.82,22.71,52914.01,0.54,2.42,21.938,21.929,23.022,35119.58,41871.97,56372.85,1.32,0,4
2018-02-22,22.25,22.76,22.28,22.02,36105.01,0.36,1.64,21.446,21.909,23.137,35397.58,39904.78,60149.6,0.9,1,3
2018-02-14,21.49,21.99,21.92,21.48,23331.04,0.44,2.05,21.366,21.923,23.253,33590.21,42935.74,61716.11,0.58,1,2


In [21]:
# First step towards the per-weekday up/down proportions:
# count the rows for every (weekday, positive) combination.
# Rows are weekdays, columns are the 0/1 up/down flag.
count_df = pd.crosstab(
    index=stock_df['weekday'],
    columns=stock_df['positive'],
)
count_df

positive,0,1
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
0,56,69
1,49,82
2,60,72
3,55,73
4,60,67


In [22]:
# Row totals of the count table: the number of rows observed for each weekday.
# DataFrame.sum(axis=1) is the built-in, vectorized equivalent of applying
# np.sum to every row via apply(np.sum, axis=1), and yields the same Series.
sum_series = count_df.sum(axis=1)
sum_series

weekday
0    125
1    131
2    132
3    128
4    127
dtype: int64

In [26]:
# Turn counts into per-weekday proportions: divide every row of the count table
# by that weekday's total, aligning the divisor on the row index so each row sums to 1.
count_df.div(other=sum_series, axis='index')

positive,0,1
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.448,0.552
1,0.374046,0.625954
2,0.454545,0.545455
3,0.429688,0.570312
4,0.472441,0.527559
