In [8]:
import datetime
import os

import cufflinks as cf
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import plotly.offline as plyo
import seaborn as sns
import tushare as ts
# Apply seaborn's default plot styling.
sns.set()
# CJK-capable font; presumably chosen so Chinese labels render in figures.
mpl.rcParams['font.sans-serif'] = 'WenQuanYi Micro Hei'
# Never commit an API token to a notebook: read it from the environment.
# NOTE(review): with an empty token, tushare is expected to fall back to the
# token stored via ts.set_token() -- confirm for the installed version.
pro = ts.pro_api(os.environ.get('TUSHARE_TOKEN', ''))

In [9]:
def _value_weighted_return(group):
    """Return the circ_mv-weighted mean of pct_chg / 100 for one portfolio.

    An empty portfolio (total weight of zero) has no defined return, so NaN
    is returned explicitly instead of relying on a 0/0 division.
    """
    total_mv = group['circ_mv'].sum()
    if not total_mv:
        return float('nan')
    return (group['pct_chg'] * group['circ_mv'] / 100).sum() / total_mv


def cal_smb_hml(df):
    """Compute the SMB and HML factor values for one cross-section of stocks.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per stock with columns ``circ_mv`` (circulating market
        value, used for the size split and as the portfolio weight), ``pb``
        (price-to-book ratio) and ``pct_chg`` (divided by 100 here, i.e.
        treated as a percentage).

    Returns
    -------
    tuple
        ``(smb, hml)``. Either value is NaN when one of the portfolios it
        depends on is empty.

    Notes
    -----
    The frame is modified in place: the columns ``SB`` ('S'/'B'), ``BM``
    (1 / pb) and ``HML`` ('H'/'M'/'L') are added or overwritten.
    Rows whose ``BM`` is NaN satisfy neither breakpoint comparison and
    therefore stay in the 'M' bucket.
    """
    # Size split: at or above the median circulating market value is big.
    # The median is computed once rather than once per row.
    size_median = df['circ_mv'].median()
    df['SB'] = pd.Series('S', index=df.index).mask(df['circ_mv'] >= size_median, 'B')

    # Book-to-market ratio is the reciprocal of price-to-book.
    df['BM'] = 1 / df['pb']

    # Breakpoints at the 30th and 70th percentiles, so L:M:H = 3:4:3.
    border_down, border_up = df['BM'].quantile([0.3, 0.7])

    # H: BM >= 70th percentile, L: BM <= 30th percentile, M: everything else.
    # L is applied last so it wins if both breakpoints coincide.
    df['HML'] = (
        pd.Series('M', index=df.index)
        .mask(df['BM'] >= border_up, 'H')
        .mask(df['BM'] <= border_down, 'L')
    )

    # Six portfolios: size (S, B) crossed with value (L, M, H).
    is_small = df['SB'] == 'S'
    is_big = df['SB'] == 'B'
    R_SL = _value_weighted_return(df[is_small & (df['HML'] == 'L')])
    R_SM = _value_weighted_return(df[is_small & (df['HML'] == 'M')])
    R_SH = _value_weighted_return(df[is_small & (df['HML'] == 'H')])
    R_BL = _value_weighted_return(df[is_big & (df['HML'] == 'L')])
    R_BM = _value_weighted_return(df[is_big & (df['HML'] == 'M')])
    R_BH = _value_weighted_return(df[is_big & (df['HML'] == 'H')])

    # SMB: mean small-cap return minus mean big-cap return.
    smb = (R_SL + R_SM + R_SH - R_BL - R_BM - R_BH) / 3
    # HML: mean high-BM return minus mean low-BM return.
    hml = (R_SH + R_BH - R_SL - R_BL) / 2
    return smb, hml

In [None]:
# Build the daily SMB/HML series: one [date, smb, hml] record per session.
data = []

# Bound the calendar at today. Without an end_date the calendar can extend
# past the current date, and those future sessions have no quotes yet.
last_date = datetime.date.today().strftime('%Y%m%d')
df_cal = pro.trade_cal(start_date='20170701', end_date=last_date)
# Keep open SSE sessions only; sort so records are appended chronologically
# regardless of the order in which the API returns rows.
df_cal = df_cal.query('(exchange=="SSE") & (is_open==1)').sort_values('cal_date')

for date in df_cal.cal_date:
    df_daily = pro.daily(trade_date=date)
    df_basic = pro.daily_basic(trade_date=date)
    # Inner join: keep only stocks present in both the quote and the
    # valuation table for this session.
    df = pd.merge(df_daily, df_basic, on='ts_code', how='inner')
    if df.empty:
        # Nothing to compute for this session (e.g. data not yet available).
        continue
    smb, hml = cal_smb_hml(df)
    data.append([date, smb, hml])
    print(date, smb, hml)

20170703 0.010788618999982728 0.002128986200379589
20170704 0.004304121927425319 0.0005175102358438029


In [6]:
# Turn the collected [date, smb, hml] records into a date-indexed frame
# and save it to CSV.
df_tfm = (
    pd.DataFrame(data, columns=['trade_date', 'SMB', 'HML'])
    .assign(trade_date=lambda frame: pd.to_datetime(frame['trade_date']))
    .set_index('trade_date')
)
df_tfm.to_csv('df_three_factor_model.csv')

Unnamed: 0,Unnamed: 1,Unnamed: 2,df_three_factor_model
2019-07-18,-0.010031,-0.009226,0.003282
2019-07-19,0.007969,0.003703,-0.0038
2019-07-19,0.007969,-0.003833,0.002194
2019-07-19,0.003969,0.001487,0.001794
2019-07-19,0.007969,-0.003508,0.001655
2019-07-19,0.007969,-0.003352,0.001698
2019-07-19,0.007969,-0.00395,0.002083
2019-07-19,0.007969,-0.003793,0.002221
2019-07-19,0.007969,-0.003586,0.001236
2019-07-19,0.007969,-0.003487,0.00125


In [4]:
# Fetch daily bars from 2017-06-01 onward for six stocks and one index.
# NOTE(review): 002415.SZ appears to be Hikvision's code, not Wanhua
# Chemical's -- confirm whether the 'wanhua' label or the code is intended.
stock_codes = {
    'wanke': '000002.SZ',
    'pingan': '601318.SH',
    'maotai': '600519.SH',
    'wanhua': '002415.SZ',
    'NHGS': '600377.SH',
    'yili': '600887.SH',
}
stock_frames = {
    label: pro.daily(ts_code=code, start_date='20170601')
    for label, code in stock_codes.items()
}
# Keep the individual top-level names available for later cells.
wanke = stock_frames['wanke']
pingan = stock_frames['pingan']
maotai = stock_frames['maotai']
wanhua = stock_frames['wanhua']
NHGS = stock_frames['NHGS']
yili = stock_frames['yili']
gzA = pro.index_daily(ts_code='399317.SZ', start_date='20170601')

# Keep only the return series, indexed by trade date,
# then sort by date in ascending order.
stock_list = [wanke, pingan, maotai, wanhua, NHGS, yili, gzA]
for frame in stock_list:
    frame.index = pd.to_datetime(frame['trade_date'])
return_columns = [frame['pct_chg'] / 100 for frame in stock_list]
df_stock = pd.concat(return_columns, axis=1)
df_stock.columns = list(stock_codes) + ['gzA']
df_stock = df_stock.sort_index(ascending=True)
df_stock.tail(10)

Unnamed: 0_level_0,wanke,pingan,maotai,wanhua,NHGS,yili,gzA
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-07-08,-0.010187,-0.024876,-0.014963,-0.030906,0.008152,-0.01464,-0.027915
2019-07-09,0.0,-0.011965,-0.008935,-0.022361,-0.018868,0.008793,0.000536
2019-07-10,0.000686,0.003999,0.006618,0.009374,0.008242,-0.003306,-0.004011
2019-07-11,0.001371,0.001138,-0.001038,0.003343,-0.007266,0.021713,0.000304
2019-07-12,0.015063,0.008752,0.004677,-0.001481,-0.011461,-0.001476,0.005625
2019-07-15,0.011804,-0.007549,-0.010303,0.006674,-0.001932,-0.003252,0.007383
2019-07-16,0.002,-0.012829,-0.008126,-0.013996,-0.013553,-0.030546,-0.001454
2019-07-17,0.002329,0.00046,-0.004649,0.004856,-0.009814,-0.006424,-4.1e-05
2019-07-18,-0.001328,-0.002989,-0.016606,-0.027138,-0.004955,-0.011392,-0.013024
2019-07-19,0.020605,0.018679,0.008834,0.011846,0.000996,0.00436,0.008514
