In [1]:
'''

Updated on 8th October 2020

A draft that uses the Fama-French three-factor model to evaluate mutual fund performance
'''


import pandas as pd
import tushare as ts
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import datetime as dt
import statsmodels.api as sm
# Tushare Pro API client; the data-loading cell below calls its index_daily,
# trade_cal, daily, daily_basic and fund_daily endpoints.
# NOTE(review): the token argument is an empty string — presumably tushare then
# falls back to a token stored earlier via ts.set_token(); confirm. Do not
# paste a real token into the notebook.
pro = ts.pro_api('')

In [2]:
def cal_SMB_HML(df, lower_q=0.3, upper_q=0.7):
    """Compute equal-weighted SMB and HML values from one cross-section of stocks.

    Stocks are sorted independently on size (``circ_mv``) and on
    book-to-market (``1 / pb``). The bottom and top groups of each sort are
    delimited by the ``lower_q`` / ``upper_q`` quantiles, and each factor is
    the difference between the simple means of ``pct_chg`` in the two groups.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per stock, with columns ``circ_mv``, ``pb`` and ``pct_chg``.
        The helper columns ``Big``, ``Small``, ``BM`` and ``HML`` are written
        to this frame in place.
    lower_q, upper_q : float, optional
        Quantile breakpoints for the bottom and top groups (default 0.3 / 0.7).

    Returns
    -------
    tuple
        ``(SMB, HML)`` where ``SMB`` is mean(Small) - mean(Big) and ``HML`` is
        mean(H) - mean(L) of ``pct_chg``.
    """
    # Breakpoints are the same for every row, so compute them once instead of
    # re-evaluating the quantile inside a per-row lambda.
    size = df['circ_mv']
    size_low, size_high = size.quantile([lower_q, upper_q])
    df['Big'] = (size >= size_high).astype(int)
    df['Small'] = (size <= size_low).astype(int)

    # Book-to-market is the inverse of price-to-book.
    # NOTE(review): rows with missing, zero or negative pb are not filtered
    # out here — confirm that this is intended.
    df['BM'] = 1 / df['pb']
    border_down, border_up = df['BM'].quantile([lower_q, upper_q])

    # Default to 'M', then tag the tails. 'L' is applied last so that it wins
    # if a value satisfies both conditions, matching the original precedence.
    df['HML'] = 'M'
    df.loc[df['BM'] >= border_up, 'HML'] = 'H'
    df.loc[df['BM'] <= border_down, 'HML'] = 'L'

    SMB = (df.loc[df['Small'] == 1, 'pct_chg'].mean()
           - df.loc[df['Big'] == 1, 'pct_chg'].mean())
    HML = (df.loc[df['HML'] == 'H', 'pct_chg'].mean()
           - df.loc[df['HML'] == 'L', 'pct_chg'].mean())
    return SMB, HML



In [3]:
# --- Parameters ---------------------------------------------------------
data = []
start_date = '20170101'
end_date = '20170301'
index_code = '399317.SZ'
fund_list = ['150018.SZ', '150019.SZ']
#fund_list.append(index_code)

# --- Raw downloads ------------------------------------------------------
# The index series is fetched once (using index_code rather than a second
# hard-coded copy of the ticker) and reused for the market factor below.
A_share_return = pro.index_daily(ts_code=index_code, start_date=start_date, end_date=end_date)
df_calendar = pro.trade_cal(start_date=start_date, end_date=end_date)

# Keep only the days on which the SSE was open.
df_calendar = df_calendar.query('(exchange=="SSE") & (is_open==1)')

# --- Daily SMB / HML ----------------------------------------------------
# For each open day, join the price data with the valuation data per stock
# and compute that day's factor values.
for date in df_calendar['cal_date']:
    df_daily = pro.daily(trade_date=date)
    df_basic = pro.daily_basic(trade_date=date)
    df = pd.merge(df_daily, df_basic, on='ts_code', how='inner')
    SMB, HML = cal_SMB_HML(df)
    data.append([date, SMB, HML])

df_DataFrame = pd.DataFrame(data, columns=['trade_date', 'SMB', 'HML'])
df_DataFrame['trade_date'] = pd.to_datetime(df_DataFrame['trade_date'])
# Sort chronologically so the table does not depend on the order in which
# the trading calendar happens to be returned.
df_DataFrame = df_DataFrame.set_index(['trade_date']).sort_index()

# --- Market factor ------------------------------------------------------
# Notice: combine HML, SMB, and market return.
# The market return does not have the risk-free rate subtracted.
# Align on trade_date (as is done for the funds below) instead of attaching
# a reversed positional array, which silently assumed the index data came
# back in descending date order and matched the calendar day for day.
market_by_date = A_share_return.set_index(pd.to_datetime(A_share_return['trade_date']))
df_DataFrame['market_return'] = market_by_date['pct_chg']

# NOTE(review): fund_returns is not used anywhere in the visible cells; it is
# kept only in case a later cell relies on it.
fund_returns = pd.DataFrame(index = df_calendar['cal_date'])
fund_returns.index = pd.to_datetime(fund_returns.index)

# --- Fund returns -------------------------------------------------------
# Each fund becomes one column, aligned on the trade_date index.
for fund in fund_list:
    fund_DataFrame = pro.fund_daily(ts_code=fund, start_date=start_date, end_date=end_date)
    fund_DataFrame = fund_DataFrame.set_index(pd.to_datetime(fund_DataFrame['trade_date']))
    df_DataFrame[fund] = fund_DataFrame['pct_chg']

# Drop days on which any factor, the index or a fund has no value.
df_DataFrame = df_DataFrame.dropna()

# Regression sketch (first fund on the three factors):
# Y = df_DataFrame.iloc[:,3]
# X = df_DataFrame.iloc[:,0:3]
# X = sm.add_constant(X)
# result = sm.OLS(Y, X).fit()


In [5]:
# Once the factors are available, the regression model can be fitted directly.
df_DataFrame

Unnamed: 0_level_0,SMB,HML,market_return,150018.SZ,150019.SZ
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,0.749776,0.071452,0.955,0.0976,0.2475
2017-01-05,-0.084284,0.544617,0.0105,-1.1,-0.6135
2017-01-06,-0.746202,1.271845,-0.6683,0.5056,-1.358
2017-01-09,-0.265029,0.559001,0.4816,0.6036,0.1252
2017-01-10,0.531906,0.002852,-0.2831,0.1,-0.25
2017-01-11,-0.376433,0.291737,-0.928,-0.7992,-0.7519
2017-01-12,0.277935,0.080667,-0.7362,-0.1007,-1.1364
2017-01-13,-1.23099,1.035152,-0.9259,-0.3024,-1.1494
2017-01-16,-3.163664,2.24123,-2.0294,0.1011,-3.7468
2017-01-17,1.365152,-1.053462,0.6912,0.101,0.6711
