In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import math
import scipy.stats as st
import statsmodels.api as sm
from linearmodels import FamaMacBeth

In [3]:
old_path2 = 'D:/jupyterfile/因子数据/控制变量/'

lscolumns=[]
for i in os.listdir(old_path2):
    i=i[:-4]
    lscolumns.append(i)


factors = ['ret_month','tail_risk']+lscolumns[1:]
#data = pd.read_csv('../数据/all_data/all_data.csv',index_col=[0,1],parse_dates=[1])[factors]

In [4]:
factors

['ret_month', 'tail_risk', 'beta', 'bm', 'log_cap', 'Max', 'mom']

In [4]:
#factors = ['ret_month','idiovol', 'beta','bm','illiq','illiq_std','log_cap','mom','str']
data = pd.read_csv('D:/jupyterfile/因子数据/all_data/tail_risk.csv',index_col=[0,1],parse_dates=[1])[factors]

In [5]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,ret_month,tail_risk,beta,bm,log_cap,Max,mom
code,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,2003-09-30,-0.113958,0.008900,1.287230,0.223160,23.604888,0.015919,-0.249819
1.0,2003-10-31,0.027849,0.029753,1.272123,0.251398,23.485741,0.025028,-0.280181
1.0,2003-11-30,0.013878,0.014767,1.289896,0.244198,23.514798,0.049438,-0.316147
1.0,2003-12-31,0.088847,0.013672,1.337315,0.239453,23.530192,0.034347,-0.187932
1.0,2004-01-31,0.114744,0.011804,1.330531,0.243387,23.616812,0.045909,-0.256170
...,...,...,...,...,...,...,...,...
603999.0,2020-07-31,0.025908,0.242586,1.318162,0.453179,22.063223,0.049322,0.122639
603999.0,2020-08-31,-0.152489,0.219438,1.255334,0.441201,22.090010,0.034860,0.185846
603999.0,2020-09-30,-0.032383,0.023008,1.180690,0.529022,21.926022,0.028317,0.211573
603999.0,2020-10-31,0.123759,-0.011491,1.154601,0.546026,21.894385,0.024180,-0.008526


In [7]:
def FamaMacBeth_summary(DF,
                        reg_lst,
                        reg_order,
                        reg_names=None,
                        params_format='{:.3f}',
                        tvalues_format='{:.2f}'):

    '''
    A function for Fama-MacBeth regression and results summary.

    Parameters
    ----------
    DF: DataFrame
        A panel date of which multi-index is stock and month (datetime64[ns]),
        containing all the dependent and independent variables.
    reg_lst: list
        A list containing multiple lists of dependent variable and independent
        variables, e.g., [['Y', 'X1', ...],..., ['Y', 'X1', ...,]].
    reg_order: list
        The order of independent variables in result table.
    reg_names: list
        The names for each regression.
    params_format: str
        The number of decimal places for parameters, e.g., '{:.3f}'.
    tvalues_format: str
        The number of decimal places for t-values, e.g., '{:.2f}'.
    '''

    # Create a DataFrame
    rows = sum([[var, f'{var}_t'] for var in ['const'] + reg_order], [])
    if reg_names is None:
        reg_names = [f'({i+1})' for i in range(len(reg_lst))]
    show = pd.DataFrame(index=rows, columns=reg_names)

    for reg, reg_name in zip(reg_lst, reg_names):
        df = DF.loc[:, reg].copy().dropna()
        T = len(df.index.get_level_values(df.index.names[1]).unique())
        #最大滞后阶数计算
        lag = math.floor(4*(T/100)**(2/9))
        fmb = FamaMacBeth(df[reg[0]], sm.add_constant(df[reg[1:]]))
        # Newey-West adjust
        fmb = fmb.fit(cov_type='kernel', bandwidth=lag)
        # params, tvalues(tstats) and pvalues
        params = fmb.params
        tvalues = fmb.tstats
        pvalues = fmb.pvalues
        # Obs.
        total_obs = fmb.nobs
        # mean_obs = fmb.time_info['mean']

        # average rsquared_adj
        dft = df.reset_index(level=df.index.names[0], drop=True).copy()
        rsquared_adj = []
        for month in dft.index.unique():
            dftm = dft.loc[month].copy()
            ols = sm.OLS(dftm[reg[0]], sm.add_constant(dftm[reg[1:]])).fit()
            rsquared_adj.append(ols.rsquared_adj)
        ar2a = np.mean(rsquared_adj)

        # params and significance
        ps_lst = []
        for param, pvalue in zip(params, pvalues):
            param = params_format.format(param)
            if (pvalue <= 0.1) & (pvalue > 0.05):
                param = param + '*'
            elif (pvalue <= 0.05) & (pvalue > 0.01):
                param = param + '**'
            elif pvalue <= 0.01:
                param = param + '***'
            ps_lst.append(param)

        # params and tvalues
        tvalues = [tvalues_format.format(t) for t in tvalues]
        t_lst = [f'({t})' for t in tvalues]
        pt_lst = [[i, j] for i, j in zip(ps_lst, t_lst)]

        # put them in place
        for var, pt in zip(['const'] + reg[1:], pt_lst):
            show.loc[var, reg_name] = pt[0]
            show.loc[f'{var}_t', reg_name] = pt[1]
        show.loc['No. Obs.', reg_name] = str(total_obs)
        show.loc['Adj. R²', reg_name] = '{:.2f}%'.format(ar2a * 100)

    rename_index = sum([[var, ''] for var in ['Intercept'] + reg_order], [])
    show.index = rename_index + ['No. Obs.', 'Adj. R²']

    return show.dropna(axis=0, how='all').fillna('')

In [8]:
model1 =factors[:2]
model2 =factors[:3]
model3 =factors[:4]
model4 =factors[:5]
model5 =factors[:6]
model6 =factors[:7]
model7 =factors[:8]
model8 =factors[:9]
model9 =factors[:10]
model10=factors[:11]

In [9]:
result = FamaMacBeth_summary(data,[model1,model2,model3,model4,model5,model6,model7,model8,model9,model10],model10)

In [10]:
result

Unnamed: 0,(1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
Intercept,0.008,0.009,0.004,0.075**,0.075**,0.090***,0.090***,0.090***,0.090***,0.090***
,(1.08),(1.42),(0.65),(2.09),(2.07),(2.63),(2.63),(2.63),(2.63),(2.63)
tail_risk,0.008**,0.009**,0.010**,0.009**,0.010**,0.010***,0.010***,0.010***,0.010***,0.010***
,(1.99),(2.20),(2.47),(2.39),(2.42),(2.59),(2.59),(2.59),(2.59),(2.59)
beta,,-0.002,-0.002,-0.003,-0.001,-0.001,-0.001,-0.001,-0.001,-0.001
,,(-0.38),(-0.54),(-0.77),(-0.26),(-0.33),(-0.33),(-0.33),(-0.33),(-0.33)
bm,,,0.015***,0.015***,0.013***,0.014***,0.014***,0.014***,0.014***,0.014***
,,,(3.28),(3.70),(3.26),(3.80),(3.80),(3.80),(3.80),(3.80)
log_cap,,,,-0.003**,-0.003*,-0.004**,-0.004**,-0.004**,-0.004**,-0.004**
,,,,(-2.02),(-1.87),(-2.51),(-2.51),(-2.51),(-2.51),(-2.51)
