In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import wrds
import matplotlib.pyplot as plt
from dateutil.relativedelta import *
from pandas.tseries.offsets import *
from scipy import stats
import janitor 
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns

import statsmodels.api as sm

In [149]:
data = pd.read_pickle("data99.pkl")
beme = pd.read_pickle('portfolio_beme.pkl')
returns = pd.read_pickle('portfolio_return.pkl')

In [150]:
list = ['MV_excess', 'BV_excess', 'SV_excess', 'MG_excess', 'BG_excess', 'SG_excess']
beme = beme.rename(columns={col: col.replace('_excess', '_BM') for col in list})
beme00 = beme[['jdate','MV_BM','BV_BM','SV_BM','MG_BM','BG_BM','SG_BM']]
returns = returns.rename(columns={col: col.replace('_excess', '_Return') for col in list})
returns00 = returns[['jdate','MV_Return','BV_Return','SV_Return','MG_Return','BG_Return','SG_Return']]

In [151]:
sample = pd.merge(returns00,beme00,on='jdate',how='inner')
sample_whole = sample[(sample['jdate']>='1963-07-31') & (sample['jdate']<='2019-06-30')]
sample_before = sample[(sample['jdate']>='1963-07-31') & (sample['jdate']<='1991-06-30')]
sample_after = sample[(sample['jdate']>'1991-06-30') & (sample['jdate']<='2019-06-30')]

In [152]:
portfolios = ['MV', 'BV', 'SV', 'MG', 'BG', 'SG']
results = []

def portfolio_regressions(data, portfolios):
    results = []
    for portfolio in portfolios:
        ret_col = f"{portfolio}_Return" 
        bm_col = f"{portfolio}_BM"     
        
        X = sm.add_constant(data[bm_col])  
        y = data[ret_col]
        model = sm.OLS(y, X).fit()       

        results.append({
            'portfolio': portfolio,
            'a': model.params['const'],                # 截距
            'b': model.params[bm_col],                # 斜率
            'StdErr(a)': model.bse['const'],          # 截距标准误差
            'StdErr(b)': model.bse[bm_col],           # 斜率标准误差
            't(a)': model.tvalues['const'],           # 截距 t 值
            't(b)': model.tvalues[bm_col],            # 斜率 t 值
            'R2': model.rsquared,                     # R 方
            'RSE': (model.ssr / model.df_resid) ** 0.5  # 残差标准误差
        })
    return pd.DataFrame(results)

    

In [153]:
portfolios = ['MV', 'BV', 'SV', 'MG', 'BG', 'SG']
table3_panelA = portfolio_regressions(sample_whole, portfolios)
table3_panelA

Unnamed: 0,portfolio,a,b,StdErr(a),StdErr(b),t(a),t(b),R2,RSE
0,MV,-1.154607,1.861209,0.272253,0.331287,-4.24093,5.618111,0.04499,2.644347
1,BV,-1.072555,1.75906,0.290889,0.377845,-3.687161,4.655506,0.031335,2.70415
2,SV,-1.613829,2.223657,0.354007,0.345461,-4.558749,6.436792,0.058238,3.532236
3,MG,0.175026,0.724594,0.128793,0.433993,1.358966,1.669596,0.004143,1.121978
4,BG,0.198118,0.79677,0.135608,0.459197,1.460967,1.735137,0.004473,1.164167
5,SG,0.185422,0.373846,0.34839,1.167582,0.532227,0.320188,0.000153,3.614624


In [154]:
sample_whole = sample.copy()
sample_whole['dummy'] = (sample_whole['jdate'] > '1991-06-30').astype(int)

In [155]:
portfolios = ['MV', 'BV', 'SV', 'MG', 'BG', 'SG']
results = []

def portfolio_regressions_with_dummy(data, portfolios, dummy_col):
    results = []
    
    for portfolio in portfolios:
        ret_col = f"{portfolio}_Return"
        bm_col = f"{portfolio}_BM"
        
        data['const_dummy'] = data[dummy_col] 
        data['bm_dummy'] = data[dummy_col] * data[bm_col] 
        
        X = sm.add_constant(data[[bm_col, 'const_dummy', 'bm_dummy']])  
        y = data[ret_col] 
        model = sm.OLS(y, X).fit()

        results.append({
            'portfolio': portfolio,
            'a': model.params['const'], 
            'da': model.params['const_dummy'],                
            'b': model.params[bm_col],                
            'db': model.params['bm_dummy'],      
            'StdErr(a)': model.bse['const'], 
            'StdErr(da)': model.bse['const_dummy'],        
            'StdErr(b)': model.bse[bm_col],          
            'StdErr(db)': model.bse['bm_dummy'],    
            't(a)': model.tvalues['const'],  
            't(da)': model.tvalues['const_dummy'],         
            't(b)': model.tvalues[bm_col],          
            't(db)': model.tvalues['bm_dummy'],     
            'R2': model.rsquared,                     
            'RSE': (model.ssr / model.df_resid) ** 0.5  
        })
    
    return pd.DataFrame(results)


In [156]:
portfolios = ['MV', 'BV', 'SV', 'MG', 'BG', 'SG']
table3_panelB = portfolio_regressions_with_dummy(sample_whole, portfolios,dummy_col='dummy')
table3_panelB

Unnamed: 0,portfolio,a,da,b,db,StdErr(a),StdErr(da),StdErr(b),StdErr(db),t(a),t(da),t(b),t(db),R2,RSE
0,MV,-0.847192,-0.557515,1.589385,0.486258,0.39963,0.547108,0.47179,0.665642,-2.119941,-1.019022,3.368838,0.73051,0.046938,2.6456
1,BV,-0.861515,-0.373925,1.603803,0.25301,0.41936,0.584683,0.526399,0.760596,-2.054356,-0.639535,3.046741,0.332647,0.032713,2.706268
2,SV,-0.78521,-1.332386,1.436994,1.264236,0.573889,0.728945,0.556495,0.710316,-1.368225,-1.827829,2.582224,1.779823,0.062996,3.528572
3,MG,0.414007,-0.368928,1.332186,-1.065711,0.232156,0.336721,0.653611,1.271799,1.783313,-1.095648,2.038195,-0.837956,0.006444,1.122358
4,BG,0.451283,-0.380423,1.446338,-1.091651,0.247392,0.35418,0.701106,1.333651,1.824165,-1.074096,2.062939,-0.818543,0.006713,1.164596
5,SG,0.312288,0.300602,0.463367,2.377461,0.543856,0.869884,1.515909,3.41128,0.574211,0.345565,0.305669,0.696941,0.001853,3.616951


In [157]:
X1_mean = sample_whole[sample_whole['dummy'] == 0][['MV_BM', 'BV_BM', 'SV_BM', 'MG_BM', 'BG_BM', 'SG_BM']].mean()
X1_mean = X1_mean.reset_index().rename(columns={'index': 'portfolio', 0: 'X1_mean'})
X1_mean['portfolio'] = X1_mean['portfolio'].str.replace('_BM', '')

X2_mean = sample_whole[sample_whole['dummy'] == 1][['MV_BM', 'BV_BM', 'SV_BM', 'MG_BM', 'BG_BM', 'SG_BM']].mean()
X2_mean = X2_mean.reset_index().rename(columns={'index': 'portfolio', 0: 'X2_mean'})
X2_mean['portfolio'] = X2_mean['portfolio'].str.replace('_BM', '')

In [161]:
table00 = pd.merge(table3_panelA,X1_mean,on='portfolio',how='inner')
table11 = pd.merge(table00,X2_mean,on='portfolio',how='inner')

In [165]:
table11

Unnamed: 0,portfolio,a,b,StdErr(a),StdErr(b),t(a),t(b),R2,RSE,X1_mean,X2_mean,Y11,Y12,Y11-Y12
0,MV,-1.154607,1.861209,0.272253,0.331287,-4.24093,5.618111,0.04499,2.644347,0.789879,0.733999,0.315522,0.211519,0.104004
1,BV,-1.072555,1.75906,0.290889,0.377845,-3.687161,4.655506,0.031335,2.70415,0.745654,0.691664,0.239095,0.144123,0.094972
2,SV,-1.613829,2.223657,0.354007,0.345461,-4.558749,6.436792,0.058238,3.532236,0.971512,0.920068,0.546481,0.432086,0.114394
3,MG,0.175026,0.724594,0.128793,0.433993,1.358966,1.669596,0.004143,1.121978,-0.342614,-0.216394,-0.073231,0.018228,-0.091458
4,BG,0.198118,0.79677,0.135608,0.459197,1.460967,1.735137,0.004473,1.164167,-0.341025,-0.216277,-0.0736,0.025795,-0.099395
5,SG,0.185422,0.373846,0.34839,1.167582,0.532227,0.320188,0.000153,3.614624,-0.334319,-0.212569,0.060438,0.105954,-0.045516


In [168]:
table11['Y11'] = table11['a'] + table11['X1_mean']*table11['b'] 
table11['Y12'] = table11['a'] + table11['X2_mean']*table11['b']
table11['Y11-Y12'] = table11['Y11'] - table11['Y12']
table11['se(Y11-Y12)'] = np.abs(((table11['X1_mean'] - table11['X2_mean'])*table11['StdErr(b)']))
table11['t(Y11-Y12)'] = table11['StdErr(b)']*np.sign(table11['X1_mean'] - table11['X2_mean'])

In [169]:
table11

Unnamed: 0,portfolio,a,b,StdErr(a),StdErr(b),t(a),t(b),R2,RSE,X1_mean,X2_mean,Y11,Y12,Y11-Y12,se(Y11-Y12),t(Y11-Y12)
0,MV,-1.154607,1.861209,0.272253,0.331287,-4.24093,5.618111,0.04499,2.644347,0.789879,0.733999,0.315522,0.211519,0.104004,0.018512,0.331287
1,BV,-1.072555,1.75906,0.290889,0.377845,-3.687161,4.655506,0.031335,2.70415,0.745654,0.691664,0.239095,0.144123,0.094972,0.0204,0.377845
2,SV,-1.613829,2.223657,0.354007,0.345461,-4.558749,6.436792,0.058238,3.532236,0.971512,0.920068,0.546481,0.432086,0.114394,0.017772,0.345461
3,MG,0.175026,0.724594,0.128793,0.433993,1.358966,1.669596,0.004143,1.121978,-0.342614,-0.216394,-0.073231,0.018228,-0.091458,0.054779,-0.433993
4,BG,0.198118,0.79677,0.135608,0.459197,1.460967,1.735137,0.004473,1.164167,-0.341025,-0.216277,-0.0736,0.025795,-0.099395,0.057283,-0.459197
5,SG,0.185422,0.373846,0.34839,1.167582,0.532227,0.320188,0.000153,3.614624,-0.334319,-0.212569,0.060438,0.105954,-0.045516,0.142153,-1.167582


In [61]:
mean00 = pd.DataFrame(returns.mean())
mean00.rename(columns={0:'mean'}, inplace=True)

In [65]:
mean11 = mean00.iloc[0:6,:]