In [1]:
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import ttest_ind
warnings.filterwarnings('ignore')

In [2]:
start_y = 1967
end_y = 2005
split_y = 1985

In [3]:
data = pd.read_csv('./data/master_data.csv')
data = data.loc[(data['fyear'] >= start_y) & (data['fyear'] <= end_y)]
data = data.dropna(subset=['exp_lag', 'exp', 'exp_lead', 'earn'])

In [4]:
def t_test(before, after, coef):
    t_stat, p_value = ttest_ind(before[coef], after[coef])
    return t_stat, p_value

In [5]:
# Panel A: Cross sectional regressions
results = pd.DataFrame(columns=['exp_lag', 'exp_cur', 'exp_lead'])


for year, group in data.groupby('fyear'):
    X = group[['exp_lag', 'exp', 'exp_lead']]
    X = sm.add_constant(X)
    y = group['rev']

    model = sm.OLS(y, X).fit()
    coef = model.params

    result = pd.DataFrame(
        {
            'exp_lag': coef['exp_lag'],
            'exp_cur': coef['exp'],
            'exp_lead': coef['exp_lead']
        },
        index=[year]
    )
    results = pd.concat([results, result])


results = results.reset_index(names='year')

In [6]:
panel_a = pd.DataFrame(columns=['exp_lag', 'exp_cur', 'exp_lead'])
panel_a = pd.concat([panel_a, results.loc[results['year'] <= split_y, ['exp_lag', 'exp_cur', 'exp_lead']].mean().to_frame().T], ignore_index=True)
panel_a = pd.concat([panel_a, results.loc[results['year'] >= split_y + 1, ['exp_lag', 'exp_cur', 'exp_lead']].mean().to_frame().T], ignore_index=True)
panel_a.loc['difference'] = panel_a.iloc[1] - panel_a.iloc[0]
panel_a.loc['t-statistic'] = np.nan
panel_a.loc['p-value'] = np.nan


for coef in ['exp_lag', 'exp_cur', 'exp_lead']:
    t, p = t_test(results.loc[results['year'] <= split_y], results.loc[results['year'] >= split_y + 1], coef)
    panel_a.loc['t-statistic', coef] = t
    panel_a.loc['p-value', coef] = p

panel_a

Unnamed: 0,exp_lag,exp_cur,exp_lead
0,0.016106,1.008213,-0.015138
1,0.101731,0.880352,0.032657
difference,0.085624,-0.127861,0.047796
t-statistic,-5.476643,4.76314,-3.378902
p-value,3e-06,2.9e-05,0.001726


In [7]:
# Panel B: Earn Volat and Persistence
persistence = pd.DataFrame(columns=['persistence'])


for year, group in data.groupby('fyear'):
    X = group[['earn_lag']]
    X = sm.add_constant(X)
    y = group['earn']

    model = sm.OLS(y, X).fit()
    coef = model.params

    result = pd.DataFrame(
        {
            'persistence': coef['earn_lag']
        },
        index=[year]
    )
    persistence = pd.concat([persistence, result])


persistence = persistence.reset_index(names='year')
volatility = data.groupby('fyear')['earn_5yr_volat'].mean().reset_index()

In [8]:
panel_b = pd.DataFrame(columns=['volatility', 'persistence'])
panel_b.loc[0, 'volatility'] = volatility.loc[volatility['fyear'] <= split_y, 'earn_5yr_volat'].mean()
panel_b.loc[1, 'volatility'] = volatility.loc[volatility['fyear'] >= split_y + 1, 'earn_5yr_volat'].mean()
panel_b.loc[0, 'persistence'] = persistence.loc[persistence['year'] <= split_y, 'persistence'].mean()
panel_b.loc[1, 'persistence'] = persistence.loc[persistence['year'] >= split_y + 1, 'persistence'].mean()
panel_b.loc['difference'] = panel_b.iloc[1] - panel_b.iloc[0]
panel_b.loc['t-statistic'] = np.nan
panel_b.loc['p-value'] = np.nan
panel_b.loc[['t-statistic', 'p-value'], 'volatility'] = t_test(volatility.loc[volatility['fyear'] <= split_y], volatility.loc[volatility['fyear'] >= split_y + 1], 'earn_5yr_volat')
panel_b.loc[['t-statistic', 'p-value'], 'persistence'] = t_test(persistence.loc[persistence['year'] <= split_y], persistence.loc[persistence['year'] >= split_y + 1], 'persistence')

panel_b

Unnamed: 0,volatility,persistence
0,0.015823,0.854779
1,0.025552,0.669035
difference,0.00973,-0.185745
t-statistic,-10.139186,5.424967
p-value,0.0,4e-06
