In [106]:
resource = "../../data/generated/"
results = "../../results/"

In [112]:
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS, compare
import statsmodels.formula.api as smf
# from tabprintin.beautify import *

In [113]:
ts = pd.read_csv(resource + 'time_series.csv', parse_dates=['date'], index_col=['date'])

# Compute the log change of industrial production over next 12 months (or just growth)
ts['log_indprod_growth_nextyear'] = np.log(ts['ind_prod'].shift(-12) / ts['ind_prod'].shift(-1))
# ts['indprod_growth_nextyear'] = ts['ind_prod'].shift(-12) / ts['ind_prod'].shift(-1) - 1

##################
## Base assets ###
##################

# Market return
ts['lag_ex_mkt'] = ts['ex_mkt'].shift(1)

# Compute the excess return of the long-term government bond portfolio
ts['ex_long_gov_ret'] = ts['long_gov_ret'] - ts['rf']
ts['lag_ex_long_gov_ret'] = ts['ex_long_gov_ret'].shift(1)

# Compute the excess return of the intermediate-term government bond portfolio
ts['ex_medium_gov_ret'] = ts['medium_gov_ret'] - ts['rf']
ts['lag_ex_medium_gov_ret'] = ts['ex_medium_gov_ret'].shift(1)

# Compute the excess return of the high-yield bond portfolio
ts['ex_high_yd_bd_ret'] = ts['high_yd_bd_ret'] - ts['rf']
ts['lag_ex_high_yd_bd_ret'] = ts['ex_high_yd_bd_ret'].shift(1)

# Compute the return for gold index
ts['ex_gold_ret'] = ts['gold'].pct_change() - ts['rf']
ts['lag_ex_gold_ret'] = ts['ex_gold_ret'].shift(1)

# Create dummies for 1987 (stock market crash) and 1996-2002 (Internet bubble period)
ts['dummy_87'] = (ts.index.year == 1987).astype(int)
ts['dummy_96_02'] = ((ts.index.year >= 1996) & (ts.index.year <= 2002)).astype(int)

#########################
### Control Variables ###
#########################
# Compute the 10 year minus 3 month government bond yield
ts['lag_10y_3m_gov_bd_yd'] = (ts['DGS10'] - ts['DTB3']).shift(1)

# Compute the 1 year minus 3 month government bond yield
ts['lag_1y_3m_gov_bd_yd'] = (ts['DGS1'] - ts['DTB3']).shift(1)

# Baa minus Aaa corporate bond yield
ts['lag_Baa_Aaa_bd_yd'] = (ts['BAA'] - ts['AAA']).shift(1)

# Compute the dividend yield on the S&P 500 index
ts['lag_sp_div_yd'] = (ts['sp_div'] / ts['sp_price']).shift(1)

# Compute the log change of industrial production over last 12 months (or just growth)
# ts['log_indprod_growth_lastyear'] = np.log(ts['ind_prod'].shift(13) / ts['ind_prod'].shift(1))
ts['indprod_growth_lastyear'] = ts['ind_prod'].shift(13) / ts['ind_prod'].shift(1)  - 1

# Compute the inflation over last 12 months
ts['infl_lastyear'] = (ts['cpi'].shift(1) - ts['cpi'].shift(13)) / ts['cpi'].shift(13)

# Compute the market portfolio excess return over last 12 months
# [Controllare e sbagliato]
ts['ex_mkt_lastyear'] = (((ts['ex_mkt'] + 100)/100).rolling(13).apply(lambda x: x[:-1].prod()) - 1) * 100

# Interactions
ts['slope_ex_mkt_87'] = ts['ex_mkt'] * ts['dummy_87']
ts['slope_ex_mkt_9602'] = ts['ex_mkt'] * ts['dummy_96_02']

ts['lag_slope_ex_mkt_87'] = ts['slope_ex_mkt_87'].shift(1)
ts['lag_slope_ex_mkt_9602'] = ts['slope_ex_mkt_9602'].shift(1)

In [114]:
# Set the start and end dates of sample
start_date = '1984-01-01'
end_date = '2008-04-30'

ts = ts.loc[(ts.index >= start_date) & (ts.index <= end_date)]

In [118]:
# mod1 = PanelOLS.from_formula('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_long_gov_ret+ex_medium_gov_ret+ex_high_yd_bd_ret+ex_gold_ret+ex_mkt*dummy_87+ex_mkt*dummy_96_02+lagged_10y_3m_gov_bd_yd+lagged_1y_3m_gov_bd_yd+lagged_Baa_Aaa_bd_yd+lagged_sp_div_yd+log_indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear',
#                              data=ts)

mod1 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_long_gov_ret+ex_medium_gov_ret+ex_high_yd_bd_ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear',
                data=ts)

mod2 = smf.ols('log_indprod_growth_nextyear ~ 1+ex_mkt+ex_long_gov_ret+ex_medium_gov_ret+ex_high_yd_bd_ret+ex_gold_ret+slope_ex_mkt_87+slope_ex_mkt_9602+rf+lag_10y_3m_gov_bd_yd+lag_1y_3m_gov_bd_yd+lag_Baa_Aaa_bd_yd+lag_sp_div_yd+indprod_growth_lastyear+infl_lastyear+ex_mkt_lastyear+ lag_ex_mkt+lag_ex_long_gov_ret+lag_ex_medium_gov_ret+lag_ex_high_yd_bd_ret+lag_ex_gold_ret+lag_slope_ex_mkt_87+lag_slope_ex_mkt_9602',
                data=ts)

reg1 = mod1.fit(cov_type='HAC',cov_kwds={'maxlags':11})
reg2 = mod2.fit(cov_type='HAC',cov_kwds={'maxlags':11})
print(reg1.summary().tables[0],reg1.summary().tables[1])
print(reg2.summary().tables[0],reg2.summary().tables[1])
# print(compare({'model_A_name': reg1, 'model_B_name': reg2 }, stars = True))

                                 OLS Regression Results                                
Dep. Variable:     log_indprod_growth_nextyear   R-squared:                       0.381
Model:                                     OLS   Adj. R-squared:                  0.348
Method:                          Least Squares   F-statistic:                     6.070
Date:                         Thu, 13 Apr 2023   Prob (F-statistic):           5.70e-11
Time:                                 10:27:27   Log-Likelihood:                 637.64
No. Observations:                          292   AIC:                            -1243.
Df Residuals:                              276   BIC:                            -1184.
Df Model:                                   15                                         
Covariance Type:                           HAC                                         
                              coef    std err          z      P>|z|      [0.025      0.975]
----------------------------

# Summary statistics

In [4]:
var_name = (['Modified Jones DA', 
             'Dechow and Dichev DA', 
             'Ln(Total assets)', 
             'Ln(Market capitalization)',
             'Market to book', 
             'Return on assets', 
             'Sales growth'])

stat_name = (['N', 
             'Mean', 
             'Std. dev.', 
             'Min.', 
             '25\%', 
             'Median',
             '75\%',
             'Max.'])

summary_stats = smp_da[['mj_da', 'dd_da', 'ln_ta', 'ln_mktcap', 'mtb', 'ebit_avgta', 'sales_growth']].describe().T

summary_stats.index = var_name
summary_stats.columns = stat_name

summary_stats.N = summary_stats.N.astype(int)

print(summary_stats.style.format(decimal='.', thousands=',', precision=3).to_latex())
summary_stats

\begin{tabular}{lrrrrrrrr}
 & N & Mean & Std. dev. & Min. & 25\% & Median & 75\% & Max. \\
Modified Jones DA & 57,163 & 0.006 & 0.237 & -1.927 & -0.047 & 0.017 & 0.088 & 1.027 \\
Dechow and Dichev DA & 57,163 & 0.001 & 0.096 & -0.490 & -0.032 & 0.002 & 0.038 & 0.385 \\
Ln(Total assets) & 57,163 & 4.568 & 2.403 & -2.207 & 2.871 & 4.466 & 6.219 & 11.770 \\
Ln(Market capitalization) & 57,163 & 4.558 & 2.421 & -1.552 & 2.794 & 4.439 & 6.284 & 11.924 \\
Market to book & 57,163 & 2.786 & 8.532 & -125.299 & 0.929 & 1.801 & 3.485 & 123.185 \\
Return on assets & 57,163 & -0.125 & 0.643 & -12.152 & -0.104 & 0.042 & 0.088 & 0.897 \\
Sales growth & 57,163 & 1.013 & 0.471 & -4.642 & 0.988 & 1.013 & 1.055 & 5.902 \\
\end{tabular}



Unnamed: 0,N,Mean,Std. dev.,Min.,25\%,Median,75\%,Max.
Modified Jones DA,57163,0.006109,0.237157,-1.926686,-0.047093,0.017298,0.087554,1.026649
Dechow and Dichev DA,57163,0.00112,0.095979,-0.489783,-0.032441,0.001534,0.038344,0.385274
Ln(Total assets),57163,4.568192,2.402711,-2.207275,2.871331,4.466322,6.219185,11.770022
Ln(Market capitalization),57163,4.557544,2.421399,-1.552481,2.793788,4.439238,6.283627,11.92433
Market to book,57163,2.786015,8.532028,-125.299106,0.928928,1.801402,3.485184,123.184914
Return on assets,57163,-0.12516,0.642789,-12.152318,-0.104129,0.041673,0.088155,0.896939
Sales growth,57163,1.012775,0.471146,-4.642257,0.98781,1.013323,1.055299,5.902028


# Regression

In [5]:
mod1 = PanelOLS.from_formula('mj_da ~ EntityEffects + TimeEffects + ln_ta + mtb + ebit_avgta + sales_growth',
                            data=smp_da.set_index(['gvkey', 'fyear'])
                           )
reg1 = mod1.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

mod2 = PanelOLS.from_formula('dd_da ~ EntityEffects + TimeEffects + ln_ta + mtb + ebit_avgta + sales_growth',
                            data=smp_da.set_index(['gvkey', 'fyear'])
                           )
reg2 = mod2.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

In [6]:
print(reg1.summary.tables[1],reg2.summary.tables[1])

                              Parameter Estimates                               
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
ebit_avgta       0.2322     0.0258     8.9914     0.0000      0.1816      0.2828
ln_ta           -0.0410     0.0052    -7.9027     0.0000     -0.0512     -0.0308
mtb           2.526e-05     0.0002     0.1172     0.9067     -0.0004      0.0004
sales_growth     0.0098     0.0043     2.2714     0.0231      0.0013      0.0182
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
ebit_avgta       0.0492     0.0079     6.2288     0.0000      0.0337      0.0647
ln_ta            0.0035     0.0012     2.9030     0.0037      0.0011      0.0059
mtb              0.0005  8.469e-05     5.9695     0.0000      0.0003      0.0007
sales_growth     0.0101     

In [7]:
print(compare({'model_A_name': reg1, 'model_B_name': reg2 }, stars = True))

                  Model Comparison                  
                          model_A_name  model_B_name
----------------------------------------------------
Dep. Variable                    mj_da         dd_da
Estimator                     PanelOLS      PanelOLS
No. Observations                 57163         57163
Cov. Est.                    Clustered     Clustered
R-squared                       0.1890        0.0590
R-Squared (Within)              0.1752        0.0587
R-Squared (Between)            -0.6533       -0.0607
R-Squared (Overall)            -0.6454       -0.0257
F-statistic                     2811.9        756.65
P-value (F-stat)                0.0000        0.0000
ebit_avgta                   0.2322***     0.0492***
                              (8.9914)      (6.2288)
ln_ta                       -0.0410***     0.0035***
                             (-7.9027)      (2.9030)
mtb                          2.526e-05     0.0005***
                              (0.1172)      (5

In [8]:
prova3 = smf.ols('dd_da ~ ln_ta + mtb + ebit_avgta + sales_growth',data=smp_da).fit()

In [9]:
prova = pystout(models=[reg1,reg2,prova3],
        file='test_table.tex',
        addnotes=['Here is a little note','And another one'],
        digits=3,
        modstat={'nobs':'Obs','rsquared_adj':'Adj. R\sym{2}','fvalue':'F-stat'}
        )
print(prova)


\hline
Obs & 57163 & 57163 & 57163 \\
Adj. R\sym{2} &  &  & 0.054 \\
F-stat & 2811.931 & 756.648 & 812.885 \\
\hline\hline
\multicolumn{4}{l}{\footnotesize Here is a little note}\vspace{-.25em} \\
\multicolumn{4}{l}{\footnotesize And another one}
\end{tabular}
}


  options = options.append(pd.DataFrame([r],index=[value]))
  options = options.append(pd.DataFrame([r],index=[value]))
  options = options.append(pd.DataFrame([r],index=[value]))


In [10]:
prova

'\n\\hline\nObs & 57163 & 57163 & 57163 \\\\\nAdj. R\\sym{2} &  &  & 0.054 \\\\\nF-stat & 2811.931 & 756.648 & 812.885 \\\\\n\\hline\\hline\n\\multicolumn{4}{l}{\\footnotesize Here is a little note}\\vspace{-.25em} \\\\\n\\multicolumn{4}{l}{\\footnotesize And another one}\n\\end{tabular}\n}'