In [243]:
# Relative directory paths: generated input data, and the results folder
resource, results = "../../data/generated/", "../../results/"

In [244]:
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS, compare
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
# from tabprintin.beautify import *

In [245]:
# Analysis window; both bounds are compared inclusively where the sample is filtered
start_date = pd.Timestamp('1984-01-01')
end_date = pd.Timestamp('2008-04-30')

In [246]:
# Load the time-series file and derive every regression input as a new column of `ts`.
# Naming convention used below: a `lag_` prefix means the series is shifted down by one
# row, so the value stored at row t is the observation from row t-1.
ts = pd.read_csv(resource + 'time_series.csv', parse_dates=['date'], index_col=['date'])
# Declare a month-end frequency on the date index (pandas rejects it if the dates do not conform)
ts.index.freq = 'M'

# Dependent variable: log change of industrial production ahead of row t.
# NOTE(review): the ratio is ind_prod[t+12] / ind_prod[t+1], i.e. an 11-month span,
# although the stated intent is "next 12 months". Confirm whether the denominator
# should be ind_prod[t] (or the numerator ind_prod[t+13]) before relying on the horizon.
ts['log_indprod_growth_nextyear'] = np.log(ts['ind_prod'].shift(-12) / ts['ind_prod'].shift(-1))

##################
## Base assets ###
##################

# Market excess return (already supplied in the file); only the one-period lag is added.
# ex_mkt is treated as a percentage further down, see the (ex_mkt + 100) / 100 conversion.
ts['lag_ex_mkt'] = ts['ex_mkt'].shift(1)

# Excess returns over the risk-free rate, plus their one-period lags, for the
# long-term government, intermediate-term government and high-yield bond portfolios
for asset in ('long_gov_ret', 'medium_gov_ret', 'high_yd_bd_ret'):
    ts[f'ex_{asset}'] = ts[asset] - ts['rf']
    ts[f'lag_ex_{asset}'] = ts[f'ex_{asset}'].shift(1)

# Gold: the return is the period-over-period change of the index level, minus the risk-free rate.
# NOTE(review): pct_change() yields a fraction while ex_mkt is handled as a percentage;
# confirm that rf and the gold return are expressed in matching units.
ts['ex_gold_ret'] = ts['gold'].pct_change() - ts['rf']
ts['lag_ex_gold_ret'] = ts['ex_gold_ret'].shift(1)

# Create dummies for 1987 (stock market crash) and 1996-2002 (Internet bubble period)
ts['dummy_87'] = (ts.index.year == 1987).astype(int)
ts['dummy_96_02'] = ((ts.index.year >= 1996) & (ts.index.year <= 2002)).astype(int)

#########################
### Control Variables ###
#########################
# Compute the 10 year minus 3 month government bond yield
ts['lag_10y_3m_gov_bd_yd'] = (ts['DGS10'] - ts['DTB3']).shift(1)

# Compute the 1 year minus 3 month government bond yield
ts['lag_1y_3m_gov_bd_yd'] = (ts['DGS1'] - ts['DTB3']).shift(1)

# Baa minus Aaa corporate bond yield
ts['lag_Baa_Aaa_bd_yd'] = (ts['BAA'] - ts['AAA']).shift(1)

# Compute the dividend yield on the S&P 500 index
ts['lag_sp_div_yd'] = (ts['sp_div'] / ts['sp_price']).shift(1)

# Log change of industrial production over the 12 periods ending at t-1.
# The ratio is newest / oldest (ind_prod[t-1] / ind_prod[t-13]) so that a rise in
# production gives a positive value, matching the orientation of infl_lastyear below.
ts['log_indprod_growth_lastyear'] = np.log(ts['ind_prod'].shift(1) / ts['ind_prod'].shift(13))

# Log inflation over the 12 periods ending at t-1
ts['infl_lastyear'] = np.log(ts['cpi'].shift(1) / ts['cpi'].shift(13))

# Cumulative market excess return over the 12 periods ending at t-1, in percent.
# The 13-row window covers t-12..t; dropping its last element leaves t-12..t-1, whose
# gross returns are compounded.
# TODO(review): the original author marked this line as "check, it is wrong" — the window
# arithmetic matches the other *_lastyear controls, but confirm that compounding excess
# (rather than raw) returns is what is wanted.
ts['ex_mkt_lastyear'] = (
    ((ts['ex_mkt'] + 100) / 100).rolling(13).apply(lambda x: x[:-1].prod(), raw=True) - 1
) * 100

# Interactions: market excess return switched on only inside each dummy period
ts['slope_ex_mkt_87'] = ts['ex_mkt'] * ts['dummy_87']
ts['slope_ex_mkt_9602'] = ts['ex_mkt'] * ts['dummy_96_02']

ts['lag_slope_ex_mkt_87'] = ts['slope_ex_mkt_87'].shift(1)
ts['lag_slope_ex_mkt_9602'] = ts['slope_ex_mkt_9602'].shift(1)

In [247]:
# Keep only the rows whose date falls inside [start_date, end_date]
in_window = (ts.index >= start_date) & (ts.index <= end_date)
ts1 = ts.loc[in_window]

In [248]:
# Regress next-year industrial-production growth on the base-asset terms and controls.
#   mod1: intercept + contemporaneous base-asset terms + control variables
#   mod2: mod1 plus the one-period lag of every base-asset term
# The formulas are assembled from shared term lists so the two specifications cannot
# drift apart; term order is the same as in the hand-written formulas they replace.
base_asset_terms = [
    'ex_mkt', 'ex_long_gov_ret', 'ex_medium_gov_ret', 'ex_high_yd_bd_ret',
    'ex_gold_ret', 'slope_ex_mkt_87', 'slope_ex_mkt_9602',
]
control_terms = [
    'rf', 'lag_10y_3m_gov_bd_yd', 'lag_1y_3m_gov_bd_yd', 'lag_Baa_Aaa_bd_yd',
    'lag_sp_div_yd', 'log_indprod_growth_lastyear', 'infl_lastyear', 'ex_mkt_lastyear',
]
lagged_base_asset_terms = ['lag_' + term for term in base_asset_terms]


def _growth_formula(terms):
    """Return the formula regressing log_indprod_growth_nextyear on an intercept plus `terms`."""
    return 'log_indprod_growth_nextyear ~ 1 + ' + ' + '.join(terms)


mod1 = smf.ols(_growth_formula(base_asset_terms + control_terms), data=ts1)
mod2 = smf.ols(
    _growth_formula(base_asset_terms + control_terms + lagged_base_asset_terms),
    data=ts1,
)

# HAC covariance with 11 lags for both fits.
# NOTE(review): 11 lags presumably reflects the overlap in the multi-month growth
# horizon of the dependent variable — confirm against that horizon.
hac_options = {'cov_type': 'HAC', 'cov_kwds': {'maxlags': 11}}
reg1 = mod1.fit(**hac_options)
reg2 = mod2.fit(**hac_options)

# Print the header table and the coefficient table of each fit (summary built once per model)
for fitted in (reg1, reg2):
    fitted_summary = fitted.summary()
    print(fitted_summary.tables[0], fitted_summary.tables[1])

                                 OLS Regression Results                                
Dep. Variable:     log_indprod_growth_nextyear   R-squared:                       0.381
Model:                                     OLS   Adj. R-squared:                  0.348
Method:                          Least Squares   F-statistic:                     6.051
Date:                         Thu, 13 Apr 2023   Prob (F-statistic):           6.26e-11
Time:                                 23:02:00   Log-Likelihood:                 637.59
No. Observations:                          292   AIC:                            -1243.
Df Residuals:                              276   BIC:                            -1184.
Df Model:                                   15                                         
Covariance Type:                           HAC                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
------------------------

Summary statistics

Granger causality

In [249]:
# Factor `myp`: one-year-ahead industrial production growth expectations.
# It is the reg2-coefficient-weighted sum of the contemporaneous base-asset terms only
# (intercept, controls and lagged terms are left out of the sum).
coef = reg2.params
myp_terms = ['ex_mkt', 'ex_long_gov_ret', 'ex_medium_gov_ret', 'ex_high_yd_bd_ret',
             'ex_gold_ret', 'slope_ex_mkt_87', 'slope_ex_mkt_9602']
ts['myp'] = sum(coef[term] * ts[term] for term in myp_terms)

# Factor `ui`: unexpected inflation, taken as the part of the change in one-period
# log inflation that an MA(1) fit does not reproduce (actual minus fitted value)
ts['infl'] = np.log(ts['cpi'].div(ts['cpi'].shift()))
ts['delta_infl'] = ts['infl'].diff()
inf_ma1 = sm.tsa.arima.ARIMA(ts['delta_infl'], order=(0,0,1)).fit()
ts['fit_delta_infl'] = inf_ma1.fittedvalues
ts['ui'] = ts['delta_infl'] - ts['fit_delta_infl']

# Factor `dsv`: change in the aggregate survival probability.
# TODO: not computed in this cell. NOTE(review): 'dsv' is selected as a column when the
# VAR is built, so it presumably already exists in the loaded file — confirm.

# Factor `ats`: change in the average level of the term structure
# (row-wise mean of the 3-month and 10-year yields, then first difference)
ts['mean_term_structure'] = ts[['DTB3', 'DGS10']].mean(axis=1)
ts['ats'] = ts['mean_term_structure'].diff()

# Factor `sts`: change in the slope of the term structure
# (10-year minus 3-month yield, then first difference)
ts['diff_term_structure'] = ts['DGS10'] - ts['DTB3']
ts['sts'] = ts['diff_term_structure'].diff()

# Factor `fx`: change in the multilateral US dollar exchange rate.
# TWEXBMTH is regressed on TWEXBGSMTH; wherever TWEXBMTH is missing, the fitted value
# from that regression stands in, and the spliced level series is then differenced.
mod3 = smf.ols('TWEXBMTH ~ 1 + TWEXBGSMTH', data=ts).fit()
parm = mod3.params
ts['fit_TWEXBMTH'] = parm['Intercept'] + parm['TWEXBGSMTH'] * ts['TWEXBGSMTH']
has_actual_rate = ts['TWEXBMTH'].notna()
ts['exchange_rate'] = np.where(has_actual_rate, ts['TWEXBMTH'], ts['fit_TWEXBMTH'])
ts['fx'] = ts['exchange_rate'].diff()

In [250]:
# Sub-sample window for the VAR estimation.
# NOTE: this rebinds start_date / end_date, which were first set for the regression
# sample; re-running the ts1 filter after this cell would pick up this shorter window.
start_date = pd.Timestamp('1984-01-01')
end_date = pd.Timestamp('1999-12-31')
var_window = (ts.index >= start_date) & (ts.index <= end_date)
ts2 = ts.loc[var_window]

In [251]:
# Fit a one-lag VAR with a constant term on the factor columns of the ts2 sub-sample.
# The fitted object is bound to `var_results` rather than `results`, because `results`
# already holds the output-directory path string set at the top of the notebook and
# rebinding it here would silently replace that path with a model object.
var_factors = ['hml', 'smb', 'mom', 'myp', 'ui', 'dsv', 'ats', 'sts', 'fx']
model = VAR(ts2[var_factors])
var_results = model.fit(maxlags=1, trend='c')

# Print summary table
print(var_results.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Thu, 13, Apr, 2023
Time:                     23:02:00
--------------------------------------------------------------------
No. of Equations:         9.00000    BIC:                   -87.5071
Nobs:                     191.000    HQIC:                  -88.4189
Log likelihood:           6154.13    FPE:                2.14275e-39
AIC:                     -89.0396    Det(Omega_mle):     1.35365e-39
--------------------------------------------------------------------
Results for equation hml
            coefficient       std. error           t-stat            prob
-------------------------------------------------------------------------
const         -0.000521         0.001947           -0.268           0.789
L1.hml         0.323533         0.084793            3.816           0.000
L1.smb         0.019084         0.083014            0.230           0.818
L1.mom        -0

# Summary statistics

In [252]:
# var_name = (['Modified Jones DA', 
#              'Dechow and Dichev DA', 
#              'Ln(Total assets)', 
#              'Ln(Market capitalization)',
#              'Market to book', 
#              'Return on assets', 
#              'Sales growth'])

# stat_name = (['N', 
#              'Mean', 
#              'Std. dev.', 
#              'Min.', 
#              '25\%', 
#              'Median',
#              '75\%',
#              'Max.'])

# summary_stats = smp_da[['mj_da', 'dd_da', 'ln_ta', 'ln_mktcap', 'mtb', 'ebit_avgta', 'sales_growth']].describe().T

# summary_stats.index = var_name
# summary_stats.columns = stat_name

# summary_stats.N = summary_stats.N.astype(int)

# print(summary_stats.style.format(decimal='.', thousands=',', precision=3).to_latex())
# summary_stats

# Regression

In [253]:
# mod1 = PanelOLS.from_formula('mj_da ~ EntityEffects + TimeEffects + ln_ta + mtb + ebit_avgta + sales_growth',
#                             data=smp_da.set_index(['gvkey', 'fyear'])
#                            )
# reg1 = mod1.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

# mod2 = PanelOLS.from_formula('dd_da ~ EntityEffects + TimeEffects + ln_ta + mtb + ebit_avgta + sales_growth',
#                             data=smp_da.set_index(['gvkey', 'fyear'])
#                            )
# reg2 = mod2.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

In [254]:
# print(reg1.summary.tables[1],reg2.summary.tables[1])

In [255]:
# print(compare({'model_A_name': reg1, 'model_B_name': reg2 }, stars = True))

In [256]:
# prova3 = smf.ols('dd_da ~ ln_ta + mtb + ebit_avgta + sales_growth',data=smp_da).fit()

In [257]:
# prova = pystout(models=[reg1,reg2,prova3],
#         file='test_table.tex',
#         addnotes=['Here is a little note','And another one'],
#         digits=3,
#         modstat={'nobs':'Obs','rsquared_adj':'Adj. R\sym{2}','fvalue':'F-stat'}
#         )
# print(prova)

In [258]:
# prova