In [126]:
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.vector_ar.vecm import coint_johansen
# Import Statsmodels
from statsmodels.tsa.api import VAR, VARMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.stats.stattools import durbin_watson

In [127]:
def cointegration_test(df, alpha=0.05): 
    """Perform Johanson's Cointegration Test and Report Summary"""
    out = coint_johansen(df,-1,5)
    d = {'0.90':0, '0.95':1, '0.99':2}
    traces = out.lr1
    cvts = out.cvt[:, d[str(1-alpha)]]
    def adjust(val, length= 6): return str(val).ljust(length)

    # Summary
    print('Name   ::  Test Stat > C(95%)    =>   Signif  \n', '--'*20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace,2), 9), ">", adjust(cvt, 8), ' =>  ' , trace > cvt)
        

In [128]:
maxlag=12
test = 'ssr_chi2test'
def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False):    
    """Check Granger Causality of all possible combinations of the Time series.
    The rows are the response variable, columns are predictors. The values in the table 
    are the P-Values. P-Values lesser than the significance level (0.05), implies 
    the Null Hypothesis that the coefficients of the corresponding past values is 
    zero, that is, the X does not cause Y can be rejected.

    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    """
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            df.loc[r, c] = min_p_value
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

In [129]:
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Perform ADFuller to test for Stationarity of given series and print report"""
    r = adfuller(series, autolag='AIC')
    output = {'test_statistic':round(r[0], 4), 'pvalue':round(r[1], 4), 'n_lags':round(r[2], 4), 'n_obs':r[3]}
    p_value = output['pvalue'] 
    def adjust(val, length= 6): return str(val).ljust(length)

    # Print Summary
    print(f'    Augmented Dickey-Fuller Test on "{name}"', "\n   ", '-'*47)
    print(f' Null Hypothesis: Data has unit root. Non-Stationary.')
    print(f' Significance Level    = {signif}')
    print(f' Test Statistic        = {output["test_statistic"]}')
    print(f' No. Lags Chosen       = {output["n_lags"]}')

    for key,val in r[4].items():
        print(f' Critical value {adjust(key)} = {round(val, 3)}')

    if p_value <= signif:
        print(f" => P-Value = {p_value}. Rejecting Null Hypothesis.")
        print(f" => Series is Stationary.")
    else:
        print(f" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.")
        print(f" => Series is Non-Stationary.")

# Import data

In [197]:
df = pd.read_csv("quaterly_data.csv", parse_dates=['date'], index_col='date')
df = df.fillna(df.mean())
df.head()

Unnamed: 0_level_0,us bankruptcy,EFFR,us employment cost index,GDP(Billions),us unemployment rate,"Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted",adults investing money in the stock market(%),HSCI,CCMP Index,NYA Index,us market cap,hk bankruptcy
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-03-31,312335,5.85,118.325275,10002.179,4.087491,6.48,62,19994140000000.0,24100000000000.0,32800000000000.0,56900000000000.0,1106
2000-06-30,321729,6.53,118.325275,10247.72,3.984665,6.176667,62,19994140000000.0,22300000000000.0,31900000000000.0,54100000000000.0,1054
2000-09-30,308718,6.52,118.325275,10318.165,4.058545,5.893333,62,19994140000000.0,23000000000000.0,33100000000000.0,56100000000000.0,1198
2000-12-31,310169,6.4,118.325275,10435.744,3.949703,5.566667,62,19994140000000.0,20600000000000.0,32000000000000.0,52700000000000.0,1198
2001-03-31,366841,5.31,87.6,10470.231,4.292366,5.05,62,19994140000000.0,17800000000000.0,31700000000000.0,49500000000000.0,1428


# Testing Causation using Granger’s Causality Test

In [198]:
grangers_causation_matrix(df, variables = df.columns)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=-0.0000 , p=1.0000  , df_denom=92, df_num=1
ssr based chi2 test:   chi2=-0.0000 , p=1.0000  , df=1
likelihood ratio test: chi2=-0.0000 , p=1.0000  , df=1
parameter F test:         F=247.2735, p=0.0000  , df_denom=92, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.0000  , p=1.0000  , df_denom=90, df_num=2
ssr based chi2 test:   chi2=0.0000  , p=1.0000  , df=2
likelihood ratio test: chi2=-0.0000 , p=1.0000  , df=2
parameter F test:         F=123.2064, p=0.0000  , df_denom=90, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.0000  , p=1.0000  , df_denom=88, df_num=3
ssr based chi2 test:   chi2=0.0000  , p=1.0000  , df=3
likelihood ratio test: chi2=-0.0000 , p=1.0000  , df=3
parameter F test:         F=81.8176 , p=0.0000  , df_denom=88, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.0000  , p=1.0000  , df_d

ssr based chi2 test:   chi2=1.0159  , p=0.9074  , df=4
likelihood ratio test: chi2=1.0102  , p=0.9082  , df=4
parameter F test:         F=0.2288  , p=0.9215  , df_denom=82, df_num=4

Granger Causality
number of lags (no zero) 5
ssr based F test:         F=0.7468  , p=0.5909  , df_denom=79, df_num=5
ssr based chi2 test:   chi2=4.2537  , p=0.5135  , df=5
likelihood ratio test: chi2=4.1562  , p=0.5272  , df=5
parameter F test:         F=0.7468  , p=0.5909  , df_denom=79, df_num=5

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=0.4919  , p=0.8125  , df_denom=76, df_num=6
ssr based chi2 test:   chi2=3.4563  , p=0.7498  , df=6
likelihood ratio test: chi2=3.3909  , p=0.7584  , df=6
parameter F test:         F=0.4919  , p=0.8125  , df_denom=76, df_num=6

Granger Causality
number of lags (no zero) 7
ssr based F test:         F=0.4843  , p=0.8430  , df_denom=73, df_num=7
ssr based chi2 test:   chi2=4.0867  , p=0.7697  , df=7
likelihood ratio test: chi2=3.9946  , p=0.780

ssr based F test:         F=1.2912  , p=0.2582  , df_denom=67, df_num=9
ssr based chi2 test:   chi2=14.9158 , p=0.0933  , df=9
likelihood ratio test: chi2=13.7548 , p=0.1313  , df=9
parameter F test:         F=1.2912  , p=0.2582  , df_denom=67, df_num=9

Granger Causality
number of lags (no zero) 10
ssr based F test:         F=1.2146  , p=0.2990  , df_denom=64, df_num=10
ssr based chi2 test:   chi2=16.1310 , p=0.0959  , df=10
likelihood ratio test: chi2=14.7701 , p=0.1407  , df=10
parameter F test:         F=1.2146  , p=0.2990  , df_denom=64, df_num=10

Granger Causality
number of lags (no zero) 11
ssr based F test:         F=1.1903  , p=0.3129  , df_denom=61, df_num=11
ssr based chi2 test:   chi2=18.0298 , p=0.0809  , df=11
likelihood ratio test: chi2=16.3337 , p=0.1292  , df=11
parameter F test:         F=1.1903  , p=0.3129  , df_denom=61, df_num=11

Granger Causality
number of lags (no zero) 12
ssr based F test:         F=1.0987  , p=0.3787  , df_denom=58, df_num=12
ssr based chi2 t

ssr based F test:         F=2.7490  , p=0.0478  , df_denom=85, df_num=3
ssr based chi2 test:   chi2=8.9262  , p=0.0303  , df=3
likelihood ratio test: chi2=8.5193  , p=0.0364  , df=3
parameter F test:         F=2.7490  , p=0.0478  , df_denom=85, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=2.0768  , p=0.0913  , df_denom=82, df_num=4
ssr based chi2 test:   chi2=9.2190  , p=0.0559  , df=4
likelihood ratio test: chi2=8.7813  , p=0.0668  , df=4
parameter F test:         F=2.0768  , p=0.0913  , df_denom=82, df_num=4

Granger Causality
number of lags (no zero) 5
ssr based F test:         F=4.3362  , p=0.0015  , df_denom=79, df_num=5
ssr based chi2 test:   chi2=24.6999 , p=0.0002  , df=5
likelihood ratio test: chi2=21.8259 , p=0.0006  , df=5
parameter F test:         F=4.3362  , p=0.0015  , df_denom=79, df_num=5

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=5.5390  , p=0.0001  , df_denom=76, df_num=6
ssr based chi2 test:   chi2=

ssr based F test:         F=1.6452  , p=0.1140  , df_denom=64, df_num=10
ssr based chi2 test:   chi2=21.8508 , p=0.0159  , df=10
likelihood ratio test: chi2=19.4464 , p=0.0349  , df=10
parameter F test:         F=1.6452  , p=0.1140  , df_denom=64, df_num=10

Granger Causality
number of lags (no zero) 11
ssr based F test:         F=1.5470  , p=0.1384  , df_denom=61, df_num=11
ssr based chi2 test:   chi2=23.4332 , p=0.0153  , df=11
likelihood ratio test: chi2=20.6684 , p=0.0370  , df=11
parameter F test:         F=1.5470  , p=0.1384  , df_denom=61, df_num=11

Granger Causality
number of lags (no zero) 12
ssr based F test:         F=1.4741  , p=0.1606  , df_denom=58, df_num=12
ssr based chi2 test:   chi2=25.3143 , p=0.0134  , df=12
likelihood ratio test: chi2=22.0943 , p=0.0365  , df=12
parameter F test:         F=1.4741  , p=0.1606  , df_denom=58, df_num=12

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=1.0242  , p=0.3142  , df_denom=91, df_num=1
ssr based chi2


covariance of constraints does not have full rank. The number of constraints is 10, but rank is 9


covariance of constraints does not have full rank. The number of constraints is 11, but rank is 10


covariance of constraints does not have full rank. The number of constraints is 12, but rank is 8



ssr based F test:         F=4.6615  , p=0.0001  , df_denom=67, df_num=9
ssr based chi2 test:   chi2=53.8504 , p=0.0000  , df=9
likelihood ratio test: chi2=41.8154 , p=0.0000  , df=9
parameter F test:         F=4.6615  , p=0.0001  , df_denom=67, df_num=9

Granger Causality
number of lags (no zero) 10
ssr based F test:         F=5.4594  , p=0.0000  , df_denom=64, df_num=10
ssr based chi2 test:   chi2=72.5082 , p=0.0000  , df=10
likelihood ratio test: chi2=52.4302 , p=0.0000  , df=10
parameter F test:         F=5.4594  , p=0.0000  , df_denom=64, df_num=10

Granger Causality
number of lags (no zero) 11
ssr based F test:         F=4.9869  , p=0.0000  , df_denom=61, df_num=11
ssr based chi2 test:   chi2=75.5400 , p=0.0000  , df=11
likelihood ratio test: chi2=53.8841 , p=0.0000  , df=11
parameter F test:         F=4.9869  , p=0.0000  , df_denom=61, df_num=11

Granger Causality
number of lags (no zero) 12
ssr based F test:         F=4.7777  , p=0.0000  , df_denom=58, df_num=12
ssr based chi2 t

likelihood ratio test: chi2=9.7027  , p=0.0457  , df=4
parameter F test:         F=2.3347  , p=0.0622  , df_denom=83, df_num=4

Granger Causality
number of lags (no zero) 5
ssr based F test:         F=3.0659  , p=0.0139  , df_denom=80, df_num=5
ssr based chi2 test:   chi2=17.2454 , p=0.0041  , df=5
likelihood ratio test: chi2=15.7779 , p=0.0075  , df=5
parameter F test:         F=3.0681  , p=0.0139  , df_denom=80, df_num=5

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=2.8593  , p=0.0144  , df_denom=77, df_num=6
ssr based chi2 test:   chi2=19.8298 , p=0.0030  , df=6
likelihood ratio test: chi2=17.9022 , p=0.0065  , df=6
parameter F test:         F=2.8597  , p=0.0144  , df_denom=77, df_num=6

Granger Causality
number of lags (no zero) 7
ssr based F test:         F=2.3439  , p=0.0323  , df_denom=74, df_num=7
ssr based chi2 test:   chi2=19.5117 , p=0.0067  , df=7
likelihood ratio test: chi2=17.6232 , p=0.0138  , df=7
parameter F test:         F=2.3451  , p=0.032

ssr based F test:         F=1.4937  , p=0.1530  , df_denom=58, df_num=12
ssr based chi2 test:   chi2=25.6497 , p=0.0120  , df=12
likelihood ratio test: chi2=22.3509 , p=0.0338  , df=12
parameter F test:         F=1.4937  , p=0.1530  , df_denom=58, df_num=12

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=4.4932  , p=0.0368  , df_denom=91, df_num=1
ssr based chi2 test:   chi2=4.6413  , p=0.0312  , df=1
likelihood ratio test: chi2=4.5304  , p=0.0333  , df=1
parameter F test:         F=4.4932  , p=0.0368  , df_denom=91, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=2.3406  , p=0.1022  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=4.9472  , p=0.0843  , df=2
likelihood ratio test: chi2=4.8201  , p=0.0898  , df=2
parameter F test:         F=2.3406  , p=0.1022  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.6657  , p=0.1805  , df_denom=85, df_num=3
ssr based chi2 test:   c

ssr based F test:         F=1.4307  , p=0.2224  , df_denom=79, df_num=5
ssr based chi2 test:   chi2=8.1496  , p=0.1482  , df=5
likelihood ratio test: chi2=7.8015  , p=0.1675  , df=5
parameter F test:         F=1.4307  , p=0.2224  , df_denom=79, df_num=5

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=1.2612  , p=0.2854  , df_denom=76, df_num=6
ssr based chi2 test:   chi2=8.8613  , p=0.1815  , df=6
likelihood ratio test: chi2=8.4474  , p=0.2071  , df=6
parameter F test:         F=1.2612  , p=0.2854  , df_denom=76, df_num=6

Granger Causality
number of lags (no zero) 7
ssr based F test:         F=1.1001  , p=0.3723  , df_denom=73, df_num=7
ssr based chi2 test:   chi2=9.2832  , p=0.2330  , df=7
likelihood ratio test: chi2=8.8255  , p=0.2654  , df=7
parameter F test:         F=1.1001  , p=0.3723  , df_denom=73, df_num=7

Granger Causality
number of lags (no zero) 8
ssr based F test:         F=0.9951  , p=0.4476  , df_denom=70, df_num=8
ssr based chi2 test:   chi2=

ssr based chi2 test:   chi2=18.8101 , p=0.0427  , df=10
likelihood ratio test: chi2=16.9925 , p=0.0745  , df=10
parameter F test:         F=1.4827  , p=0.1654  , df_denom=67, df_num=10

Granger Causality
number of lags (no zero) 11
ssr based F test:         F=1.5846  , p=0.1249  , df_denom=64, df_num=11
ssr based chi2 test:   chi2=22.8782 , p=0.0184  , df=11
likelihood ratio test: chi2=20.2333 , p=0.0422  , df=11
parameter F test:         F=1.5846  , p=0.1249  , df_denom=64, df_num=11

Granger Causality
number of lags (no zero) 12
ssr based F test:         F=1.5459  , p=0.1322  , df_denom=62, df_num=12
ssr based chi2 test:   chi2=24.8349 , p=0.0156  , df=12
likelihood ratio test: chi2=21.7261 , p=0.0407  , df=12
parameter F test:         F=1.5459  , p=0.1322  , df_denom=62, df_num=12

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=2.4217  , p=0.1231  , df_denom=91, df_num=1
ssr based chi2 test:   chi2=2.5015  , p=0.1137  , df=1
likelihood ratio test: chi2=2.46


covariance of constraints does not have full rank. The number of constraints is 12, but rank is 10



parameter F test:         F=1.5943  , p=0.1714  , df_denom=79, df_num=5

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=1.5505  , p=0.1734  , df_denom=76, df_num=6
ssr based chi2 test:   chi2=10.8940 , p=0.0917  , df=6
likelihood ratio test: chi2=10.2771 , p=0.1135  , df=6
parameter F test:         F=1.5505  , p=0.1734  , df_denom=76, df_num=6

Granger Causality
number of lags (no zero) 7
ssr based F test:         F=1.4543  , p=0.1972  , df_denom=73, df_num=7
ssr based chi2 test:   chi2=12.2723 , p=0.0920  , df=7
likelihood ratio test: chi2=11.4886 , p=0.1187  , df=7
parameter F test:         F=1.4543  , p=0.1972  , df_denom=73, df_num=7

Granger Causality
number of lags (no zero) 8
ssr based F test:         F=1.3497  , p=0.2341  , df_denom=70, df_num=8
ssr based chi2 test:   chi2=13.4196 , p=0.0982  , df=8
likelihood ratio test: chi2=12.4801 , p=0.1310  , df=8
parameter F test:         F=1.3497  , p=0.2341  , df_denom=70, df_num=8

Granger Causality
number of

parameter F test:         F=0.7365  , p=0.7103  , df_denom=58, df_num=12

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=3.5526  , p=0.0626  , df_denom=91, df_num=1
ssr based chi2 test:   chi2=3.6697  , p=0.0554  , df=1
likelihood ratio test: chi2=3.5999  , p=0.0578  , df=1
parameter F test:         F=3.5526  , p=0.0626  , df_denom=91, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=1.4740  , p=0.2346  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=3.1156  , p=0.2106  , df=2
likelihood ratio test: chi2=3.0646  , p=0.2160  , df=2
parameter F test:         F=1.4740  , p=0.2346  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.7574  , p=0.5211  , df_denom=85, df_num=3
ssr based chi2 test:   chi2=2.4594  , p=0.4827  , df=3
likelihood ratio test: chi2=2.4271  , p=0.4886  , df=3
parameter F test:         F=0.7574  , p=0.5211  , df_denom=85, df_num=3

Granger Causality
number o


covariance of constraints does not have full rank. The number of constraints is 2, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 3, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 4, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 5, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 6, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 7, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 8, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 9, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 10, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 11, but rank is 1


covaria

parameter F test:         F=1.6582  , p=0.2012  , df_denom=90, df_num=1

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=-0.0000 , p=1.0000  , df_denom=88, df_num=3
ssr based chi2 test:   chi2=-0.0000 , p=1.0000  , df=3
likelihood ratio test: chi2=-0.0000 , p=1.0000  , df=3
parameter F test:         F=1.9377  , p=0.1674  , df_denom=88, df_num=1

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.0000  , p=1.0000  , df_denom=86, df_num=4
ssr based chi2 test:   chi2=0.0000  , p=1.0000  , df=4
likelihood ratio test: chi2=0.0000  , p=1.0000  , df=4
parameter F test:         F=1.9205  , p=0.1694  , df_denom=86, df_num=1

Granger Causality
number of lags (no zero) 5
ssr based F test:         F=-0.0000 , p=1.0000  , df_denom=84, df_num=5
ssr based chi2 test:   chi2=-0.0000 , p=1.0000  , df=5
likelihood ratio test: chi2=-0.0000 , p=1.0000  , df=5
parameter F test:         F=1.5733  , p=0.2132  , df_denom=84, df_num=1

Granger Causality
number of

ssr based F test:         F=0.5333  , p=0.5886  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=1.1272  , p=0.5692  , df=2
likelihood ratio test: chi2=1.1204  , p=0.5711  , df=2
parameter F test:         F=0.5333  , p=0.5885  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.3955  , p=0.7566  , df_denom=85, df_num=3
ssr based chi2 test:   chi2=1.2842  , p=0.7329  , df=3
likelihood ratio test: chi2=1.2753  , p=0.7350  , df=3
parameter F test:         F=0.3955  , p=0.7566  , df_denom=85, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.3394  , p=0.8506  , df_denom=82, df_num=4
ssr based chi2 test:   chi2=1.5067  , p=0.8254  , df=4
likelihood ratio test: chi2=1.4944  , p=0.8276  , df=4
parameter F test:         F=0.3394  , p=0.8506  , df_denom=82, df_num=4

Granger Causality
number of lags (no zero) 5
ssr based F test:         F=0.3467  , p=0.8829  , df_denom=79, df_num=5
ssr based chi2 test:   chi2=


covariance of constraints does not have full rank. The number of constraints is 2, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 3, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 4, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 5, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 6, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 7, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 8, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 9, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 10, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 11, but rank is 1


covaria

ssr based F test:         F=1.5556  , p=0.1539  , df_denom=71, df_num=8
ssr based chi2 test:   chi2=15.2496 , p=0.0545  , df=8
likelihood ratio test: chi2=14.0512 , p=0.0804  , df=8
parameter F test:         F=1.5556  , p=0.1539  , df_denom=71, df_num=8

Granger Causality
number of lags (no zero) 9
ssr based F test:         F=1.9511  , p=0.0590  , df_denom=68, df_num=9
ssr based chi2 test:   chi2=22.2080 , p=0.0082  , df=9
likelihood ratio test: chi2=19.7549 , p=0.0195  , df=9
parameter F test:         F=1.9511  , p=0.0590  , df_denom=68, df_num=9

Granger Causality
number of lags (no zero) 10
ssr based F test:         F=2.4997  , p=0.0132  , df_denom=65, df_num=10
ssr based chi2 test:   chi2=32.6879 , p=0.0003  , df=10
likelihood ratio test: chi2=27.6577 , p=0.0020  , df=10
parameter F test:         F=2.4997  , p=0.0132  , df_denom=65, df_num=10

Granger Causality
number of lags (no zero) 11
ssr based F test:         F=2.1759  , p=0.0273  , df_denom=62, df_num=11
ssr based chi2 test: 

likelihood ratio test: chi2=25.3347 , p=0.0133  , df=12
parameter F test:         F=2.0330  , p=0.0341  , df_denom=68, df_num=12

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.1016  , p=0.7507  , df_denom=91, df_num=1
ssr based chi2 test:   chi2=0.1049  , p=0.7460  , df=1
likelihood ratio test: chi2=0.1048  , p=0.7461  , df=1
parameter F test:         F=0.1016  , p=0.7507  , df_denom=91, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.3467  , p=0.7080  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=0.7327  , p=0.6933  , df=2
likelihood ratio test: chi2=0.7298  , p=0.6943  , df=2
parameter F test:         F=0.3465  , p=0.7081  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.3272  , p=0.8057  , df_denom=86, df_num=3
ssr based chi2 test:   chi2=1.0500  , p=0.7892  , df=3
likelihood ratio test: chi2=1.0440  , p=0.7906  , df=3
parameter F test:         F=0.3254  , p=0.8


covariance of constraints does not have full rank. The number of constraints is 2, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 3, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 4, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 5, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 6, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 7, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 8, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 9, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 10, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 11, but rank is 1


covaria

ssr based F test:         F=6.4522  , p=0.0128  , df_denom=91, df_num=1
ssr based chi2 test:   chi2=6.6649  , p=0.0098  , df=1
likelihood ratio test: chi2=6.4392  , p=0.0112  , df=1
parameter F test:         F=6.4522  , p=0.0128  , df_denom=91, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.3170  , p=0.7291  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=0.6701  , p=0.7153  , df=2
likelihood ratio test: chi2=0.6677  , p=0.7162  , df=2
parameter F test:         F=0.3109  , p=0.7336  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.6305  , p=0.1883  , df_denom=85, df_num=3
ssr based chi2 test:   chi2=5.2943  , p=0.1515  , df=3
likelihood ratio test: chi2=5.1475  , p=0.1613  , df=3
parameter F test:         F=1.6352  , p=0.1873  , df_denom=85, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.3740  , p=0.2501  , df_denom=82, df_num=4
ssr based chi2 test:   chi2=


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=1.0475  , p=0.3088  , df_denom=91, df_num=1
ssr based chi2 test:   chi2=1.0820  , p=0.2982  , df=1
likelihood ratio test: chi2=1.0759  , p=0.2996  , df=1
parameter F test:         F=1.0475  , p=0.3088  , df_denom=91, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.4848  , p=0.6174  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=1.0248  , p=0.5991  , df=2
likelihood ratio test: chi2=1.0192  , p=0.6007  , df=2
parameter F test:         F=0.4848  , p=0.6174  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.3493  , p=0.7898  , df_denom=85, df_num=3
ssr based chi2 test:   chi2=1.1341  , p=0.7689  , df=3
likelihood ratio test: chi2=1.1272  , p=0.7705  , df=3
parameter F test:         F=0.3493  , p=0.7898  , df_denom=85, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.5008  , p=0.7352  , df_d


covariance of constraints does not have full rank. The number of constraints is 2, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 3, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 4, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 5, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 6, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 7, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 8, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 9, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 10, but rank is 1


covariance of constraints does not have full rank. The number of constraints is 11, but rank is 1


covaria

parameter F test:         F=1.9073  , p=0.1706  , df_denom=91, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.9928  , p=0.3747  , df_denom=88, df_num=2
ssr based chi2 test:   chi2=2.0984  , p=0.3502  , df=2
likelihood ratio test: chi2=2.0751  , p=0.3543  , df=2
parameter F test:         F=0.9928  , p=0.3747  , df_denom=88, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.7831  , p=0.5066  , df_denom=85, df_num=3
ssr based chi2 test:   chi2=2.5429  , p=0.4676  , df=3
likelihood ratio test: chi2=2.5084  , p=0.4738  , df=3
parameter F test:         F=0.7831  , p=0.5066  , df_denom=85, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.6205  , p=0.6492  , df_denom=82, df_num=4
ssr based chi2 test:   chi2=2.7542  , p=0.5998  , df=4
likelihood ratio test: chi2=2.7133  , p=0.6069  , df=4
parameter F test:         F=0.6205  , p=0.6492  , df_denom=82, df_num=4

Granger Causality
number of

ssr based F test:         F=6.9011  , p=0.0000  , df_denom=79, df_num=5
ssr based chi2 test:   chi2=39.3103 , p=0.0000  , df=5
likelihood ratio test: chi2=32.6165 , p=0.0000  , df=5
parameter F test:         F=6.9011  , p=0.0000  , df_denom=79, df_num=5

Granger Causality
number of lags (no zero) 6
ssr based F test:         F=6.2920  , p=0.0000  , df_denom=76, df_num=6
ssr based chi2 test:   chi2=44.2096 , p=0.0000  , df=6
likelihood ratio test: chi2=35.8926 , p=0.0000  , df=6
parameter F test:         F=6.2920  , p=0.0000  , df_denom=76, df_num=6

Granger Causality
number of lags (no zero) 7
ssr based F test:         F=4.3090  , p=0.0005  , df_denom=73, df_num=7
ssr based chi2 test:   chi2=36.3605 , p=0.0000  , df=7
likelihood ratio test: chi2=30.4346 , p=0.0001  , df=7
parameter F test:         F=4.3090  , p=0.0005  , df_denom=73, df_num=7

Granger Causality
number of lags (no zero) 8
ssr based F test:         F=1.4279  , p=0.2003  , df_denom=70, df_num=8
ssr based chi2 test:   chi2=

Unnamed: 0,us bankruptcy_x,EFFR_x,us employment cost index_x,GDP(Billions)_x,us unemployment rate_x,"Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted_x",adults investing money in the stock market(%)_x,HSCI_x,CCMP Index_x,NYA Index_x,us market cap_x,hk bankruptcy_x
us bankruptcy_y,1.0,0.028,0.0005,0.0001,0.4235,0.0017,0.2045,0.0554,0.0,0.0,0.0,0.0
EFFR_y,0.0,1.0,0.0001,0.0004,0.0001,0.0463,0.0,0.1856,0.0617,0.0002,0.0098,0.0053
us employment cost index_y,0.2544,0.0001,1.0,0.0,0.0,0.0,0.0,0.0105,0.0,0.0,0.0004,0.0738
GDP(Billions)_y,0.6088,0.0489,0.0132,1.0,0.0,0.0797,0.6309,0.0,0.4554,0.6244,0.3464,0.0189
us unemployment rate_y,0.0846,0.0042,0.2526,0.0,1.0,0.0428,0.0917,0.0,0.8153,0.8749,0.3856,0.0749
"Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted_y",0.3701,0.0085,0.0,0.0006,0.012,1.0,0.0041,0.0001,0.0,0.0007,0.0006,0.063
adults investing money in the stock market(%)_y,0.0,0.0,0.005,0.0342,0.0312,0.1596,1.0,0.2926,0.3259,0.5154,0.0517,0.346
HSCI_y,0.5285,0.038,0.0,0.1029,0.089,0.0026,0.0136,0.9979,0.0,0.0023,0.0093,0.1791
CCMP Index_y,0.058,0.0007,0.0,0.0,0.0448,0.0156,0.0835,0.0106,1.0,0.0,0.0004,0.0
NYA Index_y,0.8207,0.0627,0.0124,0.0,0.0,0.0398,0.0338,0.0015,0.0025,0.9967,0.7196,0.0273


In [199]:
df = df.drop('us market cap', axis=1).drop('adults investing money in the stock market(%)', axis=1)
cointegration_test(df)


Name   ::  Test Stat > C(95%)    =>   Signif  
 ----------------------------------------
us bankruptcy ::  557.69    > 219.4051  =>   True
EFFR   ::  410.71    > 179.5199  =>   True
us employment cost index ::  311.83    > 143.6691  =>   True
GDP(Billions) ::  237.67    > 111.7797  =>   True
us unemployment rate ::  176.11    > 83.9383   =>   True
Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted ::  129.71    > 60.0627   =>   True
HSCI   ::  89.01     > 40.1749   =>   True
CCMP Index ::  53.2      > 24.2761   =>   True
NYA Index ::  20.63     > 12.3212   =>   True
hk bankruptcy ::  4.21      > 4.1296    =>   True


In [200]:
df.head()

Unnamed: 0_level_0,us bankruptcy,EFFR,us employment cost index,GDP(Billions),us unemployment rate,"Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted",HSCI,CCMP Index,NYA Index,hk bankruptcy
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-03-31,312335,5.85,118.325275,10002.179,4.087491,6.48,19994140000000.0,24100000000000.0,32800000000000.0,1106
2000-06-30,321729,6.53,118.325275,10247.72,3.984665,6.176667,19994140000000.0,22300000000000.0,31900000000000.0,1054
2000-09-30,308718,6.52,118.325275,10318.165,4.058545,5.893333,19994140000000.0,23000000000000.0,33100000000000.0,1198
2000-12-31,310169,6.4,118.325275,10435.744,3.949703,5.566667,19994140000000.0,20600000000000.0,32000000000000.0,1198
2001-03-31,366841,5.31,87.6,10470.231,4.292366,5.05,19994140000000.0,17800000000000.0,31700000000000.0,1428


# split dataset

In [483]:
pp = 20
df_train, df_test = df[:-pp], df[-pp:]

# check for stationary

In [484]:
df_differenced = df_train.dropna()
for name, column in df_differenced.items():
    adfuller_test(column, name=column.name)
    print('\n')

    Augmented Dickey-Fuller Test on "us bankruptcy" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -3.0358
 No. Lags Chosen       = 0
 Critical value 1%     = -3.522
 Critical value 5%     = -2.901
 Critical value 10%    = -2.588
 => P-Value = 0.0317. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "EFFR" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -3.8667
 No. Lags Chosen       = 3
 Critical value 1%     = -3.526
 Critical value 5%     = -2.903
 Critical value 10%    = -2.589
 => P-Value = 0.0023. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "us employment cost index" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Station

# use differencing to make it stationary

In [485]:
df_differenced = df_train.diff().diff().dropna()
for name, column in df_differenced.items():
    adfuller_test(column, name=column.name)
    print('\n')

    Augmented Dickey-Fuller Test on "us bankruptcy" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -6.2312
 No. Lags Chosen       = 5
 Critical value 1%     = -3.532
 Critical value 5%     = -2.906
 Critical value 10%    = -2.59
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "EFFR" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -8.2725
 No. Lags Chosen       = 1
 Critical value 1%     = -3.526
 Critical value 5%     = -2.903
 Critical value 10%    = -2.589
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "us employment cost index" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 S

# Build model

In [486]:
model = VAR(df_differenced)


No frequency information was provided, so inferred frequency Q-DEC will be used.



# select proper order

In [487]:
sorted_order = model.select_order(maxlags=4)
sorted_order.summary()

0,1,2,3,4
,AIC,BIC,FPE,HQIC
0.0,206.1,206.4*,3.247e+89,206.2*
1.0,205.5,209.0,1.789e+89,206.9
2.0,205.1,211.9,1.395e+89,207.8
3.0,204.3,214.4,1.101e+89,208.3
4.0,202.0*,215.3,3.227e+88*,207.3


In [531]:
model = VAR(df_differenced)


No frequency information was provided, so inferred frequency Q-DEC will be used.



In [532]:
model_fitted = model.fit(2)

In [533]:
model_fitted.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 22, Nov, 2023
Time:                     09:04:51
--------------------------------------------------------------------
No. of Equations:         10.0000    BIC:                    217.690
Nobs:                     71.0000    HQIC:                   213.659
Log likelihood:          -8287.87    FPE:                5.17970e+91
AIC:                      210.998    Det(Omega_mle):     3.88159e+90
--------------------------------------------------------------------
Results for equation us bankruptcy
                                                                                                                                                             coefficient       std. error           t-stat            prob
-------------------------------------------------------------------------------------------------------------------------------------------------------------

In [534]:
out = durbin_watson(model_fitted.resid)

In [535]:
for col, val in zip(df.columns, out):
    print(col, ':', round(val, 2))

us bankruptcy : 1.97
EFFR : 2.14
us employment cost index : 1.8
GDP(Billions) : 2.13
us unemployment rate : 1.89
Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted : 2.06
HSCI : 2.28
CCMP Index : 2.11
NYA Index : 2.1
hk bankruptcy : 2.19


In [536]:
lag_order = model_fitted.k_ar
rg = -pp
forecast_input = df_differenced.values[rg:]

In [537]:
fc = model_fitted.forecast(y=forecast_input, steps=lag_order)
df_forecast = pd.DataFrame(forecast_input, index=df.index[rg:], columns=df.columns + '_2d')

In [538]:
def invert_transformation(df_train, df_forecast, second_diff=False):
    """Revert back the differencing to get the forecast to original scale."""
    df_fc = df_forecast.copy()
    columns = df_train.columns
    for col in columns:        
        # Roll back 2nd Diff
        if second_diff:
            df_fc[str(col)+'_1d'] = (df_train[col].iloc[-1]-df_train[col].iloc[-2]) + df_fc[str(col)+'_2d'].cumsum()
        # Roll back 1st Diff
        df_fc[str(col)+'_forecast'] = df_train[col].iloc[-1] + df_fc[str(col)+'_1d'].cumsum()
    return df_fc

In [539]:
df_results = invert_transformation(df_train, df_forecast, second_diff=True)

In [540]:
import plotly.graph_objects as go

In [541]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_results.index, y=df_results['hk bankruptcy_forecast'], mode='lines', name='hk bankruptcy_forecast'))
fig.add_trace(go.Scatter(x=df.index, y=df['hk bankruptcy'], mode='lines', name='hk bankruptcy'))
fig.update_layout(
    shapes=[
        dict(
            type="rect",
            xref="x",
            yref="paper",
            x0='2019-12-01',
            y0=0,
            x1='2023-11-30',
            y1=1,
            fillcolor="red",
            opacity=0.5,
            layer="below",
            line_width=0,
        )
    ]
)

In [542]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_results.index, y=df_results['us bankruptcy_forecast'], mode='lines', name='us bankruptcy_forecast'))
fig.add_trace(go.Scatter(x=df.index, y=df['us bankruptcy'], mode='lines', name='us bankruptcy'))
fig.update_layout(
    shapes=[
        dict(
            type="rect",
            xref="x",
            yref="paper",
            x0='2019-12-01',
            y0=0,
            x1='2023-11-30',
            y1=1,
            fillcolor="red",
            opacity=0.5,
            layer="below",
            line_width=0,
        )
    ]
)

# Metrics of VAR model

In [543]:
from statsmodels.tsa.stattools import acf
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, 'corr':corr, 'minmax':minmax})


In [544]:
print('Forecast Accuracy of: hk bankruptcy')
accuracy_prod = forecast_accuracy(df_results['hk bankruptcy_forecast'].values, df_test['hk bankruptcy'])
for k, v in accuracy_prod.items():
    print(k, ': ', round(v,4))
print()
print()
print('Forecast Accuracy of: us bankruptcy')
accuracy_prod = forecast_accuracy(df_results['us bankruptcy_forecast'].values, df_test['us bankruptcy'])
for k, v in accuracy_prod.items():
    print(k, ': ', round(v,4))

Forecast Accuracy of: hk bankruptcy
mape :  0.6731
me :  -1011.5
mae :  1086.4
mpe :  -0.554
rmse :  1296.256
corr :  0.1271
minmax :  0.6407


Forecast Accuracy of: us bankruptcy
mape :  1.2388
me :  130634.2
mae :  131765.0
mpe :  1.2327
rmse :  158389.8402
corr :  -0.7428
minmax :  0.4523



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



In [509]:
import pandas as pd
import statsmodels.api as sm

In [440]:

dependent_variable = ['us bankruptcy', 'hk bankruptcy']
inde_v = []
for name, column in df_differenced.items():
    if name not in dependent_variable:
        inde_v.append(name)
inde_v

['EFFR',
 'us employment cost index',
 'GDP(Billions)',
 'us unemployment rate',
 'Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted',
 'HSCI',
 'CCMP Index',
 'NYA Index']

In [441]:
X = df_train[inde_v]
X = sm.add_constant(X)
y = df_train['us bankruptcy']
model = sm.OLS(y, X)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,us bankruptcy,R-squared:,0.537
Model:,OLS,Adj. R-squared:,0.489
Method:,Least Squares,F-statistic:,11.11
Date:,"Wed, 22 Nov 2023",Prob (F-statistic):,3.07e-09
Time:,08:54:20,Log-Likelihood:,-941.07
No. Observations:,75,AIC:,1898.0
Df Residuals:,67,BIC:,1917.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1058.8637,499.373,2.120,0.038,62.112,2055.615
EFFR,-1.42e+04,1.14e+04,-1.248,0.216,-3.69e+04,8506.260
us employment cost index,-6.1000,1635.489,-0.004,0.997,-3270.550,3258.350
GDP(Billions),4.0885,10.454,0.391,0.697,-16.778,24.955
us unemployment rate,2.19e+04,7657.459,2.861,0.006,6619.968,3.72e+04
"Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted",2.383e+04,1.93e+04,1.237,0.221,-1.46e+04,6.23e+04
HSCI,-2.664e-09,1.57e-09,-1.702,0.093,-5.79e-09,4.61e-10
CCMP Index,5.857e-09,4.83e-09,1.213,0.230,-3.78e-09,1.55e-08
NYA Index,1.862e-09,3.95e-09,0.471,0.639,-6.03e-09,9.75e-09

0,1,2,3
Omnibus:,55.761,Durbin-Watson:,1.053
Prob(Omnibus):,0.0,Jarque-Bera (JB):,366.661
Skew:,2.061,Prob(JB):,2.4e-80
Kurtosis:,13.017,Cond. No.,1050000000000000.0


# predict with OLS regression

In [450]:
df_x = df[inde_v]
new_data_with_const = sm.add_constant(df_x)
new_data_with_const['predictions'] = results.predict(new_data_with_const)
print(new_data_with_const['predictions'].shape)

(95,)


In [451]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=new_data_with_const['predictions'], mode='lines', name='us bankruptcy_forecast'))
fig.add_trace(go.Scatter(x=df.index, y=df['us bankruptcy'], mode='lines', name='us bankruptcy'))
fig.update_layout(
    shapes=[
        dict(
            type="rect",
            xref="x",
            yref="paper",
            x0='2019-12-01',
            y0=0,
            x1='2023-11-30',
            y1=1,
            fillcolor="red",
            opacity=0.5,
            layer="below",
            line_width=0,
        )
    ]
)

# OLS with hk bankruptcy

In [452]:
X = df_train[inde_v]
X = sm.add_constant(X)
y = df_train['hk bankruptcy']
model = sm.OLS(y, X)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,hk bankruptcy,R-squared:,0.715
Model:,OLS,Adj. R-squared:,0.685
Method:,Least Squares,F-statistic:,24.04
Date:,"Wed, 22 Nov 2023",Prob (F-statistic):,5.27e-16
Time:,08:57:48,Log-Likelihood:,-606.37
No. Observations:,75,AIC:,1229.0
Df Residuals:,67,BIC:,1247.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-6.4935,5.759,-1.128,0.264,-17.988,5.001
EFFR,-188.8399,131.204,-1.439,0.155,-450.724,73.044
us employment cost index,-55.5758,18.860,-2.947,0.004,-93.221,-17.931
GDP(Billions),0.6915,0.121,5.736,0.000,0.451,0.932
us unemployment rate,28.4219,88.305,0.322,0.749,-147.835,204.679
"Interest Rates: Long-Term Government Bond Yields: 10-Year: Main (Including Benchmark) for United States, Percent, Quarterly, Not Seasonally Adjusted",-182.3389,222.192,-0.821,0.415,-625.836,261.158
HSCI,-1.165e-10,1.81e-11,-6.456,0.000,-1.53e-10,-8.05e-11
CCMP Index,3.162e-10,5.57e-11,5.677,0.000,2.05e-10,4.27e-10
NYA Index,-6.025e-11,4.56e-11,-1.322,0.191,-1.51e-10,3.07e-11

0,1,2,3
Omnibus:,18.2,Durbin-Watson:,0.651
Prob(Omnibus):,0.0,Jarque-Bera (JB):,24.892
Skew:,1.025,Prob(JB):,3.93e-06
Kurtosis:,4.939,Cond. No.,1050000000000000.0


# Predictions 

In [453]:
df_x = df[inde_v]
new_data_with_const = sm.add_constant(df_x)
new_data_with_const['predictions'] = results.predict(new_data_with_const)
print(new_data_with_const['predictions'].shape)

(95,)


In [455]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=new_data_with_const['predictions'], mode='lines', name='hk bankruptcy_forecast'))
fig.add_trace(go.Scatter(x=df.index, y=df['hk bankruptcy'], mode='lines', name='hk bankruptcy'))
fig.update_layout(
    shapes=[
        dict(
            type="rect",
            xref="x",
            yref="paper",
            x0='2019-12-01',
            y0=0,
            x1='2023-11-30',
            y1=1,
            fillcolor="red",
            opacity=0.5,
            layer="below",
            line_width=0,
        )
    ]
)