In [1]:
# Set auto reload: (re)load IPython's autoreload extension and switch it to mode 2,
# so that edits to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
# Import libraries
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

from scipy.special import logit

# Config
%config InlineBackend.figure_format = 'retina' #Retina display
# NOTE(review): called with only the 'ignore' action, this filter silences every
# warning for the rest of the session, including any deprecation or numerical
# warnings raised by the libraries used below. Consider narrowing it to specific
# categories/modules once the notebook is stable.
warnings.filterwarnings('ignore') #Disable warning

In [3]:
# Create regression DataFrame
# Left part : logit of the observed default rate (Bad / N) for segment 'CU'.
# Right part: the 'Date' column plus the two macro regressors.
#
# pd.concat(axis=1) aligns its inputs on index labels, and .query() keeps the
# row labels of the unfiltered file. The filtered series is therefore re-indexed
# to 0..n-1 so the two parts are paired row by row regardless of where the 'CU'
# rows sit in the source file.
# NOTE(review): this pairing assumes both files list the same months in the same
# order - TODO confirm (a join on a shared date key would be safer if one exists).
#
# `parse_dates` already converts 'Date' to datetime; the former
# `date_parser = lambda x: pd.to_datetime(x)` argument was redundant and is
# deprecated in pandas 2.x, so it has been dropped.
df = pd.concat(
    [
        pd.read_csv(
            'https://raw.githubusercontent.com/Sarvesh547/Credit-Risk/main/PD/datasets/monthlyODR.csv'
        ).query(
            "Segment == 'CU'"
        ).eval(
            "ODR = Bad / N"
        ).eval(
            "logitODR = @logit(ODR)", engine = 'python'
        )['logitODR'].reset_index(drop = True),
        pd.read_csv(
            'https://raw.githubusercontent.com/Sarvesh547/Credit-Risk/main/PD/datasets/macroTransformed.csv',
            parse_dates = ['Date']
        )[['Date', 'GDP_C_lg12', 'MPI_C_lg12']]
    ],
    axis = 1
).set_index('Date')

# Show table
df.head(5)

Unnamed: 0_level_0,logitODR,GDP_C_lg12,MPI_C_lg12
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-01-01,-3.23668,0.125214,0.239164
2014-02-01,-3.162347,0.086355,0.119307
2014-03-01,-3.127475,0.052505,0.143477
2014-04-01,-3.093313,0.032748,0.08987
2014-05-01,-3.061565,0.026525,0.046398


In [4]:
# Response variable for the linear regression
y = df['logitODR']

# Regressors, with a constant column added so the model has an intercept
X = sm.add_constant(df.loc[:, ['GDP_C_lg12', 'MPI_C_lg12']])

# Fit by ordinary least squares and print the summary table
model = sm.OLS(endog = y, exog = X)
result = model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:               logitODR   R-squared:                       0.688
Model:                            OLS   Adj. R-squared:                  0.677
Method:                 Least Squares   F-statistic:                     61.68
Date:                Mon, 03 Oct 2022   Prob (F-statistic):           6.99e-15
Time:                        07:06:03   Log-Likelihood:                 100.82
No. Observations:                  59   AIC:                            -195.6
Df Residuals:                      56   BIC:                            -189.4
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.9473      0.019   -155.761      0.0

In [5]:
# Base fit for the HAC adjustment: the same regression (intercept plus the two
# regressors) specified through the formula interface. The robust covariance
# results below are derived from `newResult`.
newModel = smf.ols(formula = 'logitODR ~ 1 + GDP_C_lg12 + MPI_C_lg12', data = df)
newResult = newModel.fit()

### Define number of lags
There are three ways to choose the number of lags (`maxlags`) used for the HAC adjustment:

1.
\begin{align}
        Lags = 4\left(\frac{T}{100}\right)^{\frac{2}{9}}
    \end{align}

2.
\begin{align}
        Lags = T^{\frac{1}{4}}
    \end{align}

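3. No explicit value: `maxlags = None` is passed and the lag length is left to the library default (in the run below this reproduces the result of rule 1).
\begin{align}
        \text{Lags} = \text{None}
    \end{align}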

where;
- $T$ is the number of observations in the regression model.

In [6]:
# HAC Result
# 1: Lags = 4 * (T / 100) ** (2 / 9), truncated to an integer by int()
# T is read from the fitted model (`nobs`) instead of df.shape[0], so the rule
# keeps using the number of observations in the regression even if some rows of
# df were excluded from the fit (e.g. because of missing values).
lags = int(4 * (newResult.nobs / 100) ** (2 / 9))
print(f'Number of lags: {lags}')

# Recompute the covariance of the already-fitted coefficients with the HAC
# estimator, using the lag length chosen above
HAC = newResult.get_robustcov_results(
    cov_type = 'HAC',
    maxlags = lags
)

# Summary
print(HAC.summary())

Number of lags: 3
                            OLS Regression Results                            
Dep. Variable:               logitODR   R-squared:                       0.688
Model:                            OLS   Adj. R-squared:                  0.677
Method:                 Least Squares   F-statistic:                     34.50
Date:                Mon, 03 Oct 2022   Prob (F-statistic):           1.72e-10
Time:                        07:06:03   Log-Likelihood:                 100.82
No. Observations:                  59   AIC:                            -195.6
Df Residuals:                      56   BIC:                            -189.4
Df Model:                           2                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.9473      0.027  

In [7]:
# HAC Result
# 2: Lags = T ** (1 / 4), truncated to an integer by int()
# T is read from the fitted model (`nobs`) instead of df.shape[0], so the rule
# keeps using the number of observations in the regression even if some rows of
# df were excluded from the fit (e.g. because of missing values).
lags = int(newResult.nobs ** (1 / 4))
print(f'Number of lags: {lags}')

# Recompute the covariance of the already-fitted coefficients with the HAC
# estimator, using the lag length chosen above
HAC = newResult.get_robustcov_results(
    cov_type = 'HAC',
    maxlags = lags
)

# Summary
print(HAC.summary())

Number of lags: 2
                            OLS Regression Results                            
Dep. Variable:               logitODR   R-squared:                       0.688
Model:                            OLS   Adj. R-squared:                  0.677
Method:                 Least Squares   F-statistic:                     35.09
Date:                Mon, 03 Oct 2022   Prob (F-statistic):           1.32e-10
Time:                        07:06:03   Log-Likelihood:                 100.82
No. Observations:                  59   AIC:                            -195.6
Df Residuals:                      56   BIC:                            -189.4
Df Model:                           2                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.9473      0.025  

In [8]:
# HAC Result
# 3: no explicit lag length; None is forwarded as `maxlags`
# NOTE(review): statsmodels presumably falls back to its own default lag rule
# here - the F-statistic printed below matches the rule 1 run; confirm in the docs.
lags = None
print(f'Number of lags: {lags}')

# HAC covariance for the already-fitted model, followed by its summary table
HAC = newResult.get_robustcov_results(cov_type = 'HAC', maxlags = lags)
print(HAC.summary())

Number of lags: None
                            OLS Regression Results                            
Dep. Variable:               logitODR   R-squared:                       0.688
Model:                            OLS   Adj. R-squared:                  0.677
Method:                 Least Squares   F-statistic:                     34.50
Date:                Mon, 03 Oct 2022   Prob (F-statistic):           1.72e-10
Time:                        07:06:04   Log-Likelihood:                 100.82
No. Observations:                  59   AIC:                            -195.6
Df Residuals:                      56   BIC:                            -189.4
Df Model:                           2                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.9473      0.02