# Running initial OLS models to assess correlations #

In [None]:
# Loading packages
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [1]:
# Load the growth dataset from disk
growth_data_raw = pd.read_csv("growth_data.csv")

# Add lagged copies of the `gini` column, one per lag length.
# shift(k) moves values down by k rows, so the first k rows of each lagged
# column are NaN.
# NOTE(review): this treats one row as one period and assumes the rows are
# already in chronological order -- confirm against the CSV.
gini_lag_periods = (3, 5, 7, 10)
growth_data_updated = growth_data_raw.assign(
    **{
        f"Gini_lag{periods}": growth_data_raw["gini"].shift(periods)
        for periods in gini_lag_periods
    }
)


# Model 0 (without lags): baseline specification using the contemporaneous
# `gini` column alongside the control variables.
formula_0 = (
    "UK_gdp_pc_growth ~ HC_index + Capital_stock + gini + change_in_cpi + wrl_gdp_growth"
)
model_0 = smf.ols(formula=formula_0, data=growth_data_updated).fit()

# Plain-text regression table
print(model_0.summary())


                            OLS Regression Results                            
Dep. Variable:       UK_gdp_pc_growth   R-squared:                       0.531
Model:                            OLS   Adj. R-squared:                  0.486
Method:                 Least Squares   F-statistic:                     11.79
Date:                Thu, 06 Feb 2025   Prob (F-statistic):           1.20e-07
Time:                        14:28:54   Log-Likelihood:                -103.15
No. Observations:                  58   AIC:                             218.3
Df Residuals:                      52   BIC:                             230.7
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        -31.5812     10.655     -2.

The model with a three-year lag on Gini is the only one in which the Gini variable was statistically significant.

In [2]:
# Model 3 (3-year lag): same specification as the baseline, with `gini`
# replaced by `Gini_lag3`.
formula_3 = (
    "UK_gdp_pc_growth ~ HC_index + Capital_stock + Gini_lag3 + change_in_cpi + wrl_gdp_growth"
)
model_3 = smf.ols(formula=formula_3, data=growth_data_updated).fit()

# Plain-text regression table
print(model_3.summary())

                            OLS Regression Results                            
Dep. Variable:       UK_gdp_pc_growth   R-squared:                       0.597
Model:                            OLS   Adj. R-squared:                  0.557
Method:                 Least Squares   F-statistic:                     14.82
Date:                Thu, 06 Feb 2025   Prob (F-statistic):           6.53e-09
Time:                        14:29:59   Log-Likelihood:                -95.510
No. Observations:                  56   AIC:                             203.0
Df Residuals:                      50   BIC:                             215.2
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        -30.0177      9.960     -3.

In [3]:

# Model 5 (5-year lag): same specification as the baseline, with `gini`
# replaced by `Gini_lag5`.
formula_5 = (
    "UK_gdp_pc_growth ~ HC_index + Capital_stock + Gini_lag5 + change_in_cpi + wrl_gdp_growth"
)
model_5 = smf.ols(formula=formula_5, data=growth_data_updated).fit()

# Plain-text regression table
print(model_5.summary())

                            OLS Regression Results                            
Dep. Variable:       UK_gdp_pc_growth   R-squared:                       0.601
Model:                            OLS   Adj. R-squared:                  0.559
Method:                 Least Squares   F-statistic:                     14.43
Date:                Thu, 06 Feb 2025   Prob (F-statistic):           1.26e-08
Time:                        14:30:01   Log-Likelihood:                -91.845
No. Observations:                  54   AIC:                             195.7
Df Residuals:                      48   BIC:                             207.6
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        -24.6800     10.158     -2.

In [4]:
# Model 7 (7-year lag): same specification as the baseline, with `gini`
# replaced by `Gini_lag7`.
formula_7 = (
    "UK_gdp_pc_growth ~ HC_index + Capital_stock + Gini_lag7 + change_in_cpi + wrl_gdp_growth"
)
model_7 = smf.ols(formula=formula_7, data=growth_data_updated).fit()

# Plain-text regression table
print(model_7.summary())

                            OLS Regression Results                            
Dep. Variable:       UK_gdp_pc_growth   R-squared:                       0.634
Model:                            OLS   Adj. R-squared:                  0.594
Method:                 Least Squares   F-statistic:                     15.94
Date:                Thu, 06 Feb 2025   Prob (F-statistic):           4.29e-09
Time:                        14:30:03   Log-Likelihood:                -87.047
No. Observations:                  52   AIC:                             186.1
Df Residuals:                      46   BIC:                             197.8
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept        -13.5717     10.783     -1.

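The model with a 10-year lag on Gini had the highest explanatory power (R-squared of 0.704), although Gini_lag10 was not statistically significant. Note that each lag shortens the estimation sample (49 observations here versus 58 in the model without lags), so R-squared values are not strictly comparable across models.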

In [5]:

# Model 10 (10-year lag): same specification as the baseline, with `gini`
# replaced by `Gini_lag10`.
formula_10 = (
    "UK_gdp_pc_growth ~ HC_index + Capital_stock + Gini_lag10 + change_in_cpi + wrl_gdp_growth"
)
model_10 = smf.ols(formula=formula_10, data=growth_data_updated).fit()

# Plain-text regression table
print(model_10.summary())

                            OLS Regression Results                            
Dep. Variable:       UK_gdp_pc_growth   R-squared:                       0.704
Model:                            OLS   Adj. R-squared:                  0.670
Method:                 Least Squares   F-statistic:                     20.46
Date:                Thu, 06 Feb 2025   Prob (F-statistic):           2.12e-10
Time:                        14:30:05   Log-Likelihood:                -77.254
No. Observations:                  49   AIC:                             166.5
Df Residuals:                      43   BIC:                             177.9
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          2.9825     11.283      0.