In [1]:
import pandas as pd
import numpy as np
# from sklearn.linear_model import LinearRegression
from itertools import product
import warnings
# NOTE(review): this installs a blanket "ignore" filter, so every warning raised
# by later cells is hidden — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
class ModelGroupSpecs:
    """Enumerate the model specifications to estimate, one list per model group.

    A specification is a ``(model_group, ar_order, regressor)`` tuple. Three
    groups are supported:

    * ``"AR"``  - one specification per AR order, no regressor (``None``).
    * ``"OLS"`` - one specification per exogenous regressor, no AR order (``None``).
    * ``"ARX"`` - every pairing of an AR order with an exogenous regressor.

    Parameters
    ----------
    ar_orders : iterable
        AR orders to enumerate for the ``"AR"`` and ``"ARX"`` groups.
    desired_model_groups : str or collection of str
        Which groups to build. A single group may be passed as a plain string.
    """

    # Exogenous regressors shared by the OLS and ARX groups (one regressor per
    # specification). 'log_new_vaccines_per_capita' is currently excluded.
    # NOTE(review): the United Kingdom regressor name contains a space — confirm
    # it matches the column name in the dataset these specifications are run on.
    EXOGENOUS_REGRESSORS = (
        'log_new_people_vaccinated_per_capita',
        'delta_cases_per_capita_United Kingdom', 'delta_cases_per_capita_Germany', 'delta_cases_per_capita_France',
        'full_lockdown', 'full_lockdown.l30', 'full_lockdown.l45',
        'max_tp', 'min_tp', 'rain', 'humidity',
        'day_of_the_week', 'season', 'trend',
    )

    def __init__(self, ar_orders, desired_model_groups):
        self.ar_orders = ar_orders
        self.desired_model_groups = desired_model_groups

    def get_all_possible_combinations(self, model_group, MG_ar_orders, MG_regressors):
        """Return the Cartesian product of the three iterables as a list of tuples."""
        return list(product(model_group, MG_ar_orders, MG_regressors))

    def create_functional_sets(self):
        """Build the specification lists for the requested model groups.

        Returns
        -------
        list of list of tuple
            One inner list per requested group, always ordered AR, OLS, ARX
            regardless of the order in ``desired_model_groups``.

        Notes
        -----
        ``self.AR_models``, ``self.OLS_models`` and ``self.ARX_models`` are set
        only for the groups that were requested.
        """
        requested = self.desired_model_groups
        if isinstance(requested, str):
            # A bare string would be matched by substring ("AR" in "ARX" is True)
            # and silently add the AR group; treat it as a single group name.
            requested = [requested]

        output = []
        if "AR" in requested:
            # MG1 = Model Group 1: AR models.
            self.AR_models = self.get_all_possible_combinations(
                model_group=['AR'],
                MG_ar_orders=self.ar_orders,
                MG_regressors=[None])
            output.append(self.AR_models)

        if "OLS" in requested:
            # MG2N = Model Group 2N: Single Variable Exogenous OLS
            self.OLS_models = self.get_all_possible_combinations(
                model_group=['OLS'],
                MG_ar_orders=[None],
                MG_regressors=self.EXOGENOUS_REGRESSORS)
            output.append(self.OLS_models)

        if "ARX" in requested:
            # MG2T and MG3T: Introducing lagged dependent terms to the previous model specifications.
            self.ARX_models = self.get_all_possible_combinations(
                model_group=['ARX'],
                MG_ar_orders=self.ar_orders,
                MG_regressors=self.EXOGENOUS_REGRESSORS)
            output.append(self.ARX_models)

        # Returning the functional sets to be deployed.
        return output

def naming(model_group, ar_order, regressor):
    """Build the display label for one model specification.

    The three arguments are interpolated as-is, so a missing AR order or
    regressor appears literally as ``None`` in the label.
    """
    label = f"{model_group}, AR{ar_order}, Regressor = {regressor}"
    return label

In [3]:
# AR orders to enumerate: 1, 2, 3, 4 (np.arange excludes the stop value 5).
ar_orders = np.arange(1, 5)

In [4]:
# Request all three model groups for the AR orders defined above.
models = ModelGroupSpecs(
    ar_orders=ar_orders,
    desired_model_groups=["AR", "OLS", "ARX"],
)

In [5]:
# One list of (model_group, ar_order, regressor) tuples per requested group,
# in AR, OLS, ARX order.
model_groups = models.create_functional_sets()

In [6]:
# Load the combined dataset (path is relative to the working directory); the
# CSV's "Unnamed: 0" column becomes the row index.
combined = pd.read_csv("Combined_Dataset.csv", index_col="Unnamed: 0")

In [7]:
# Column name of the dependent variable.
# NOTE(review): not referenced by the cells visible here, which spell the
# column name out directly — confirm it is used further down.
y_var = 'delta_deaths_per_capita'

In [8]:
from statsmodels.tsa.arima.model import ARIMA
from tqdm import tqdm
import statsmodels.formula.api as sm

In [9]:
# First lag of the dependent variable; the first row has no predecessor and is
# filled with 0.
# NOTE(review): the dataset already has a 'delta_deaths_per_capita.l1' column —
# presumably this copy exists to get a name without a dot; confirm.
combined['delta_deaths_per_capita_l1'] = (
    combined['delta_deaths_per_capita']
    .shift(1)
    .fillna(0)
)

In [10]:
# Replace the space in the United Kingdom deaths column name so the column can
# be referenced by its bare name in the regression formula below.
# Reassigning the result instead of using inplace=True follows the pandas
# recommendation; the column mapping itself is unchanged.
combined = combined.rename(
    columns={"delta_deaths_per_capita_United Kingdom": "delta_deaths_per_capita_United_Kingdom"}
)

In [11]:
# Show the frame to check the newly added lag column and the renamed column.
combined

Unnamed: 0,date,delta_deaths_per_capita,delta_cases_per_capita,delta_deaths_per_capita.l1,delta_cases_per_capita.l1,log_new_vaccines_per_capita,log_new_people_vaccinated_per_capita,delta_cases_per_capita_United_Kingdom,delta_deaths_per_capita_United_Kingdom,delta_cases_per_capita_Germany,...,full_lockdown.l30,full_lockdown.l45,max_tp,min_tp,rain,humidity,day_of_the_week,season,trend,delta_deaths_per_capita_l1
0,2020-03-01,0.0,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.0,0.0,0.0000,1.650000e+00,1.65000,0.0000,0,0,1,0.000000
1,2020-03-02,0.0,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,5.864491e-07,0.000000e+00,3.933232e-07,...,0.0,0.0,0.0875,7.750000e-01,2.15625,0.0875,1,0,2,0.000000
2,2020-03-03,0.0,2.006862e-07,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,8.063675e-07,0.000000e+00,4.529176e-07,...,0.0,0.0,1.3750,2.162500e+00,0.80000,1.2875,2,0,3,0.000000
3,2020-03-04,0.0,8.027447e-07,0.000000,2.006862e-07,0.000000e+00,0.000000e+00,8.210287e-07,0.000000e+00,6.197820e-07,...,0.0,0.0,0.4125,-1.110223e-16,0.50625,-0.9625,3,0,4,0.000000
4,2020-03-05,0.0,0.000000e+00,0.000000,8.027447e-07,0.000000e+00,0.000000e+00,7.184001e-07,0.000000e+00,1.299158e-06,...,0.0,0.0,-0.7875,-9.250000e-01,2.36250,-1.2000,4,0,5,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2021-10-28,0.0,5.213827e-04,0.000013,3.259144e-04,2.693943e-10,1.112052e-10,5.728728e-04,2.433764e-06,3.341698e-04,...,0.0,0.0,-1.0375,8.612500e+00,4.76250,-0.0500,4,3,607,0.000013
607,2021-10-29,0.0,5.099436e-04,0.000000,5.213827e-04,1.482583e-10,1.046810e-10,6.327639e-04,2.726988e-06,2.940150e-04,...,0.0,0.0,-1.6250,6.212500e+00,4.56875,-0.5875,5,3,608,0.000000
608,2021-10-30,0.0,5.952352e-04,0.000000,5.099436e-04,1.034203e-10,8.457030e-11,5.982660e-04,2.433764e-06,2.567685e-04,...,0.0,0.0,-0.9250,5.450000e+00,10.87500,0.7000,6,3,609,0.000000
609,2021-10-31,0.0,3.939470e-04,0.000000,5.952352e-04,0.000000e+00,0.000000e+00,5.535053e-04,1.084931e-06,2.012742e-04,...,0.0,0.0,-0.8375,6.812500e+00,7.90000,0.0875,0,3,610,0.000000


In [16]:
# Regress domestic deaths on UK/German/French deaths, season and the lockdown
# indicator. Standard errors use a HAC covariance estimator with 12 lags and
# the small-sample correction enabled.
model = sm.ols(
    'delta_deaths_per_capita ~ delta_deaths_per_capita_United_Kingdom'
    ' + delta_deaths_per_capita_Germany + delta_deaths_per_capita_France'
    ' + season + full_lockdown',
    data=combined,
)
results = model.fit(
    cov_type='HAC',
    cov_kwds={'maxlags': 12, 'use_correction': True},
)




In [17]:
# Print every table of the regression summary as a LaTeX tabular block.
for summary_table in results.summary().tables:
    latex_block = summary_table.as_latex_tabular()
    print(latex_block)

\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}    & delta\_deaths\_per\_capita & \textbf{  R-squared:         } &     0.533   \\
\textbf{Model:}            &            OLS             & \textbf{  Adj. R-squared:    } &     0.529   \\
\textbf{Method:}           &       Least Squares        & \textbf{  F-statistic:       } &     19.09   \\
\textbf{Date:}             &      Fri, 29 Apr 2022      & \textbf{  Prob (F-statistic):} &  1.18e-17   \\
\textbf{Time:}             &          11:25:22          & \textbf{  Log-Likelihood:    } &    7024.4   \\
\textbf{No. Observations:} &              611           & \textbf{  AIC:               } & -1.404e+04  \\
\textbf{Df Residuals:}     &              605           & \textbf{  BIC:               } & -1.401e+04  \\
\textbf{Df Model:}         &                5           & \textbf{                     } &             \\
\textbf{Covariance Type:}  &            HAC             & \textbf{                     } &             \\
