In [7]:
# Import libraries
import os
from tqdm import tqdm
import pandas as pd

# Redirecting to the directory
# os.chdir(os.path.join('.\ECON4999X'))
CLEANED_DATA_DIR = os.path.normpath(r'.\Cleaned_Data')
DA_DATA_DIR = os.path.normpath(r'.\da_working_files')
# print(os.listdir(CLEANED_DATA_DIR))

import scipy.stats as st
import statsmodels.api as sm
import statsmodels.formula.api as smf
from matplotlib import pyplot as plt
import seaborn as sns


In [8]:
dataset_url = 'https://gist.githubusercontent.com/sachinsdate/c40651e9e4bc13a696780462209f1992/raw/95f58d30404b2bd205f738b2466ca58c34f7b2ec/wb_data_panel_2ind_7units_1992_2014.csv'

In [9]:
df_panel = pd.read_csv(dataset_url, header=0)

In [10]:
unit_col_name='COUNTRY'
time_period_col_name='YEAR'
 
#Create the dummy variables, one for each country
df_dummies = pd.get_dummies(df_panel[unit_col_name])
df_panel_with_dummies = df_panel.join(df_dummies)
df_panel_with_dummies

Unnamed: 0,COUNTRY,YEAR,GCF_GWTH_PCNT,GDP_PCAP_GWTH_PCNT,Belgium,CzechRepublic,France,Ireland,Portugal,UK,USA
0,Belgium,1992,1.829137,1.119566,True,False,False,False,False,False,False
1,Belgium,1993,-2.956525,-1.348000,True,False,False,False,False,False,False
2,Belgium,1994,3.764435,2.909319,True,False,False,False,False,False,False
3,Belgium,1995,4.113741,2.170550,True,False,False,False,False,False,False
4,Belgium,1996,0.415439,1.123669,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
156,USA,2010,10.457543,1.716400,False,False,False,False,False,False,True
157,USA,2011,4.218060,0.815453,False,False,False,False,False,False,True
158,USA,2012,7.662971,1.502188,False,False,False,False,False,False,True
159,USA,2013,4.665743,1.138897,False,False,False,False,False,False,True


In [11]:
y_var_name = 'GDP_PCAP_GWTH_PCNT'
X_var_names = ['GCF_GWTH_PCNT']

In [12]:
unit_names = ['Belgium', 'CzechRepublic', 'France', 'Ireland', 'Portugal', 'UK', 'USA']
unit_names.sort()

In [13]:
lsdv_expr = y_var_name + ' ~ '
i = 0
for X_var_name in X_var_names:
    if i > 0:
        lsdv_expr = lsdv_expr + ' + ' + X_var_name
    else:
        lsdv_expr = lsdv_expr + X_var_name
    i = i + 1
for dummy_name in unit_names[:-1]:
    lsdv_expr = lsdv_expr + ' + ' + dummy_name
 
print('Regression expression for OLS with dummies=' + lsdv_expr)

Regression expression for OLS with dummies=GDP_PCAP_GWTH_PCNT ~ GCF_GWTH_PCNT + Belgium + CzechRepublic + France + Ireland + Portugal + UK


In [14]:
lsdv_model = smf.ols(formula=lsdv_expr, data=df_panel_with_dummies)
lsdv_model_results = lsdv_model.fit()
print(lsdv_model_results.summary())

                            OLS Regression Results                            
Dep. Variable:     GDP_PCAP_GWTH_PCNT   R-squared:                       0.655
Model:                            OLS   Adj. R-squared:                  0.639
Method:                 Least Squares   F-statistic:                     41.48
Date:                Fri, 05 May 2023   Prob (F-statistic):           2.57e-32
Time:                        10:57:54   Log-Likelihood:                -292.91
No. Observations:                 161   AIC:                             601.8
Df Residuals:                     153   BIC:                             626.5
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 0.66

In [15]:
#n=number of groups
n=len(unit_names)
 
#T=number of time periods per unit
T=df_panel.shape[0]/n
 
#N=total number of rows in the panel data set
N=n*T
 
#k=number of regression variables of the Pooled OLS model including the intercept
k=len(X_var_names)+1

In [14]:
ssr_restricted_model = pooled_olsr_model_results.ssr
ssr_unrestricted_model = lsdv_model_results.ssr
k1 = len(pooled_olsr_model_results.params)
k2 = len(lsdv_model_results.params)
f_statistic = ((ssr_restricted_model - ssr_unrestricted_model) /ssr_unrestricted_model) * ((N-k2)/(k2-k1))
print('F-statistic for FE model='+str(f_statistic))

alpha=0.05
 
f_critical_value=st.f.ppf((1.0-alpha), (k2-k1), (N-k2))
print('F test critical value at alpha of 0.05='+str(f_critical_value))

NameError: name 'pooled_olsr_model_results' is not defined

In [16]:
from stargazer.stargazer import Stargazer

In [17]:
stargazer = Stargazer([lsdv_model_results])

In [23]:
stargazer.extract_data()