In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
import os

In [2]:
os.chdir("C:\\Users\\Administrator\\Documents\\personal\\mrw1992\\mrw1992")

In [3]:
data = pd.read_csv("data/mrw.csv")

#### Reformatting the data

In [4]:
#convenience stuff
data = data.drop(data.columns[0], axis = 1)
data = data.set_index('country')

#creating desired variables
data['lngd'] = np.log(data['popgrowth']/100 + 0.05)
data['ls'] = np.log(data['i_y']/100)
data['const'] = 1
data['ls_lngd'] = data['ls'] - data['lngd']

# Standard Solow Model

The model starts with a Cobb-Douglas production function:

\begin{align}
Y(t) = K(t)^{\alpha}(A(t)L(t))^{1 - \alpha}
\end{align}

The evolution of capital is determined by the following equation:

\begin{align}
\dot{k(t)} = sk(t)^{\alpha} - (n - g - \delta)k(t)
\end{align}

From this we can get the steady state level of $k(t)$ by equating $\dot{k(t)}$ to 0.  
We then get a simple formula to determine the steady state $k(t)^*$

\begin{align}
k(t)^* = (s/(n + g + \delta))^{1/(1 - \alpha)}
\end{align}

We can take logs of the production function and substitute in $\dot{k(t)}$ to find the equation we want to estimate, which is the steady state level of output. We also add an initial $A(0)$ which we take as equal to $a + \epsilon$ where $a$ is a constant and $\epsilon$ is a country specific shock

\begin{align}
log(Y/L) = a + (\alpha/(1 - \alpha))log(s) - (\alpha/(1 - \alpha))log(n + g + \delta) + \epsilon
\end{align}

where:
a is the constant in the assumed $log(A)$ equation  
$\epsilon$ is the country shock for $log(a)$  
$\alpha$ is the share of capital  
s is the exogenous savings rate (measured with average share of investment in GDP)  
n is the exogenous population growth rate  
g is the exogenous tfp growth rate  
$\delta$ is the depriciation rate of capital  

This corresponds to equation 7 in the paper

In [5]:
#subsetting data
data_reg = data.loc[data['n'] == 1, :] #no oil
data_d = data_reg[data_reg.i == 1] #pop in 1960 less than 1 mil
data_oecd = data_reg[data_reg.o == 1] #oecd

Unrestricted Model

In [6]:
#no restrictions on coefficients here
reg1 = sm.OLS(endog = np.log(data_reg['rgdpw85']),
             exog = data_reg[['const', 'ls', 'lngd']],
             missing = 'drop').fit()

reg2 = sm.OLS(endog = np.log(data_d['rgdpw85']),
             exog = data_d[['const', 'ls', 'lngd']],
             missing = 'drop').fit()

reg3 = sm.OLS(endog = np.log(data_oecd['rgdpw85']),
             exog = data_oecd[['const', 'ls', 'lngd']],
             missing = 'drop').fit()

Restricted Model

In [7]:
#coeff(log(savings)) = -coeff(log(n+g+d))
regr1 = sm.OLS(endog = np.log(data_reg['rgdpw85']),
             exog = data_reg[['const', 'ls_lngd']],
             missing = 'drop').fit()

regr2 = sm.OLS(endog = np.log(data_d['rgdpw85']),
             exog = data_d[['const', 'ls_lngd']],
             missing = 'drop').fit()

regr3 = sm.OLS(endog = np.log(data_oecd['rgdpw85']),
             exog = data_oecd[['const', 'ls_lngd']],
             missing = 'drop').fit()

Making table 1

In [20]:
info_dict = {'R^2': lambda x: x.rsquared,
            'N': lambda x: x.nobs,
            's.e.e.': lambda x: np.sqrt(x.scale),
            'Implied α': lambda x: f"{x.params[1]/(1 + x.params[1]):.2f}"}

results_unres = summary_col(results = [reg1, reg2, reg3],
                           float_format='%0.2f',
                           stars = True,
                           model_names = ['Non-Oil',
                                         'Intermediate',
                                         'OECD'],
                           info_dict = info_dict,
                           regressor_order = ['const',
                                             'ls',
                                             'lngd'])


results_res = summary_col(results = [regr1, regr2, regr3],
                           float_format='%0.2f',
                           stars = True,
                           model_names = ['Non-Oil',
                                         'Intermediate',
                                         'OECD'],
                           info_dict = info_dict,
                           regressor_order = ['const',
                                             'ls',
                                             'ls_lngd'])


results_res.add_title('Restricted Regressions')
results_unres.add_title('Unrestricted Regressions')
print(results_unres)
print('')
print(results_res)

       Unrestricted Regressions
          Non-Oil  Intermediate   OECD 
---------------------------------------
const     5.43***  5.35***      8.02***
          (1.58)   (1.54)       (2.52) 
ls        1.42***  1.32***      0.50   
          (0.14)   (0.17)       (0.43) 
lngd      -1.99*** -2.02***     -0.74  
          (0.56)   (0.53)       (0.85) 
R^2       0.6009   0.5989       0.1059 
N         98.0000  75.0000      22.0000
s.e.e.    0.6891   0.6106       0.3774 
Implied α 0.59     0.57         0.33   
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

        Restricted Regressions
          Non-Oil Intermediate   OECD 
--------------------------------------
const     6.87*** 7.09***      8.62***
          (0.12)  (0.15)       (0.53) 
ls_lngd   1.49*** 1.43***      0.55   
          (0.12)  (0.14)       (0.37) 
R^2       0.5974  0.5917       0.1031 
N         98.0000 75.0000      22.0000
s.e.e.    0.6885  0.6119       0.3684 
Implied α 0.60    0.59         0.36   
Standar

### Interpretting

Lets not get starry eyed with the regression tables too quickly.  

MRW take these regression tables with cautious optimism. While we see the direction and significance in the variables we want in the way we expect - i.e. savings positively determines steady state level of output per capita, and $(n + g + \delta)$ negatively affects output per capita. Also putting a restriction on the coefficient $(n + g + \delta)$ doesn't really change our results too much. 
Also, the model seems to explain a lot of the cross country variation in output per capita ($R^2$ is fairly high). But this shouldn't be taken as evidence for the Solow model. 

The big but here is that the implied $\alpha$ level isn't consistent with the empirical level of capital share, according to which the capital share should've been around 1/3.  

This basically amounts to these regressions being very weak evidence for the Solow model. If we think of all the things the Solow model says, this regression only says 1 thing: savings, population growth and depriciation are highly correlated to GDP, which is consistent with an entire class of growth models. 

Because of this, MRW augment the solow model

# Augmenting the Solow Model

MRW Augment the Solow model with human capital. 

With that, the Cobb-Douglas production function becomes:
\begin{align}
Y(t) = K(t)^{\alpha}H(t)^{\beta}(A(t)L(t))^{1 - \alpha - \beta}
\end{align}

Evolution of the economy is determined by:

\begin{align}
\dot{k(t)} = s_{k}y(t) - (n + g +\delta)k(t)\\
\dot{h(t)} = s_{h}y(t) - (n + g +\delta)h(t)
\end{align}

We can use these to derive the steady state values for k and h by setting $\dot{k(t)}$ and $\dot{h(t)}$ to 0.  
Using those we can arrive at the equation that we want to estimate.

\begin{align}
log(Y(t)/L(t)) = log(A(0)) + gt + (\alpha/(1 - alpha))log(s_k) - (\alpha/(1 - \alpha))log(n + g + \delta) + (\beta/(1 - \alpha))log(h^*)
\end{align}

This is very similar to the standard solow model equation we estimated before except for the last term. We can think of the last term being in the error term in our previous specification. This would'nt be a problem by itself but since $h^*$ is determined by $s$ and $n$, this would've biased our coefficients in the previous specification

In [16]:
data['lschool'] = np.log(data['school'])
data['lsch_ngd'] = data['lschool'] - data['lngd']

#subsetting data
data_reg = data.loc[data['n'] == 1, :] #no oil
data_d = data_reg[data_reg.i == 1] #pop in 1960 less than 1 mil
data_oecd = data_reg[data_reg.o == 1] #oecd

Unrestricted Model

In [17]:
#no restrictions on coefficients here
regh1 = sm.OLS(endog = np.log(data_reg['rgdpw85']),
             exog = data_reg[['const', 'ls', 'lngd', 'lschool']],
             missing = 'drop').fit()

regh2 = sm.OLS(endog = np.log(data_d['rgdpw85']),
             exog = data_d[['const', 'ls', 'lngd', 'lschool']],
             missing = 'drop').fit()

regh3 = sm.OLS(endog = np.log(data_oecd['rgdpw85']),
             exog = data_oecd[['const', 'ls', 'lngd', 'lschool']],
             missing = 'drop').fit()

Restricted Model

In [18]:
#coeff(log(savings)) = -coeff(log(n+g+d))
reghr1 = sm.OLS(endog = np.log(data_reg['rgdpw85']),
             exog = data_reg[['const', 'ls_lngd', 'lsch_ngd']],
             missing = 'drop').fit()

reghr2 = sm.OLS(endog = np.log(data_d['rgdpw85']),
             exog = data_d[['const', 'ls_lngd', 'lsch_ngd']],
             missing = 'drop').fit()

reghr3 = sm.OLS(endog = np.log(data_oecd['rgdpw85']),
             exog = data_oecd[['const', 'ls_lngd', 'lsch_ngd']],
             missing = 'drop').fit()

In [19]:
info_dict = {'R^2': lambda x: x.rsquared,
            'N': lambda x: x.nobs,
            's.e.e.': lambda x: np.sqrt(x.scale),
            'Implied α': lambda x: f"{x.params[1]/(1 + x.params[1]):.2f}"}

results_unres = summary_col(results = [regh1, regh2, regh3],
                           float_format='%0.2f',
                           stars = True,
                           model_names = ['Non-Oil',
                                         'Intermediate',
                                         'OECD'],
                           info_dict = info_dict,
                           regressor_order = ['const',
                                             'ls',
                                             'lngd',
                                             'lschool'])


results_res = summary_col(results = [reghr1, reghr2, reghr3],
                           float_format='%0.2f',
                           stars = True,
                           model_names = ['Non-Oil',
                                         'Intermediate',
                                         'OECD'],
                           info_dict = info_dict,
                           regressor_order = ['const',
                                             'ls',
                                             'ls_lngd',
                                             'lsch_ngd'])


results_res.add_title('Restricted Regressions')
results_unres.add_title('Unrestricted Regressions')
print(results_unres)
print('')
print(results_res)

       Unrestricted Regressions
          Non-Oil  Intermediate   OECD 
---------------------------------------
const     3.83***  4.43***      5.10*  
          (1.18)   (1.16)       (2.47) 
ls        0.70***  0.70***      0.28   
          (0.13)   (0.15)       (0.39) 
lngd      -1.75*** -1.50***     -1.08  
          (0.42)   (0.40)       (0.76) 
lschool   0.65***  0.73***      0.77** 
          (0.07)   (0.10)       (0.29) 
R^2       0.7856   0.7807       0.3524 
N         98.0000  75.0000      22.0000
s.e.e.    0.5077   0.4547       0.3300 
Implied α 0.41     0.41         0.22   
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01

        Restricted Regressions
          Non-Oil Intermediate   OECD 
--------------------------------------
const     4.83*** 4.59***      5.18***
          (0.24)  (0.34)       (1.36) 
ls_lngd   0.74*** 0.71***      0.28   
          (0.12)  (0.14)       (0.33) 
lsch_ngd  0.66*** 0.73***      0.77** 
          (0.07)  (0.09)       (0.28) 
R^2  