# Returns to Education

In [1]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np

# Render every float shown in a DataFrame as a dollar amount with thousands
# separators and two decimals (e.g. 1339.03 -> "$1,339.03").
pd.set_option('display.float_format', '${:,.2f}'.format)

We start by loading an observed dataset.

In [2]:
# Load the pickled sample; the preview below shows Age, Earnings, Schooling and
# Experience for each Identifier.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a trusted source.
df = pd.read_pickle('data.mincer.pkl')
df.head(5)

Unnamed: 0_level_0,Age,Earnings,Schooling,Experience
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,35,"$1,339.03",11,18
1,35,"$1,495.97",12,17
2,51,"$4,331.89",11,34
3,65,"$8,373.52",15,44
4,45,"$2,746.87",10,29


## Mincer Returns

Now we can run the baseline regression.

In [3]:
# Baseline specification: log earnings regressed on schooling and a quadratic
# in experience, estimated by OLS on the loaded sample.
formula = 'np.log(Earnings) ~ Schooling + Experience + np.square(Experience)'
model = smf.ols(formula, data=df)

# Keep the fitted result under its own name, then display the summary table.
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,np.log(Earnings),R-squared:,0.988
Model:,OLS,Adj. R-squared:,0.988
Method:,Least Squares,F-statistic:,26670.0
Date:,"Mon, 28 May 2018",Prob (F-statistic):,0.0
Time:,13:55:38,Log-Likelihood:,884.23
No. Observations:,1000,AIC:,-1760.0
Df Residuals:,996,BIC:,-1741.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.4074,0.026,172.438,0.000,4.357,4.458
Schooling,0.1304,0.002,70.527,0.000,0.127,0.134
Experience,0.0947,0.001,113.750,0.000,0.093,0.096
np.square(Experience),-0.0007,1.45e-05,-48.200,0.000,-0.001,-0.001

0,1,2,3
Omnibus:,0.149,Durbin-Watson:,2.006
Prob(Omnibus):,0.928,Jarque-Bera (JB):,0.122
Skew:,0.027,Prob(JB):,0.941
Kurtosis:,3.008,Cond. No.,10900.0


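What about the underlying economic model? The cell below solves a sequential schooling decision by backward induction and reports, for each schooling level, the probability of continuing for one more year.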

## True Returns

In [63]:
from scipy.stats import lognorm
from scipy.stats import norm



def conditional_expectation(eval_point, s):
    """Return E[eps | eps > eval_point] for a log-normal shock eps.

    The shock follows ``lognorm(s)``, i.e. ``log(eps) ~ N(0, s ** 2)``, which is
    the same distribution used for the continuation probabilities below.

    NOTE(review): this helper was called but not defined anywhere in the
    visible notebook, so the cell failed on a fresh kernel. The closed form
    below is a reconstruction; it reproduces the ``p_s`` values recorded in the
    cell output.

    Parameters
    ----------
    eval_point : float
        Lower truncation point of the shock (must be positive).
    s : float
        Standard deviation of ``log(eps)`` (the ``lognorm`` shape parameter).

    Returns
    -------
    float
        Mean of the shock conditional on exceeding ``eval_point``.
    """
    z = np.log(eval_point) / s
    # exp(s^2 / 2) is the unconditional mean; norm.sf(z) = P(eps > eval_point).
    return np.exp(s ** 2 / 2) * norm.cdf(s - z) / norm.sf(z)


# Y_s[k]: earnings level with k periods of schooling.
# V_s[k]: value at level k before the shock is known.
# p_s[k]: probability of moving on from level k to level k + 1.
Y_s = np.tile(np.nan, 5)
V_s = np.tile(np.nan, 5)
p_s = np.tile(np.nan, 4)

r = 0.1  # growth rate of earnings per schooling level, also the discount rate
s = 0.1  # standard deviation of the log shock

# Earnings grow by the factor (1 + r) with every additional level.
Y_s[0] = 1
for i in range(1, 5):
    Y_s[i] = Y_s[i - 1] * (1 + r)

# Mean of the log-normal shock, used to adjust expected earnings.
shift = np.exp(s ** 2 / 2)

# Terminal level: no further choice, so the value is just expected earnings.
V_s[4] = Y_s[4] * shift

# Backward induction over the remaining levels. At level k the discounted
# value of continuing is weighted by p_s[k]; otherwise earnings Y_s[k] are
# weighted by the shock's mean above the threshold.
for k in range(3, -1, -1):
    eval_point = V_s[k + 1] / ((1 + r) * Y_s[k])
    p_s[k] = lognorm.cdf(eval_point, s)

    V_s[k] = (1 - p_s[k]) * Y_s[k] * conditional_expectation(eval_point, s)
    V_s[k] += p_s[k] * (V_s[k + 1] / (1 + r))
p_s

array([0.79530393, 0.74772136, 0.67040445, 0.51993881])