# Mincer returns

In [3]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np

# Render every float that pandas displays as a dollar amount with thousands
# separators and two decimals (e.g. 5841.26 is shown as "$5,841.26").
# This is a global display option: it applies to all float columns pandas
# prints, not only to Earnings, and it does not change the stored values.
pd.options.display.float_format = '${:,.2f}'.format

## Simulation

We start by simulating a dataset based on the accounting identity model.

In [6]:
# Parameters of the simulated log-earnings equation used by log_observed_earnings.
# The economic labels marked "presumably" are inferred from the accounting-identity
# model and are not established by this notebook -- TODO confirm against the paper.
P_0 = 239.15215950404396  # enters the intercept as log(P_0) - kappa; presumably initial potential earnings
kappa = 1.0  # subtracted in the intercept and scales the experience terms; presumably the initial investment share
rho_0 = 0.075  # multiplies kappa in the experience terms; presumably the return to post-school investment
rho_s = 0.1250  # coefficient on years of schooling
T = 55  # divisor in the experience terms and exclusive upper bound of simulated experience; presumably working-life length
num_agents = 1000  # number of simulated individuals



def log_observed_earnings(s, x):
    """Draw one noisy log-earnings value from the accounting-identity model.

    Parameters
    ----------
    s : number
        Years of schooling; enters linearly with coefficient ``rho_s``.
    x : number
        Years of experience; enters through a linear and a quadratic term.

    Returns
    -------
    float
        ``log(P_0) - kappa`` plus the schooling term plus the linear
        experience term, minus the sum of the quadratic experience term and
        one normal draw with standard deviation 0.1.

    Notes
    -----
    Reads the module-level constants ``P_0``, ``kappa``, ``rho_0``, ``rho_s``
    and ``T`` and consumes one draw from NumPy's global random generator.
    """
    intercept = np.log(P_0) - kappa
    schooling_term = rho_s * s
    experience_linear = (rho_0 * kappa + (rho_0 * kappa) / (2 * T) + kappa / T) * x
    experience_quadratic = (rho_0 * kappa / (2 * T)) * (x ** 2)

    # The shock is grouped with the quadratic term, so it enters with a
    # negative sign; for a mean-zero normal draw the distribution is unchanged.
    shock = np.random.normal(scale=0.1)

    return intercept + schooling_term + experience_linear - (experience_quadratic + shock)

# Seed NumPy's global generator so that Restart & Run All reproduces the same
# simulated sample (and therefore the same pickle and regression estimates).
# log_observed_earnings draws its shock from this same global generator.
# Outputs recorded from earlier, unseeded runs will differ slightly.
np.random.seed(123)

data = []
for i in range(num_agents):
    # Schooling is drawn uniformly from 10..15 and experience from 1..T-1.
    s = np.random.choice(range(10, 16))
    x = np.random.choice(range(1, T))
    y = log_observed_earnings(s, x)
    # Age is schooling plus experience plus 6 (presumably the school-entry age).
    age = s + x + 6

    # One record per agent: identifier, age, earnings in levels, schooling, experience.
    data.append([i, age, np.exp(y), s, x])

Now we are ready to store the dataset.

In [11]:
# Assemble the simulated records into a frame keyed by the agent identifier
# and persist it to disk for the estimation section.
columns = ['Identifier', 'Age', 'Earnings', 'Schooling', 'Experience']
df = pd.DataFrame(data, columns=columns).set_index('Identifier')
df.to_pickle('data.mincer.pkl')

## Estimation

We can now load our simulated dataset and run the conventional Mincer regression.

In [12]:
# Reload the sample from the pickle written in the simulation section, so the
# estimation starts from the stored dataset rather than the in-memory frame.
df = pd.read_pickle('data.mincer.pkl')
# Show the first rows as a quick check of the columns and values.
df.head()

Unnamed: 0_level_0,Age,Earnings,Schooling,Experience
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,63,"$5,841.26",11,46
1,33,"$1,643.54",15,12
2,19,$398.34,11,2
3,30,"$1,149.03",10,14
4,67,"$8,965.13",14,47


Now we can run the baseline regression.

In [13]:
# Conventional Mincer specification: log earnings regressed on schooling and
# a quadratic in experience, estimated by ordinary least squares.
formula = 'np.log(Earnings) ~ Schooling + Experience + np.square(Experience)'
model = smf.ols(formula=formula, data=df)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,np.log(Earnings),R-squared:,0.988
Model:,OLS,Adj. R-squared:,0.987
Method:,Least Squares,F-statistic:,26250.0
Date:,"Tue, 29 May 2018",Prob (F-statistic):,0.0
Time:,08:33:46,Log-Likelihood:,866.99
No. Observations:,1000,AIC:,-1726.0
Df Residuals:,996,BIC:,-1706.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.4705,0.025,176.035,0.000,4.421,4.520
Schooling,0.1241,0.002,67.110,0.000,0.120,0.128
Experience,0.0956,0.001,113.649,0.000,0.094,0.097
np.square(Experience),-0.0007,1.49e-05,-48.079,0.000,-0.001,-0.001

0,1,2,3
Omnibus:,1.443,Durbin-Watson:,2.036
Prob(Omnibus):,0.486,Jarque-Bera (JB):,1.342
Skew:,0.086,Prob(JB):,0.511
Kurtosis:,3.054,Cond. No.,10300.0


These results are designed so that they line up rather closely with the estimated coefficients reported in Table 2 for Whites in 1940.

## True Returns

In [63]:
from scipy.stats import lognorm
from scipy.stats import norm



# Backward induction over five schooling levels (indices 0..4).
#   Y_s[k]: earnings level at schooling level k.
#   V_s[k]: value of being at schooling level k.
#   p_s[k]: probability that the shock falls below the threshold at level k;
#           it weights the discounted value of moving on to level k + 1.
Y_s = np.tile(np.nan, 5)
V_s = np.tile(np.nan, 5)
p_s = np.tile(np.nan, 4)

r = 0.1  # growth factor of Y_s per level; also the discount rate used below
s = 0.1  # shape parameter passed to lognorm (note: `s` denoted schooling in the simulation section)

# Earnings grow by the factor (1 + r) with each additional level.
Y_s[0] = 1
for i in range(1, 5):
    Y_s[i] = Y_s[i - 1] * (1 + r)

# This is the adjustment to the expected earnings: exp(s^2 / 2) is the mean
# of a lognormal variable with shape s and unit scale.
shift = np.exp(s ** 2 / 2)

# Top level: no further choice is modelled, so the value is expected earnings.
V_s[4] = Y_s[4] * shift

# NOTE(review): conditional_expectation is not defined in the visible part of
# this notebook and must exist before this cell runs. Presumably it returns the
# expected shock conditional on exceeding eval_point -- TODO confirm.
#
# Work backwards from level 3 to level 0. Each step values level k from the
# choice between stopping at k and continuing to k + 1.
for level in range(3, -1, -1):
    # Shock threshold at which the discounted next-level value equals current earnings.
    eval_point = V_s[level + 1] / ((1 + r) * Y_s[level])
    p_s[level] = lognorm.cdf(eval_point, s)

    # Shock above the threshold: take earnings at the current level.
    stop_value = (1 - p_s[level]) * Y_s[level] * conditional_expectation(eval_point, s)
    # Shock below the threshold: move on and receive the discounted next-level value.
    continue_value = p_s[level] * (V_s[level + 1] / (1 + r))
    V_s[level] = stop_value + continue_value
p_s

array([0.79530393, 0.74772136, 0.67040445, 0.51993881])