In [219]:
%load_ext autoreload
%load_ext line_profiler
%autoreload 2

import numpy as np
import pandas as pd
from mpl_toolkits import mplot3d
import matplotlib
import matplotlib.pyplot as plt
import scipy.optimize as optimize
import statsmodels.api as sm

from LimitedCommitmentModel import LimitedCommitmentModelClass

# plot style
linestyles = ['-','--','-.',':',':']
markers = ['o','s','D','*','P']
linewidth = 2
font_size = 17
font = {'size':font_size}
matplotlib.rc('font', **font)

plt.rcParams.update({'figure.max_open_warning': 0,'text.usetex': False})
path = 'output/'

SAVE = False

# c++ settings
do_compile = False
threads = 16


#dowload nlopt
import consav
consav.cpptools.setup_nlopt()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


# Solve the model

In [220]:
# compile c++ files
T = 10
specs = {
    'model 3':{'latexname':'$\sigma_{\psi}=0.1$', 'par':{'sigma_love':0.1, 'sigma_K': 0.5, 'T': T , 'threads':threads}},
}


# solve different models
models = {}
for m,(name,spec) in enumerate(specs.items()):
    print(f'{name} loading...',end='')
    
    # setup model
    models[name] = LimitedCommitmentModelClass(name=name,par=spec['par'])
    if do_compile & (m==0):
        compile_now = True
    else:
        compile_now = False
    models[name].link_to_cpp(force_compile=compile_now)
    models[name].spec = spec
    
    print(' solving...')
    %time models[name].solve() #Wall time: 42.3 s for T=2

    

model 3 loading... solving...
CPU times: total: 1h 16min 30s
Wall time: 15min 52s


# Prepare the data

In [272]:
#simulate TODO put in seperate function "Estimate"
model = models['model 3']

model.sim.init_A[:] = 8.0 # temp just to check that it works, if they have too little initial saving they will just consume everything, and saving is therefore close to zero
model.simulate()


In [343]:

#Save the data in a data frame
data_w = {}
data_m = {}
data   = pd.DataFrame()


wage_w      =  np.exp(model.par.wage_const_w +model.par.wage_K_w* model.sim.Kw)  
y_w         =  wage_w*model.sim.labor_w
wage_m      =  np.exp(model.par.wage_const_m+model.par.wage_K_m* model.sim.Km)
y_m         =  wage_m*model.sim.labor_m
init_barg   =  np.random.randint(2, size = model.par.simN) #TODO: use the correct one
Z_w         = 1 #TODO: Include Z values
Z_m         = 1 #TODO: Include Z values

for i in range(2,6): #use some periods in the middle of the simluation

    # WOMAN
    #value this period and last period, 
    data_w[i] = pd.DataFrame({
        'idx': range(1,model.par.simN+1) ,
        't' : i,
        'woman' : 1,
        'A' : model.sim.A[:,i] ,
        'couple': model.sim.couple[:,i],
        'Labor': model.sim.labor_w[:,i],
        'Labor_j': model.sim.labor_m[:,i],
        'cons': model.sim.cons_w[:,i],
        'wage': wage_w[:,i],
        'wage_j': wage_m[:,i],
        'y': y_w[:,i],
        'y_j': y_m[:,i],
        'Z': Z_w,
        'Z_j': Z_m,
        'init_barg': init_barg,
        'A_1' : model.sim.A[:,i-1] ,  
        'Labor_1': model.sim.labor_w[:,i-1],
        'Labor_j_1': model.sim.labor_m[:,i-1],
        'cons_1': model.sim.cons_w[:,i-1],
        'wage_1': wage_w[:,i-1],
        'wage_j_1': wage_m[:,i-1],
        'y_1': y_w[:,i-1],
        'y_j_1': y_m[:,i-1],
        'Z_1': Z_w,
        'Z_j_1': Z_m
    })

    
    # MAN
    data_m[i] = pd.DataFrame({
        'idx': range(model.par.simN+1,2*model.par.simN+1) ,
        't' : i,
        'woman' : 0,
        'A' : model.sim.A[:,i] ,
        'couple': model.sim.couple[:,i],
        'Labor': model.sim.labor_m[:,i],
        'Labor_j': model.sim.labor_w[:,i],
        'cons': model.sim.cons_m[:,i],
        'wage': wage_m[:,i],
        'wage_j': wage_w[:,i],
        'y': y_m[:,i],
        'y_j': y_w[:,i],
        'Z': Z_m,
        'Z_j': Z_w,
        'init_barg': init_barg,
        'A_1' : model.sim.A[:,i-1] ,
        'Labor_1': model.sim.labor_m[:,i-1],
        'Labor_j_1': model.sim.labor_w[:,i-1],
        'cons_1': model.sim.cons_m[:,i-1],
        'wage_1': wage_m[:,i-1],
        'wage_j_1': wage_w[:,i-1],
        'y_1': y_m[:,i-1],
        'y_j_1': y_w[:,i-1],
        'Z_1': Z_m,
        'Z_j_1': Z_w      
      
    }) 

    #collect the data
    data = pd.concat([data,data_w[i], data_m[i] ] )


#drop if single
data= data.drop(data[data.couple==0].index) #few obs dropped?


#sort data
data = data.sort_values(by =['idx','t'])


In [344]:
#generate variables 
list = ['Labor', 'Labor_j', 'wage' , 'wage_j', 'y', 'y_j', 'Z', 'Z_j', 'A', 'cons']

for val in list:
    val2  =  val + '_1'

    val_name = 'log_' + val
    val_name2 = 'log_' + val + '_1'
    name       = 'delta_log_' + val
    data[val_name] = np.log(data[val])
    data[val_name2] = np.log(data[val2])    
    data[name] = data[val].sub(data[val2]) 


data['Labor_inv'] = data.apply(lambda row: 1/row['Labor'], axis=1)
data['total_inc']  = data['y'].add(data['y_j']) 
data['inc_share_j'] =  data['y_j'].div(data['total_inc'])
data['Laborinv_t'] =  data['Labor_inv'].mul(data['t'])



  result = getattr(ufunc, method)(*inputs, **kwargs)


# ESTIMATE THE MODEL

In [345]:

# drop nan
data_regress = data[['idx','t','Labor' , 'Labor_inv', 'Labor_1' , 'Laborinv_t', 'delta_log_Labor', 'delta_log_wage_j','delta_log_wage']].dropna()



#Step1 find the residuals from hours equation

#defining the variables
x = data_regress[['Labor_inv', 'Laborinv_t']]
y  = data_regress['delta_log_Labor']
#x = sm.add_constant(x) #Noconstant

#performing the regression and fitting the model
result = sm.OLS(y,x).fit()
print(result.summary())

#find the residuals
data_regress['uhat'] = result.resid



#Step2 find the residuals from the wage regression, (Note: we can find it directly from our solution)
x = data_regress[['t']]
y  = data_regress['delta_log_wage']
#Noconstant
result = sm.OLS(y,x).fit()
print(result.summary())
data_regress['omega'] = result.resid


x = data_regress[['t']]
y  = data_regress['delta_log_wage_j']
result = sm.OLS(y,x).fit()
print(result.summary())
data_regress['omega_j'] = result.resid



# merge the residuals to the main data
data_regress = data_regress[['idx', 't','uhat', 'omega', 'omega_j']]
data = data.merge(data_regress, on = ['idx', 't'])



                                 OLS Regression Results                                
Dep. Variable:        delta_log_Labor   R-squared (uncentered):                   0.000
Model:                            OLS   Adj. R-squared (uncentered):              0.000
Method:                 Least Squares   F-statistic:                              78.85
Date:                Wed, 17 Jan 2024   Prob (F-statistic):                    5.79e-35
Time:                        12:00:35   Log-Likelihood:                      3.7987e+05
No. Observations:              321128   AIC:                                 -7.597e+05
Df Residuals:                  321126   BIC:                                 -7.597e+05
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [346]:
#step3 prepare for main regression


#Find the lagged value. Note: it works because single is absorbing, when people leave the data they will not come back
list = ['omega','omega_j','delta_log_Z', 'delta_log_Z_j', 'delta_log_y','delta_log_y_j',  'delta_log_A','inc_share_j']
data = data.sort_values(by =['idx','t']) 
data['idx_1'] = data['idx'].shift(periods =1 )

for val in enumerate(list):
    val_name = val + '_1'
    data[val_name] = data[val].shift(periods =1 ) 
    data.loc[data['idx_1'] !=  data['idx'],val:name] = np.nan


data['X3'] =  data['omega'].mul(data['Labor_inv'])
data['X4'] =  data['omega_j'].mul(data['Labor_inv'])
data['X5'] =  data['omega_1'].mul(data['Labor_inv'])
data['X6'] =  data['omega_j_1'].mul(data['Labor_inv'])

data['X7a'] =  data['delta_log_Z'].mul(data['Labor_inv'])
data['X7b'] =  data['delta_log_Z_1'].mul(data['Labor_inv'])
data['X8a'] =  data['delta_log_Z_j'].mul(data['Labor_inv'])
data['X8b'] =  data['delta_log_Z_j_1'].mul(data['Labor_inv'])

data['X9'] = data['init_barg'].mul(data['Labor_inv'])

data['X10'] = data['delta_log_y'].mul(data['Labor_inv'])
data['X11'] = data['delta_log_A'].mul(data['Labor_inv'])
data['X12'] = data['delta_log_y_1'].mul(data['Labor_inv'])
data['X13'] = data['delta_log_A_1'].mul(data['Labor_inv'])

data['X14'] = data['inc_share_j_1'].mul(data['delta_log_y_j_1']).mul(data['Labor_inv'])
data['X15'] = data['cons_1'].mul(data['delta_log_cons']).mul(data['Labor_inv'])
#x16: the same as x13, when we only have two periods


#data_regress = data[['X3','X4','X5','X6','X7a','X7b','X8a','X8b','X9','X10','X11','X12','X13','X14','X15','uhat']] #with Z
data_regress = data[['X3','X4','X5','X6','X9','X10','X11','X12','X13','X14','X15','uhat']] #without Z


#drop nan 
data_regress = data_regress.dropna()
#print(data_regress)


In [347]:
#STEP 3 run main regression
#x = data[['X3','X4','X5','X6','X7a','X7b','X8a','X8b','X9','X10','X11','X12','X13','X14','X15']] #with Z
x = data_regress[['X3','X4','X5','X6','X9','X10','X11','X12','X13','X14','X15']] #without Z
y  = data_regress['uhat']
#noconst

result = sm.OLS(y,x).fit() #TODO: use correct standard errors
print(result.summary())


#test for full commitment* TODO CHECK IF IT IS THE CORRECT variable that is tested! 
print(f' Test for full commitment')
#print(result.wald_test('(X4=0, X5=0, X6=0, X7a=0, X7b=0, X8a=0, X8b=0 , X9 =0)', use_f = False)) #with Z
print(result.wald_test('(X4=0, X5=0, X6=0, X9 =0)', use_f = False))

#test for no commitment* these should be zero
print(f' Test for no commitment')
#print(result.wald_test('(X5=0, X6=0, X7a=0, X7b=0, X8a=0, X8b=0 , X9 =0)', use_f = False)) #with Z
print(result.wald_test('(X5=0, X6=0, X9 =0)', use_f = False))



                                 OLS Regression Results                                
Dep. Variable:                   uhat   R-squared (uncentered):                   0.036
Model:                            OLS   Adj. R-squared (uncentered):              0.036
Method:                 Least Squares   F-statistic:                              544.9
Date:                Wed, 17 Jan 2024   Prob (F-statistic):                        0.00
Time:                        12:00:41   Log-Likelihood:                      1.6658e+05
No. Observations:              160564   AIC:                                 -3.331e+05
Df Residuals:                  160553   BIC:                                 -3.330e+05
Df Model:                          11                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

