In [1]:
import pickle
import numpy as np
import pandas as pd
import math

We import the model specification parameters and externally defined constants here.

In [2]:
# Execute entire file and make all variables/functions/classes
# available for further use
from ipynb.fs.full.model_spec import (num_periods,
                                      num_choices,
                                      educ_max,
                                      educ_min,
                                      educ_range,
                                      mu,
                                      delta,
                                      optim_paras,
                                      num_draws_emax,
                                      num_agents_sim,
                                      seed_emax,
                                      seed_sim,
                                      shocks_cov)

# Import specified definitions only from given notebook
import ipynb.fs
from .defs.shared_constants import MISSING_INT, MISSING_FLOAT
from .defs.shared_auxiliary import draw_disturbances

In [3]:
# Import the final output of pyth_create_state_space, args
# In the modular implementation pyth_create_state_space will be called by by pyth_solve
# pyth_solve is executed before pyth_simulate
file_name = "args_file.pkl"
# Open the file for reading
file_object = open(file_name,'rb')  
# load the object from the file into var args
args = pickle.load(file_object)

In [4]:
# Unpack objects from agrs
states_all, states_number_period, mapping_states_index, max_states_period = args[0], args[1], args[2], args[3]

In [5]:
# Import the final output of pyth_backward_induction, periods_emax
# In the modular implementation pyth_create_state_space will be called by by pyth_solve
# pyth_solve is executed before pyth_simulate
file_name = "periods_emax_file.pkl"
# Open the file for reading
file_object = open(file_name,'rb')  
# load the object from the file into var args
periods_emax = pickle.load(file_object)

In [6]:
# Create draws for simulated sample
draws_sim = draw_disturbances((num_periods, num_agents_sim), shocks_cov, seed_sim)

Then, we need to define additional function called in the loop to determine agents choices. 

In [7]:
def calculate_wage_systematic(educ_level, exp_p, exp_f, optim_paras):
    """Calculate systematic wages, i.e. net of shock, for specified state."""
    
    # Initialize container
    wage_systematic = np.nan
    
    # Construct wage components
    gamma_s0 = np.dot(educ_level, optim_paras[0:3]) 
    gamma_s1 = np.dot(educ_level, optim_paras[3:6])
    period_exp_sum = exp_p * np.dot(educ_level, optim_paras[6:9]) + exp_f 
    depreciation = 1 - np.dot(educ_level, optim_paras[9:12])
    
    # Calculate wage in the given state
    period_exp_total = period_exp_sum * depreciation + 1
    returns_to_exp = gamma_s1 * period_exp_total
    wage_systematic = np.exp(gamma_s0)*returns_to_exp
    
    # Return function output
    return wage_systematic # This is a scalar, equal for all choices

In [8]:
def calculate_period_wages(wage_systematic, draws):
    """Calculate wages for each choice including choice specific productivty shock."""
    
    # Initialize container
    period_wages = np.tile(np.nan, num_choices)
    
    # Take the exponential of the disturbances
    exp_draws = np.exp(draws)
    
    # Calculate choice specific wages including productivity shock
    period_wages = wage_systematic * exp_draws
    
    # Return function output
    return period_wages # This is a vector, difference between choices comes from disturbance term.

In [9]:
def calculate_consumption_utilities(period_wages):
    """Calculate the first part of the period utilities related to consumption"""
    
    # Initialize container
    consumption_utilities = np.tile(np.nan, num_choices)
    
    # Define hours array, possibly move to another file
    hours = np.array([0, 18, 38])
    
    # Calculate choice specific wages including productivity shock
    consumption_utilities = hours * period_wages
    consumption_utilities[1]  = consumption_utilities[1]**mu/mu
    consumption_utilities[2]  = consumption_utilities[2]**mu/mu
    
    # Return function output
    return consumption_utilities

In [10]:
def calculate_total_utilities(consumption_utilities, optim_paras):
    """Calculate total flow utility for all choices."""
    
    # Initialize container for utilities at state space point and period
    total_utilities = np.tile(np.nan, num_choices)
    
    # Calculate U(.) for the three available choices
    U_ = np.array([math.exp(0.00),  math.exp(optim_paras[12]), math.exp(optim_paras[13])])
    
    # Calculate utilities for the avaibale joices N, P, F
    total_utilities = consumption_utilities * U_
    
    # Return function_output
    return total_utilities

In [11]:
def calculate_utilities(educ_level, exp_p, exp_f, optim_paras, draws):
    """Calculate flow utilities for all choices given state, period, and shocks."""
    
    # Calculate wage net of period productivity shock
    wage_systematic = calculate_wage_systematic(educ_level, exp_p, exp_f, optim_paras)
    
    # Calculate period wages for the three choices includings chocks' realizations
    period_wages = calculate_period_wages(wage_systematic, draws)
    
    # Calculate 1st part of the period flow utility related to consumption
    consumption_utilities = calculate_consumption_utilities(period_wages)
    
    # Calculate total utility by multiplying U(.) component
    utilities = calculate_total_utilities(consumption_utilities, optim_paras)
    
    # Return function output
    return utilities, consumption_utilities, period_wages, wage_systematic

In [12]:
def calculate_continuation_values (period, educ_years_idx, exp_p, exp_f):
    """Obtain continuation values for all choices."""

    # Initialize container for continuation values
    continuation_values = np.tile(MISSING_FLOAT, num_choices)

    if period != (num_periods - 1):

        # Choice: Non-employment
        # Create index for extracting the continuation value
        future_idx = mapping_states_index[period + 1, educ_years_idx, 0, exp_p, exp_f]
        # Extract continuation value
        continuation_values[0] = periods_emax[period + 1, future_idx] 

        # Choice: Part-time
        future_idx = mapping_states_index[period + 1, educ_years_idx, 1, exp_p + 1, exp_f]
        continuation_values[1] = periods_emax[period + 1, future_idx]

        # Choice: Full-time
        future_idx = mapping_states_index[period + 1, educ_years_idx, 2, exp_p, exp_f + 1]
        continuation_values[2] = periods_emax[period + 1, future_idx]
    
    else:
        continuation_values = np.tile(0.0, num_choices)
        
    # Record function output
    return continuation_values

In [13]:
def extract_individual_covariates (i):
    """Constructs additional covariates given agent indicator."""
    
    # Determine education level given number of years of education
    # Would it be more efficient to do this somewhere else?

    # Unpack state space components
    educ_years_i = educ_years[i]

    # Extract education information
    if (educ_years_i <= 10):
        educ_level = [1,0,0]

    elif (educ_years_i > 10) and (educ_years_i <= 12):
        educ_level = [0,1,0]

    else:
        educ_level = [0,0,1]

    educ_years_idx = educ_years_i - educ_min
    
    # Return function output
    return educ_years_i, educ_level, educ_years_idx

Finally, we need to simulate a sample of initial conditions. In this example, we need to assing a value for the years of education to every agent whose life-cycle we want to simulate.

In [14]:
educ_years = list(range(educ_min, educ_max + 1))
educ_years = np.random.choice(educ_years, num_agents_sim)

Now we can simulate the model life-cycle experiences of the individuals.

In [15]:
# Start count over all simulations/row (number of agents times number of periods)
count = 0

# Initialize container for the final output
num_columns = 14 # count of the information units we wish to record
dataset = np.tile(MISSING_FLOAT, (num_agents_sim*num_periods, num_columns))

# Loop over all agents
for i in range(num_agents_sim):
    
    
    # Construct additional education information
    educ_years_i, educ_level, educ_years_idx = extract_individual_covariates (i)
    
    # Extract the indicator of the initial state for the individual
    # depending on the individuals initial condition
    initial_state_index = mapping_states_index[educ_years_idx, educ_years_idx, 0, 0, 0]
    
    # Assign the initial state as current state
    current_state = states_all[educ_years_idx, initial_state_index, :].copy()
    
    # Loop over all remaining
    for period in range(num_periods):
        
        # Extract state space components
        choice_lagged, exp_p, exp_f = current_state[1], current_state[2], current_state[3]
        
        # Look up the indicator for the current state
        k = mapping_states_index[period, educ_years_i - educ_min, choice_lagged, exp_p, exp_f]
        
        # Record agent identifier and current period number in the dataset
        dataset[count, :2] = i, period
        
        # Calculate choice specific value functions
        # for individual, period and state space point
        
        # Extract the error term draws corresponding to
        # period number and individual
        corresponding_draws = draws_sim[period, i, :]
        
        # Calculate correspongind flow utilities
        flow_utilities, consumption_utilities, period_wages, wage_systematic = calculate_utilities(educ_level,
                                                                                                   exp_p,
                                                                                                   exp_f,
                                                                                                   optim_paras,
                                                                                                   corresponding_draws)
        
        # Obtain continuation values for all choices
        continuation_values = calculate_continuation_values(period, educ_years_idx, exp_p, exp_f)
        
        # Calculate total values for all choices
        value_functions = flow_utilities + delta * continuation_values
        
        # Determine choice as option with highest choice specific value function
        max_idx = np.argmax(value_functions)
        
        
        # Record output
        # Record agent identifier, period number, and choice
        dataset[count, :2] = i, period, 
        dataset[count, 2:3] = educ_years_i
        dataset[count, 3:4] = max_idx
        dataset[count, 4:5] = wage_systematic
        dataset[count, 5:8] = period_wages[:]
        dataset[count, 8:11] = consumption_utilities[:]
        dataset[count, 11:14] = flow_utilities[:]
        
        
        # Update state space component experience
        current_state[max_idx + 1] += 1
        
        # Update state space component choice_lagged
        current_state[1] = max_idx
        
        # Update simulation/row count
        count += 1

Finally, we want to record the dataset as a Pandas Dataframe.

In [16]:
def replace_missing_values (arguments):
    """Replace MISSING_FLOAT with NAN."""
    
    # Antibugging
    assert isinstance(arguments, tuple) or isinstance(arguments, np.ndarray)

    if isinstance(arguments, np.ndarray):
        arguments = (arguments,)

    rslt = tuple()

    for argument in arguments:
        
        # Transform to float array to evaluate missing values
        argument_internal = np.asfarray(argument)

        # Determine missing values
        is_missing = argument_internal == MISSING_FLOAT
        if np.any(is_missing):
            # Replace missing values
            argument = np.asfarray(argument)
            argument[is_missing] = np.nan

        rslt += (argument,)

    # Align interface
    if len(rslt) == 1:
        rslt = rslt[0]

    # Function output
    return rslt

In [17]:
# Create fixed objects needed to record simulated dataset to Pandas Dataframe

# Define column lables
DATA_LABLES_SIM = []
DATA_LABLES_SIM += ["Identifier", "Period"]
DATA_LABLES_SIM += ["Years of Education"]
DATA_LABLES_SIM += ["Choice"]
DATA_LABLES_SIM += ["Systematic Wage"]
DATA_LABLES_SIM += ["Period Wage N", "Period Wage P", "Period Wage F"]
DATA_LABLES_SIM += ["Consumption Utility N", "Consumption Utility P", "Consumption Utility F"]
DATA_LABLES_SIM += ["Flow Utility N", "Flow Utility P", "Flow Utility F"]

# Define data types for data set columns
DATA_FORMATS_SIM = dict()
for key_ in DATA_LABLES_SIM:
    DATA_FORMATS_SIM[key_] = np.int
    if key_ in ["Systematic Wage",
                "Period Wage N",
                "Period Wage P",
                "Period_wage F",
                "Consumption Utility N",
                "Consumption Utility P",
                "Consumption Utility F",
                "Flow Utility N",
                "Flow Utility P",
                "Flow Utility F"]:
        DATA_FORMATS_SIM[key_] = np.float

In [18]:
# Create data frame from simulated dataset
data_frame = pd.DataFrame(
    data = replace_missing_values(dataset), columns = DATA_LABLES_SIM
)

# Set specific columns to desired data types
data_frame = data_frame.astype(DATA_FORMATS_SIM)

# Define identifier for unique observation in the data frame
data_frame.set_index(["Identifier", "Period"], drop=False, inplace=True)

In [19]:
data_frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Identifier,Period,Years of Education,Choice,Systematic Wage,Period Wage N,Period Wage P,Period Wage F,Consumption Utility N,Consumption Utility P,Consumption Utility F,Flow Utility N,Flow Utility P,Flow Utility F
Identifier,Period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,0,0,0,12,0,7.679833,22.122486,48.926609,497,0.0,-0.040060,-0.007197,0.0,-0.044097,-0.008082
0,1,0,1,12,0,7.679833,20.817364,14.399362,3,0.0,-0.079467,-0.111763,0.0,-0.087474,-0.125510
0,2,0,2,12,0,7.679833,6.439254,6.760098,4,0.0,-0.121363,-0.102219,0.0,-0.133591,-0.114792
0,3,0,3,12,0,7.679833,12.411202,1.711864,15,0.0,-0.261889,-0.051058,0.0,-0.288276,-0.057337
0,4,0,4,12,0,7.679833,10.486539,44.889694,1,0.0,-0.042039,-0.163565,0.0,-0.046275,-0.183683
0,5,0,5,12,0,7.679833,19.762919,5.968693,50,0.0,-0.130127,-0.025774,0.0,-0.143238,-0.028944
0,6,0,6,12,0,7.679833,13.158487,901.415445,18,0.0,-0.007836,-0.045548,0.0,-0.008626,-0.051151
0,7,0,7,12,0,7.679833,57.364070,119.078136,0,0.0,-0.024344,-3.358203,0.0,-0.026797,-3.771248
0,8,0,8,12,0,7.679833,6.261082,1.914417,255,0.0,-0.245991,-0.010451,0.0,-0.270777,-0.011736
0,9,0,9,12,0,7.679833,6.376684,11.370673,8,0.0,-0.090702,-0.068680,0.0,-0.099842,-0.077127
