TODO: Set a seed for the generator of the initial conditions.

In [1]:
import pickle
import numpy as np
import pandas as pd
import math
import yaml

We import the model specification parameters and externally defined constants here.

In [2]:
# Import specified definitions only from given notebook
import ipynb.fs
from .defs.shared_constants import MISSING_INT, MISSING_FLOAT
from .defs.shared_auxiliary import draw_disturbances
from .defs.read import read_init_file
from .defs.read import init_dict_to_attr_dict

from .defs.shared_auxiliary import calculate_wage_systematic
from .defs.shared_auxiliary import calculate_period_wages
from .defs.shared_auxiliary import calculate_consumption_utilities
from .defs.shared_auxiliary import calculate_total_utilities
from .defs.shared_auxiliary import calculate_utilities
from .defs.shared_auxiliary import calculate_continuation_values

In [3]:
# Read the model specification from the initialization file.
# The result is indexed like a nested dict further below,
# e.g. attr_dict['GENERAL']['num_periods'].
attr_dict = read_init_file('toy_model_init_file.yml')

In [4]:
# Load `args`, the final output of pyth_create_state_space.
# In the modular implementation pyth_create_state_space will be called by pyth_solve;
# pyth_solve is executed before pyth_simulate.
# NOTE: pickle.load can execute arbitrary code while unpickling. Only load
# files that were written by this project's own solution step.
file_name = "args_file.pkl"
# Open the file for reading in binary mode; the context manager closes the
# handle even if unpickling raises.
with open(file_name, 'rb') as file_object:
    # Load the object from the file into the variable args
    args = pickle.load(file_object)

In [5]:
# Unpack objects from args
# (the positional order is assumed to match what pyth_create_state_space stored — TODO confirm)
states_all, states_number_period, mapping_states_index, max_states_period = args[0], args[1], args[2], args[3]

In [6]:
# Load `periods_emax`, the final output of pyth_backward_induction.
# In the modular implementation this step is expected to run as part of pyth_solve,
# which is executed before pyth_simulate.
# NOTE: pickle.load can execute arbitrary code while unpickling. Only load
# files that were written by this project's own solution step.
file_name = "periods_emax_file.pkl"
# Open the file for reading in binary mode; the context manager closes the
# handle even if unpickling raises.
with open(file_name, 'rb') as file_object:
    # Load the object from the file into the variable periods_emax
    periods_emax = pickle.load(file_object)

Then, we need to define the additional functions that are called in the loop to determine the agents' choices.

In [7]:
def extract_individual_covariates(educ_years, educ_min, i):
    """Build the education covariates of the agent at position ``i``.

    Parameters
    ----------
    educ_years : sequence
        Years of education, one entry per agent.
    educ_min : number
        Lowest admissible number of years of education; used as the offset
        for the education index.
    i : int
        Position of the agent in ``educ_years``.

    Returns
    -------
    tuple
        ``(years, level, idx)`` where ``years`` is ``educ_years[i]``,
        ``level`` is a fresh three-element 0/1 list marking the education
        level (up to 10 years / more than 10 and up to 12 years / anything
        else), and ``idx`` equals ``years - educ_min``.
    """
    years = educ_years[i]

    # Position of the single 1 in the level indicator
    if years <= 10:
        level_position = 0
    elif years <= 12:
        level_position = 1
    else:
        level_position = 2

    # Build a new list on every call so callers never share one object
    level = [0, 0, 0]
    level[level_position] = 1

    return years, level, years - educ_min

In [8]:
def calculate_continuation_values(attr_dict, period, educ_years_idx, exp_p, exp_f,
                                  state_index_map=None, emax=None):
    """Obtain continuation values for all choices.

    NOTE: this definition shadows the ``calculate_continuation_values`` that
    is imported from ``.defs.shared_auxiliary`` earlier in the notebook.

    Parameters
    ----------
    attr_dict : dict-like
        Model specification; ``attr_dict['GENERAL']['num_choices']`` and
        ``attr_dict['GENERAL']['num_periods']`` are read.
    period : int
        Current period.
    educ_years_idx : int
        Education index of the agent (second axis of the state index map).
    exp_p, exp_f : int
        Current part-time and full-time experience.
    state_index_map : array-like, optional
        Lookup indexed as ``[period, educ_years_idx, choice_lagged, exp_p,
        exp_f]`` that yields the index of a state within its period. Defaults
        to the notebook-level ``mapping_states_index``.
    emax : array-like, optional
        Table indexed as ``[period, state_index]`` from which the continuation
        values are read. Defaults to the notebook-level ``periods_emax``.

    Returns
    -------
    numpy.ndarray
        One value per choice (length ``num_choices``). All zeros in the last
        period. Otherwise entries 0, 1, 2 hold the values of non-employment,
        part-time and full-time; any further entries stay ``MISSING_FLOAT``.
    """
    # Unpack attributes from the model specification
    num_choices = attr_dict['GENERAL']['num_choices']
    num_periods = attr_dict['GENERAL']['num_periods']

    # Last period: there is no future, so every continuation value is zero
    if period == (num_periods - 1):
        return np.tile(0.0, num_choices)

    # Fall back to the objects loaded at notebook level when the caller does
    # not pass explicit tables (keeps the original five-argument call working)
    if state_index_map is None:
        state_index_map = mapping_states_index
    if emax is None:
        emax = periods_emax

    # Initialize container for continuation values
    continuation_values = np.tile(MISSING_FLOAT, num_choices)

    next_period = period + 1

    # State reached next period for each choice, as
    # (choice_lagged, exp_p, exp_f): non-employment leaves experience
    # unchanged, part-time adds one to exp_p, full-time adds one to exp_f
    successor_states = (
        (0, exp_p, exp_f),
        (1, exp_p + 1, exp_f),
        (2, exp_p, exp_f + 1),
    )

    for choice, (lagged, next_exp_p, next_exp_f) in enumerate(successor_states):
        # Index of the successor state within the next period
        future_idx = state_index_map[next_period, educ_years_idx, lagged, next_exp_p, next_exp_f]
        continuation_values[choice] = emax[next_period, future_idx]

    # Record function output
    return continuation_values

In [9]:
def pyth_simulate(attr_dict, mapping_states_index, states_all):
    """Simulate agent experiences.

    For every simulated agent an initial number of years of education is
    drawn, and then for every period the choice with the highest value
    (flow utility plus discounted continuation value) is recorded together
    with wages and utilities.

    Parameters
    ----------
    attr_dict : dict-like
        Model specification. The sections INITIAL_CONDITIONS, GENERAL,
        SIMULATION, DERIVED_ATTR, PARAMETERS and CONSTANTS are read.
    mapping_states_index : array-like
        Lookup used here to locate the initial state of an agent.
        NOTE: ``calculate_continuation_values`` is called without this
        argument and therefore reads the notebook-level tables.
    states_all : array-like
        State space container; ``states_all[a, b, :]`` must yield a state
        whose components 1, 2, 3 are lagged choice, part-time experience and
        full-time experience.

    Returns
    -------
    numpy.ndarray
        Array with ``num_agents_sim * num_periods`` rows and 14 columns:
        identifier, period, years of education, choice, systematic wage,
        three period wages, three consumption utilities, three flow utilities.
    """
    # Unpack parameters from the model specification
    educ_min = attr_dict['INITIAL_CONDITIONS']['educ_min']
    educ_max = attr_dict['INITIAL_CONDITIONS']['educ_max']
    num_periods = attr_dict['GENERAL']['num_periods']
    num_agents_sim = attr_dict['SIMULATION']['num_agents_sim']
    seed_sim = attr_dict['SIMULATION']['seed_sim']
    shocks_cov = attr_dict['DERIVED_ATTR']['shocks_cov']
    optim_paras = attr_dict['PARAMETERS']['optim_paras']
    delta = attr_dict['CONSTANTS']['delta']

    # Draw the initial years of education from a dedicated generator seeded
    # with the simulation seed: repeated runs yield the same sample and the
    # global NumPy random state is left untouched.
    educ_grid = list(range(educ_min, educ_max + 1))
    educ_years = np.random.RandomState(seed_sim).choice(educ_grid, num_agents_sim)

    # Create draws for simulated sample
    draws_sim = draw_disturbances((num_periods, num_agents_sim), shocks_cov, seed_sim)

    # Row counter over all simulations (number of agents times number of periods)
    count = 0

    # Initialize container for the final output
    num_columns = 14  # count of the information units we wish to record
    dataset = np.tile(MISSING_FLOAT, (num_agents_sim * num_periods, num_columns))

    # Loop over all agents
    for i in range(num_agents_sim):

        # Construct additional education information
        educ_years_i, educ_level, educ_years_idx = extract_individual_covariates(educ_years, educ_min, i)

        # Extract the index of the initial state for the individual
        # depending on the individual's initial condition.
        # NOTE(review): the first index is educ_years_idx here, while the
        # period loop below starts at 0 — confirm the intended entry period.
        initial_state_index = mapping_states_index[educ_years_idx, educ_years_idx, 0, 0, 0]

        # Assign the initial state as current state (copy: it is updated below)
        current_state = states_all[educ_years_idx, initial_state_index, :].copy()

        # Loop over all periods
        for period in range(num_periods):

            # Extract experience components of the current state
            exp_p, exp_f = current_state[2], current_state[3]

            # Error term draws for this period and individual
            corresponding_draws = draws_sim[period, i, :]

            # Calculate corresponding flow utilities
            flow_utilities, consumption_utilities, period_wages, wage_systematic = calculate_utilities(
                attr_dict,
                educ_level,
                exp_p,
                exp_f,
                optim_paras,
                corresponding_draws,
            )

            # Obtain continuation values for all choices
            continuation_values = calculate_continuation_values(
                attr_dict,
                period,
                educ_years_idx,
                exp_p,
                exp_f,
            )

            # Calculate total values for all choices
            value_functions = flow_utilities + delta * continuation_values

            # Determine choice as option with highest choice specific value function
            max_idx = np.argmax(value_functions)

            # Record output: identifier, period, education, choice, wages, utilities
            dataset[count, :2] = i, period
            dataset[count, 2:3] = educ_years_i
            dataset[count, 3:4] = max_idx
            dataset[count, 4:5] = wage_systematic
            dataset[count, 5:8] = period_wages[:]
            dataset[count, 8:11] = consumption_utilities[:]
            dataset[count, 11:14] = flow_utilities[:]

            # Update state space component experience. For max_idx == 0 this
            # touches the lagged-choice slot, which is overwritten right below.
            current_state[max_idx + 1] += 1

            # Update state space component choice_lagged
            current_state[1] = max_idx

            # Update simulation/row count
            count += 1

    # Return function output
    return dataset

In [10]:
# Run the simulation with the model specification and the state space objects loaded above
dataset = pyth_simulate(attr_dict, mapping_states_index, states_all)

Finally, we want to record the dataset as a pandas DataFrame.

In [11]:
def replace_missing_values(arguments, missing_value=None):
    """Replace MISSING_FLOAT with NAN.

    Parameters
    ----------
    arguments : numpy.ndarray or tuple
        A single array or a tuple of array-likes to clean.
    missing_value : number, optional
        Marker to be replaced by NaN. Defaults to ``MISSING_FLOAT``.

    Returns
    -------
    numpy.ndarray or tuple
        For every input that contains the marker, a new float64 array with
        the marker replaced by NaN; inputs without the marker are passed
        through unchanged. A single input (or one-element tuple) yields the
        bare result instead of a tuple. Input arrays are never modified.

    Raises
    ------
    AssertionError
        If ``arguments`` is neither a tuple nor a numpy array.
    """
    # Antibugging
    assert isinstance(arguments, (tuple, np.ndarray))

    if missing_value is None:
        missing_value = MISSING_FLOAT

    if isinstance(arguments, np.ndarray):
        arguments = (arguments,)

    rslt = tuple()

    for argument in arguments:

        # Float view/copy needed to compare against the marker and to hold NaN.
        # (np.asfarray was removed in NumPy 2.0; this is its exact equivalent.)
        as_float = np.asarray(argument, dtype=np.float64)

        # Determine missing values
        is_missing = as_float == missing_value
        if np.any(is_missing):
            # np.where allocates a new array, so the caller's data stays intact
            argument = np.where(is_missing, np.nan, as_float)

        rslt += (argument,)

    # Align interface
    if len(rslt) == 1:
        rslt = rslt[0]

    # Function output
    return rslt

In [12]:
# Create fixed objects needed to record the simulated dataset as a pandas DataFrame

# Column labels, in the order in which pyth_simulate fills the dataset columns
DATA_LABLES_SIM = []
DATA_LABLES_SIM += ["Identifier", "Period"]
DATA_LABLES_SIM += ["Years of Education"]
DATA_LABLES_SIM += ["Choice"]
DATA_LABLES_SIM += ["Systematic Wage"]
DATA_LABLES_SIM += ["Period Wage N", "Period Wage P", "Period Wage F"]
DATA_LABLES_SIM += ["Consumption Utility N", "Consumption Utility P", "Consumption Utility F"]
DATA_LABLES_SIM += ["Flow Utility N", "Flow Utility P", "Flow Utility F"]

# Columns stored as floats; every other column is stored as an integer
_FLOAT_LABELS_SIM = {
    "Systematic Wage",
    "Period Wage N",
    "Period Wage P",
    "Period Wage F",
    "Consumption Utility N",
    "Consumption Utility P",
    "Consumption Utility F",
    "Flow Utility N",
    "Flow Utility P",
    "Flow Utility F",
}

# Data types for the data set columns. The builtins int/float are used because
# the aliases np.int/np.float were removed in NumPy 1.24; they referred to
# exactly these builtins.
DATA_FORMATS_SIM = {
    label: (float if label in _FLOAT_LABELS_SIM else int)
    for label in DATA_LABLES_SIM
}

In [13]:
# Assemble the simulated observations into a labelled frame in one chain:
# missing-value markers become NaN, columns are cast to their declared types,
# and (Identifier, Period) identifies each observation as the index while
# both remain available as regular columns.
data_frame = (
    pd.DataFrame(data=replace_missing_values(dataset), columns=DATA_LABLES_SIM)
    .astype(DATA_FORMATS_SIM)
    .set_index(["Identifier", "Period"], drop=False)
)

In [14]:
# Display the simulated data set
data_frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Identifier,Period,Years of Education,Choice,Systematic Wage,Period Wage N,Period Wage P,Period Wage F,Consumption Utility N,Consumption Utility P,Consumption Utility F,Flow Utility N,Flow Utility P,Flow Utility F
Identifier,Period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,0,0,0,11,2,5.147946,14.829146,15.636876,62.846812,-0.141488,-0.075881,-0.022914,-0.141488,-0.083527,-0.025732
0,1,0,1,11,1,9.920092,26.889929,14.464738,6.410796,-0.141488,-0.079266,-0.082274,-0.141488,-0.087252,-0.092393
0,2,0,2,11,2,11.380368,9.542015,10.541856,8.092601,-0.141488,-0.094629,-0.072211,-0.141488,-0.104164,-0.081093
0,3,0,3,11,2,16.152514,26.103707,6.563123,24.164561,-0.141488,-0.123389,-0.039134,-0.141488,-0.135822,-0.043947
0,4,0,4,11,1,20.924660,28.571879,60.358069,8.991891,-0.141488,-0.035616,-0.068073,-0.141488,-0.039205,-0.076446
0,5,0,5,11,2,22.384936,57.604337,19.242982,69.658784,-0.141488,-0.067556,-0.021631,-0.141488,-0.074363,-0.024291
0,6,0,6,11,1,27.157082,46.530457,473.834901,45.914634,-0.141488,-0.011233,-0.027317,-0.141488,-0.012365,-0.030677
0,7,0,7,11,1,28.617359,213.755719,148.223720,0.482682,-0.141488,-0.021535,-0.350173,-0.141488,-0.023705,-0.393243
0,8,0,8,11,2,30.077635,24.521176,13.069369,246.211100,-0.141488,-0.083899,-0.010666,-0.141488,-0.092353,-0.011978
0,9,0,9,11,2,34.849781,28.936314,44.102245,37.946453,-0.141488,-0.042458,-0.030395,-0.141488,-0.046736,-0.034133
