In [1]:
import numpy as np
import pandas as pd
from skillmodels.config import TEST_DIR
import yaml
from skillmodels.simulate_data import simulate_dataset
from skillmodels.likelihood_function import get_maximization_inputs


# How to simulate a dataset



Below we show how to simulate a dataset for a test model.

## Getting params

For more details on this, check out the introductory tutorial.

In [2]:
# Read the model specification from the YAML file located under TEST_DIR
# (imported from skillmodels.config).
# NOTE: yaml.load with FullLoader is kept as in the original; prefer
# yaml.safe_load if the YAML file ever comes from an untrusted source.
with open(TEST_DIR / "model2.yaml") as yaml_file:
    model_dict = yaml.load(yaml_file, Loader=yaml.FullLoader)

# Read the parameter table and index it by (category, period, name1, name2).
# Done as a single chained expression so that re-running the cell always
# starts from the CSV instead of re-indexing an already indexed frame.
params = pd.read_csv(
    TEST_DIR / "regression_vault" / "one_stage_anchoring.csv"
).set_index(["category", "period", "name1", "name2"])


## Simulated data without policy

In [3]:
# Mean and standard deviation for the variable "x1", each stored as a
# one-element Series labelled by the variable name. Both are passed to
# simulate_dataset as `control_means` / `control_sds`.
control_means = pd.Series({"x1": 0})
control_sds = pd.Series({"x1": 1})

In [4]:
# Fix the random seed so that a fresh "Restart & Run All" reproduces the
# simulated data shown below.
# NOTE(review): this assumes simulate_dataset draws from NumPy's global
# random state -- confirm against the skillmodels implementation.
np.random.seed(0)

# Baseline simulation: 100 observations, no policy intervention.
initial_data = simulate_dataset(
    model_dict,
    params,
    n_obs=100,
    control_means=control_means,
    control_sds=control_sds,
    policies=None,
)
initial_data



(            Q1_fac1  constant        x1        y1        y2        y3  \
 id period                                                               
 0  0       2.759489       1.0  0.298359  0.188541  2.211018  2.500775   
    1       0.162026       1.0  0.298359  0.794224  3.014125  0.221136   
    2       0.788402       1.0  0.298359 -0.563458 -0.272157  0.342180   
    3      -0.013867       1.0  0.298359 -1.013532  1.013031  1.834724   
    4       3.454170       1.0  0.298359  2.084928  3.041390  0.104724   
 ...             ...       ...       ...       ...       ...       ...   
 99 3      -1.075366       1.0  0.937389 -1.543976 -1.000913 -0.478862   
    4      -1.664305       1.0  0.937389 -1.352240 -0.301531  0.045044   
    5      -1.519237       1.0  0.937389  0.293441  0.427669 -0.471328   
    6       1.193672       1.0  0.937389 -0.735118  0.225063  1.208368   
    7       2.699464       1.0  0.937389  2.986717  1.738306  3.055116   
 
                  y4        y5      

## Simulated data with policy

In [5]:
# One dict per policy, naming the period and factor it refers to together
# with its effect size. Every policy here uses a standard deviation of 0.0.
policies = [
    {
        "period": period,
        "factor": factor,
        "effect_size": effect_size,
        "standard_deviation": 0.0,
    }
    for period, factor, effect_size in [(0, "fac1", 0.2), (1, "fac2", 0.1)]
]

In [6]:
# Fix the random seed so that a fresh "Restart & Run All" reproduces the
# simulated data shown below.
# NOTE(review): this assumes simulate_dataset draws from NumPy's global
# random state -- confirm against the skillmodels implementation.
np.random.seed(0)

# Same simulation settings as the baseline, but with the policies applied.
data_after_policies = simulate_dataset(
    model_dict,
    params,
    n_obs=100,
    control_means=control_means,
    control_sds=control_sds,
    policies=policies,
)
data_after_policies

(            Q1_fac1  constant        x1        y1        y2        y3  \
 id period                                                               
 0  0       3.392551       1.0  1.233778  1.088742  3.069778  2.313672   
    1       0.320152       1.0  1.233778 -1.597029 -0.177177  0.787827   
    2       0.636779       1.0  1.233778  0.285468  0.142373  2.350087   
    3       1.421329       1.0  1.233778  0.095934  0.339438  1.299482   
    4       1.573849       1.0  1.233778  1.134013  1.122015  1.445268   
 ...             ...       ...       ...       ...       ...       ...   
 99 3       1.486454       1.0 -0.594728  2.220170  0.915202  1.636390   
    4       1.457270       1.0 -0.594728  3.599744  3.636927  0.227899   
    5       2.893405       1.0 -0.594728  2.648821 -0.794044  0.828340   
    6       1.410918       1.0 -0.594728  0.237525  1.960261 -0.144451   
    7       1.238352       1.0 -0.594728  1.676059  1.631343  0.657746   
 
                  y4        y5      