In [1]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [2]:
# mapping experiment setting to variance and std on early node
mapping = {
            'cogsci_learning': [-8, -4, 4, 8],
            'mini_variance'  : [-2, -1, 1, 2],
            'zero_variance'  : [1, 1, 1, 1],
            'large_variance' : [-48, -24, 24, 48],
            'high_increasing': [-4, -2, 2, 4]
                          }

In [3]:
considered_experiment_settings = ["high_increasing", "large_variance", "cogsci_learning", "mini_variance", "zero_variance"]

In [4]:
# load data
data = pd.DataFrame()
for experiment_setting in considered_experiment_settings:
    data_in = pd.read_csv(f"data/processed/simulated/{experiment_setting}/MCL/linear_depth/search_space/1729_depth_only_baseline_null.csv")
    data_in['experiment_setting']= experiment_setting
    data = pd.concat([data, data_in])


In [5]:
# create a new column with variance and std of the experiment settings
data["std_early_nodes"] = data["experiment_setting"].apply(lambda experiment_setting: np.std(mapping[experiment_setting]))
data["variance_early_nodes"] = data["experiment_setting"].apply(lambda experiment_setting: np.var(mapping[experiment_setting]))

In [6]:
## normalization
# variance
data["norm_variance_early_nodes"] = (
                                    (data["variance_early_nodes"]-data["variance_early_nodes"].min())/
                                    (data["variance_early_nodes"].max()-data["variance_early_nodes"].min()))
# std
data["norm_std_early_nodes"]      = (
                                    (data["std_early_nodes"]-data["std_early_nodes"].min())/
                                    (data["std_early_nodes"].max()-data["std_early_nodes"].min()))
# data["norm_std_early_nodes"] = preprocessing.Normalizer(data["std_early_nodes"])
# data["norm_variance_early_nodes"] = preprocessing.Normalizer(data["variance_early_nodes"])

In [7]:
data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,w,taken_paths,costs,loss,ground_truth,trial_id,i_episode,actions,...,num_late,num_clicks,unbounded_present_bias,unbounded_loss,state,experiment_setting,std_early_nodes,variance_early_nodes,norm_variance_early_nodes,norm_std_early_nodes
0,0,0,"[0, 0, 0, 0, 0]","[1, 2, 3]",[54.0],,"[0.0, -2.0, 8.0, 48.0, -24.0, 2.0, -4.0, -48.0...",3827219843471436276,0,0,...,False,False,0.0,0.0,"(0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Ca...",high_increasing,3.162278,10.0,0.006944,0.083333
1,1,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,9,...,False,True,8.0,-28.0,"(0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Ca...",high_increasing,3.162278,10.0,0.006944,0.083333
2,2,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,1,...,False,True,8.0,-28.0,"(0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Cat, -4...",high_increasing,3.162278,10.0,0.006944,0.083333
3,3,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,5,...,False,True,8.0,-28.0,"(0, -2.0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, -...",high_increasing,3.162278,10.0,0.006944,0.083333
4,4,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,0,...,False,False,8.0,-28.0,"(0, -2.0, Cat, Cat, Cat, 4.0, Cat, Cat, Cat, -...",high_increasing,3.162278,10.0,0.006944,0.083333


In [14]:
data.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'w', 'taken_paths', 'costs', 'loss',
       'ground_truth', 'trial_id', 'i_episode', 'actions', 'return',
       'full_actions', 'sim_experiment_setting', 'sim_model_yaml',
       'sim_feature_yaml', 'sim_prior_json', 'sim_constant_yaml',
       'sim_cost_function', 'sim_cost_parameter_values', 'sim_num_simulated',
       'sim_num_trials', 'static_cost_weight', 'depth_cost_weight', 'pid',
       'num_early', 'num_middle', 'num_late', 'num_clicks',
       'unbounded_present_bias', 'unbounded_loss', 'state',
       'experiment_setting', 'std_early_nodes', 'variance_early_nodes',
       'norm_variance_early_nodes', 'norm_std_early_nodes'],
      dtype='object')

In [7]:
# group data by factors
grouped_data = data.groupby(["pid", "i_episode", "sim_cost_parameter_values", "sim_experiment_setting"], as_index=False).sum()

In [8]:
grouped_data.head()

Unnamed: 0.2,pid,i_episode,sim_cost_parameter_values,sim_experiment_setting,Unnamed: 0,Unnamed: 0.1,loss,trial_id,actions,return,...,num_early,num_middle,num_late,num_clicks,unbounded_present_bias,unbounded_loss,std_early_nodes,variance_early_nodes,norm_variance_early_nodes,norm_std_early_nodes
0,0,0,"-1.0,2.5",cogsci_learning,5699016.0,0.0,0.0,2.439592e+19,8.0,120.0,...,1.0,0.0,1.0,2.0,0.0,0.0,18.973666,120.0,0.083333,0.5
1,0,0,"-1.0,2.5",high_increasing,3495651.0,0.0,0.0,-1.702437e+19,11.0,31.0,...,0.0,0.0,1.0,1.0,0.0,-16.0,6.324555,20.0,0.013889,0.166667
2,0,0,"-1.0,2.5",large_variance,4156537.0,0.0,0.0,-1.118975e+19,7.0,-77.0,...,0.0,0.0,1.0,1.0,0.0,-224.0,75.894664,2880.0,2.0,2.0
3,0,0,"-1.0,2.5",mini_variance,5119206.0,0.0,0.0,-1.149878e+19,8.0,-189.0,...,1.0,0.0,1.0,2.0,6.0,-210.0,4.743416,7.5,0.005208,0.125
4,0,0,"-1.0,2.5",zero_variance,3645513.0,0.0,0.0,1.813679e+19,2.0,-46.0,...,0.0,1.0,0.0,1.0,0.0,-144.0,0.0,0.0,0.0,0.0


# 1. number of early nodes

### 1. early_node ~ depth_cost_weight and i_episode 

In [64]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,5183.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:20:17,Log-Likelihood:,-5242100.0
No. Observations:,3400000,AIC:,10480000.0
Df Residuals:,3399996,BIC:,10480000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.7834,0.001,643.347,0.000,0.781,0.786
i_episode,-0.0015,2.13e-05,-68.849,0.000,-0.002,-0.001
i_episode:depth_cost_weight,-2.859e-05,1.85e-06,-15.440,0.000,-3.22e-05,-2.5e-05
depth_cost_weight,-0.0043,0.000,-40.353,0.000,-0.004,-0.004

0,1,2,3
Omnibus:,559576.601,Durbin-Watson:,1.946
Prob(Omnibus):,0.0,Jarque-Bera (JB):,895813.867
Skew:,1.257,Prob(JB):,0.0
Kurtosis:,2.939,Cond. No.,1290.0


In [65]:
# 1. cogsci_learning
cogsci_learning_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'cogsci_learning']
res = smf.ols(formula=formula, data=cogsci_learning_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,3639.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:20:25,Log-Likelihood:,-1045500.0
No. Observations:,680000,AIC:,2091000.0
Df Residuals:,679996,BIC:,2091000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.7766,0.003,285.595,0.000,0.771,0.782
i_episode,-0.0024,4.75e-05,-50.243,0.000,-0.002,-0.002
i_episode:depth_cost_weight,3.22e-06,3.82e-06,0.844,0.399,-4.26e-06,1.07e-05
depth_cost_weight,-0.0103,0.000,-47.457,0.000,-0.011,-0.010

0,1,2,3
Omnibus:,123625.866,Durbin-Watson:,1.986
Prob(Omnibus):,0.0,Jarque-Bera (JB):,206557.958
Skew:,1.347,Prob(JB):,0.0
Kurtosis:,3.183,Cond. No.,1420.0


In [66]:
# 2. high_increasing
high_increasing_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'high_increasing']
res = smf.ols(formula=formula, data=high_increasing_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,3626.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:20:31,Log-Likelihood:,-1008000.0
No. Observations:,680000,AIC:,2016000.0
Df Residuals:,679996,BIC:,2016000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.7231,0.003,281.930,0.000,0.718,0.728
i_episode,-0.0029,4.48e-05,-65.052,0.000,-0.003,-0.003
i_episode:depth_cost_weight,-0.0001,4.41e-06,-26.155,0.000,-0.000,-0.000
depth_cost_weight,-0.0046,0.000,-18.436,0.000,-0.005,-0.004

0,1,2,3
Omnibus:,160977.288,Durbin-Watson:,1.932
Prob(Omnibus):,0.0,Jarque-Bera (JB):,301599.642
Skew:,1.57,Prob(JB):,0.0
Kurtosis:,3.883,Cond. No.,1130.0


In [67]:
# 3. large_variance
large_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'large_variance']
res = smf.ols(formula=formula, data=large_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,1160.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:20:35,Log-Likelihood:,-1098600.0
No. Observations:,680000,AIC:,2197000.0
Df Residuals:,679996,BIC:,2197000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.0617,0.003,361.682,0.000,1.056,1.067
i_episode,0.0026,5.13e-05,51.596,0.000,0.003,0.003
i_episode:depth_cost_weight,7.885e-05,3.7e-06,21.306,0.000,7.16e-05,8.61e-05
depth_cost_weight,-0.0062,0.000,-29.171,0.000,-0.007,-0.006

0,1,2,3
Omnibus:,7290789.236,Durbin-Watson:,1.995
Prob(Omnibus):,0.0,Jarque-Bera (JB):,76724.692
Skew:,0.446,Prob(JB):,0.0
Kurtosis:,1.617,Cond. No.,1580.0


In [68]:
# 4. mini_variance
mini_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'mini_variance']
res = smf.ols(formula=formula, data=mini_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.009
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,2068.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:20:39,Log-Likelihood:,-1000800.0
No. Observations:,680000,AIC:,2002000.0
Df Residuals:,679996,BIC:,2002000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6920,0.003,272.639,0.000,0.687,0.697
i_episode,-0.0024,4.43e-05,-53.733,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-9.712e-05,4.18e-06,-23.212,0.000,-0.000,-8.89e-05
depth_cost_weight,-0.0021,0.000,-8.812,0.000,-0.003,-0.002

0,1,2,3
Omnibus:,164057.367,Durbin-Watson:,1.876
Prob(Omnibus):,0.0,Jarque-Bera (JB):,310655.061
Skew:,1.588,Prob(JB):,0.0
Kurtosis:,3.939,Cond. No.,1170.0


In [69]:
# 5. zero_variance
zero_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'zero_variance']
res = smf.ols(formula=formula, data=zero_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,1623.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:20:42,Log-Likelihood:,-986470.0
No. Observations:,680000,AIC:,1973000.0
Df Residuals:,679996,BIC:,1973000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6474,0.002,260.199,0.000,0.643,0.652
i_episode,-0.0021,4.34e-05,-47.357,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-0.0001,4.41e-06,-23.092,0.000,-0.000,-9.31e-05
depth_cost_weight,0.0107,0.000,42.088,0.000,0.010,0.011

0,1,2,3
Omnibus:,187883.566,Durbin-Watson:,1.94
Prob(Omnibus):,0.0,Jarque-Bera (JB):,388989.177
Skew:,1.719,Prob(JB):,0.0
Kurtosis:,4.383,Cond. No.,1100.0


In [None]:
#print(cogsci_learning_grouped_data["sim_experiment_setting"].unique())
#print(high_increasing_grouped_data["sim_experiment_setting"].unique())
#print(large_variance_grouped_data["sim_experiment_setting"].unique())
#print(mini_variance_grouped_data["sim_experiment_setting"].unique())
#print(zero_variance_grouped_data["sim_experiment_setting"].unique())

### 2. early_node ~ experiment_setting


In [44]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + sim_experiment_setting + sim_experiment_setting:i_episode + sim_experiment_setting:i_episode:depth_cost_weight + sim_experiment_setting:depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.056
Model:,OLS,Adj. R-squared:,0.056
Method:,Least Squares,F-statistic:,10630.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,20:42:04,Log-Likelihood:,-5151800.0
No. Observations:,3400000,AIC:,10300000.0
Df Residuals:,3399980,BIC:,10300000.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.7766,0.003,292.018,0.000,0.771,0.782
sim_experiment_setting[T.high_increasing],-0.0534,0.004,-14.228,0.000,-0.061,-0.046
sim_experiment_setting[T.large_variance],0.2851,0.004,75.876,0.000,0.278,0.292
sim_experiment_setting[T.mini_variance],-0.0845,0.004,-22.515,0.000,-0.092,-0.077
sim_experiment_setting[T.zero_variance],-0.1292,0.004,-34.385,0.000,-0.137,-0.122
i_episode,-0.0024,4.65e-05,-51.373,0.000,-0.002,-0.002
sim_experiment_setting[T.high_increasing]:i_episode,-0.0005,6.56e-05,-8.025,0.000,-0.001,-0.000
sim_experiment_setting[T.large_variance]:i_episode,0.0050,6.57e-05,76.660,0.000,0.005,0.005
sim_experiment_setting[T.mini_variance]:i_episode,4.513e-06,6.56e-05,0.069,0.945,-0.000,0.000

0,1,2,3
Omnibus:,562601.603,Durbin-Watson:,1.916
Prob(Omnibus):,0.0,Jarque-Bera (JB):,898401.102
Skew:,1.255,Prob(JB):,0.0
Kurtosis:,3.199,Cond. No.,7580.0




### 3. early_node ~ norm_variance_early_nodes

- All environment_settings

In [45]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + norm_variance_early_nodes + norm_variance_early_nodes:i_episode + norm_variance_early_nodes:i_episode:depth_cost_weight + norm_variance_early_nodes:depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.119
Model:,OLS,Adj. R-squared:,0.119
Method:,Least Squares,F-statistic:,65650.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,20:42:23,Log-Likelihood:,-5034400.0
No. Observations:,3400000,AIC:,10070000.0
Df Residuals:,3399992,BIC:,10070000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6485,0.001,525.696,0.000,0.646,0.651
i_episode,-0.0019,2.16e-05,-88.906,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-4.081e-05,2.03e-06,-20.108,0.000,-4.48e-05,-3.68e-05
depth_cost_weight,-0.0015,0.000,-12.816,0.000,-0.002,-0.001
norm_variance_early_nodes,0.1581,0.001,294.696,0.000,0.157,0.159
norm_variance_early_nodes:i_episode,0.0004,9.37e-06,40.127,0.000,0.000,0.000
norm_variance_early_nodes:i_episode:depth_cost_weight,1.916e-05,4.3e-07,44.563,0.000,1.83e-05,2e-05
norm_variance_early_nodes:depth_cost_weight,0.0004,2.35e-05,17.179,0.000,0.000,0.000

0,1,2,3
Omnibus:,619902.746,Durbin-Watson:,1.929
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1018112.631
Skew:,1.308,Prob(JB):,0.0
Kurtosis:,3.583,Cond. No.,6770.0


### 4. early_node ~ norm_std_early_nodes

- All experiment settings

In [63]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + norm_std_early_nodes + norm_std_early_nodes:i_episode + norm_std_early_nodes:i_episode:depth_cost_weight + norm_std_early_nodes:depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.152
Model:,OLS,Adj. R-squared:,0.152
Method:,Least Squares,F-statistic:,86840.0
Date:,"Fri, 06 Jan 2023",Prob (F-statistic):,0.0
Time:,21:04:46,Log-Likelihood:,-4970300.0
No. Observations:,3400000,AIC:,9941000.0
Df Residuals:,3399992,BIC:,9941000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5810,0.001,461.654,0.000,0.579,0.583
i_episode,-0.0019,2.21e-05,-87.072,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-4.242e-05,2.11e-06,-20.117,0.000,-4.66e-05,-3.83e-05
depth_cost_weight,0.0004,0.000,3.634,0.000,0.000,0.001
norm_std_early_nodes,0.1918,0.001,356.089,0.000,0.191,0.193
norm_std_early_nodes:i_episode,0.0003,9.42e-06,28.334,0.000,0.000,0.000
norm_std_early_nodes:i_episode:depth_cost_weight,1.892e-05,4.4e-07,42.959,0.000,1.81e-05,1.98e-05
norm_std_early_nodes:depth_cost_weight,0.0003,2.4e-05,12.879,0.000,0.000,0.000

0,1,2,3
Omnibus:,605881.398,Durbin-Watson:,1.938
Prob(Omnibus):,0.0,Jarque-Bera (JB):,983329.869
Skew:,1.277,Prob(JB):,0.0
Kurtosis:,3.647,Cond. No.,7170.0


- Individual experiment setting

# 2. present bias

In [9]:
# define early and late ratio
grouped_data['early_ratio'] = grouped_data['num_early']/grouped_data['num_clicks']
grouped_data['late_ratio'] = 1- grouped_data['num_late']/grouped_data['num_clicks'] 

In [10]:
grouped_data.head()

Unnamed: 0.2,pid,i_episode,sim_cost_parameter_values,sim_experiment_setting,Unnamed: 0,Unnamed: 0.1,loss,trial_id,actions,return,...,num_late,num_clicks,unbounded_present_bias,unbounded_loss,std_early_nodes,variance_early_nodes,norm_variance_early_nodes,norm_std_early_nodes,early_ratio,late_ratio
0,0,0,"-1.0,2.5",cogsci_learning,5699016.0,0.0,0.0,2.439592e+19,8.0,120.0,...,1.0,2.0,0.0,0.0,18.973666,120.0,0.083333,0.5,0.5,0.5
1,0,0,"-1.0,2.5",high_increasing,3495651.0,0.0,0.0,-1.702437e+19,11.0,31.0,...,1.0,1.0,0.0,-16.0,6.324555,20.0,0.013889,0.166667,0.0,0.0
2,0,0,"-1.0,2.5",large_variance,4156537.0,0.0,0.0,-1.118975e+19,7.0,-77.0,...,1.0,1.0,0.0,-224.0,75.894664,2880.0,2.0,2.0,0.0,0.0
3,0,0,"-1.0,2.5",mini_variance,5119206.0,0.0,0.0,-1.149878e+19,8.0,-189.0,...,1.0,2.0,6.0,-210.0,4.743416,7.5,0.005208,0.125,0.5,0.5
4,0,0,"-1.0,2.5",zero_variance,3645513.0,0.0,0.0,1.813679e+19,2.0,-46.0,...,0.0,1.0,0.0,-144.0,0.0,0.0,0.0,0.0,0.0,1.0


### present bias

In [14]:
formula_present_bias = "early_ratio ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + 1"
res = smf.ols(formula=formula_present_bias, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,early_ratio,R-squared:,0.033
Model:,OLS,Adj. R-squared:,0.033
Method:,Least Squares,F-statistic:,33240.0
Date:,"Tue, 17 Jan 2023",Prob (F-statistic):,0.0
Time:,17:44:40,Log-Likelihood:,-1086500.0
No. Observations:,2949808,AIC:,2173000.0
Df Residuals:,2949804,BIC:,2173000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2928,0.000,709.524,0.000,0.292,0.294
i_episode,-0.0009,7.13e-06,-131.600,0.000,-0.001,-0.001
i_episode:depth_cost_weight,1.926e-05,5.8e-07,33.212,0.000,1.81e-05,2.04e-05
depth_cost_weight,0.0038,3.3e-05,114.462,0.000,0.004,0.004

0,1,2,3
Omnibus:,433846.441,Durbin-Watson:,2.033
Prob(Omnibus):,0.0,Jarque-Bera (JB):,662292.368
Skew:,1.161,Prob(JB):,0.0
Kurtosis:,2.969,Cond. No.,1400.0


In [15]:
# 1. cogsci_learning
cogsci_learning_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'cogsci_learning']
res = smf.ols(formula=formula_present_bias, data=cogsci_learning_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,early_ratio,R-squared:,0.035
Model:,OLS,Adj. R-squared:,0.035
Method:,Least Squares,F-statistic:,7131.0
Date:,"Tue, 17 Jan 2023",Prob (F-statistic):,0.0
Time:,17:44:51,Log-Likelihood:,-131950.0
No. Observations:,586916,AIC:,263900.0
Df Residuals:,586912,BIC:,263900.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2636,0.001,327.763,0.000,0.262,0.265
i_episode,-0.0013,1.39e-05,-94.327,0.000,-0.001,-0.001
i_episode:depth_cost_weight,-3.677e-06,1.04e-06,-3.537,0.000,-5.71e-06,-1.64e-06
depth_cost_weight,0.0035,5.9e-05,58.731,0.000,0.003,0.004

0,1,2,3
Omnibus:,126610.046,Durbin-Watson:,1.963
Prob(Omnibus):,0.0,Jarque-Bera (JB):,226035.634
Skew:,1.431,Prob(JB):,0.0
Kurtosis:,4.026,Cond. No.,1550.0


In [16]:
# 2.high_increasing
high_increasing_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'high_increasing']
res = smf.ols(formula=formula_present_bias, data=high_increasing_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,early_ratio,R-squared:,0.033
Model:,OLS,Adj. R-squared:,0.033
Method:,Least Squares,F-statistic:,6808.0
Date:,"Tue, 17 Jan 2023",Prob (F-statistic):,0.0
Time:,17:45:02,Log-Likelihood:,-112790.0
No. Observations:,590329,AIC:,225600.0
Df Residuals:,590325,BIC:,225600.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2521,0.001,326.415,0.000,0.251,0.254
i_episode,-0.0016,1.33e-05,-117.973,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-1.036e-05,1.23e-06,-8.400,0.000,-1.28e-05,-7.94e-06
depth_cost_weight,0.0033,6.94e-05,47.046,0.000,0.003,0.003

0,1,2,3
Omnibus:,157276.919,Durbin-Watson:,1.949
Prob(Omnibus):,0.0,Jarque-Bera (JB):,325968.047
Skew:,1.619,Prob(JB):,0.0
Kurtosis:,4.664,Cond. No.,1220.0


In [17]:
# 3. large_variance
large_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'large_variance']
res = smf.ols(formula=formula_present_bias, data=large_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,early_ratio,R-squared:,0.086
Model:,OLS,Adj. R-squared:,0.086
Method:,Least Squares,F-statistic:,19400.0
Date:,"Tue, 17 Jan 2023",Prob (F-statistic):,0.0
Time:,17:45:06,Log-Likelihood:,-330290.0
No. Observations:,621819,AIC:,660600.0
Df Residuals:,621815,BIC:,660600.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4228,0.001,398.962,0.000,0.421,0.425
i_episode,0.0008,1.83e-05,44.290,0.000,0.001,0.001
i_episode:depth_cost_weight,6.8e-05,1.26e-06,54.053,0.000,6.55e-05,7.05e-05
depth_cost_weight,0.0051,7.22e-05,70.465,0.000,0.005,0.005

0,1,2,3
Omnibus:,3435073.744,Durbin-Watson:,1.974
Prob(Omnibus):,0.0,Jarque-Bera (JB):,62468.092
Skew:,0.118,Prob(JB):,0.0
Kurtosis:,1.465,Cond. No.,1690.0


In [18]:
# 4. mini_variance
mini_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'mini_variance']
res = smf.ols(formula=formula_present_bias, data=mini_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,early_ratio,R-squared:,0.03
Model:,OLS,Adj. R-squared:,0.03
Method:,Least Squares,F-statistic:,5946.0
Date:,"Tue, 17 Jan 2023",Prob (F-statistic):,0.0
Time:,17:45:11,Log-Likelihood:,-127460.0
No. Observations:,575587,AIC:,254900.0
Df Residuals:,575583,BIC:,255000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2565,0.001,318.192,0.000,0.255,0.258
i_episode,-0.0014,1.39e-05,-101.170,0.000,-0.001,-0.001
i_episode:depth_cost_weight,-1.434e-06,1.22e-06,-1.177,0.239,-3.82e-06,9.54e-07
depth_cost_weight,0.0030,6.85e-05,44.297,0.000,0.003,0.003

0,1,2,3
Omnibus:,137586.193,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,261780.638
Skew:,1.523,Prob(JB):,0.0
Kurtosis:,4.28,Cond. No.,1280.0


In [19]:
# 5. zero_variance
zero_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'zero_variance']
res = smf.ols(formula=formula_present_bias, data=zero_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,early_ratio,R-squared:,0.037
Model:,OLS,Adj. R-squared:,0.037
Method:,Least Squares,F-statistic:,7328.0
Date:,"Tue, 17 Jan 2023",Prob (F-statistic):,0.0
Time:,17:45:14,Log-Likelihood:,-157900.0
No. Observations:,575157,AIC:,315800.0
Df Residuals:,575153,BIC:,315800.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2602,0.001,306.362,0.000,0.259,0.262
i_episode,-0.0013,1.47e-05,-91.486,0.000,-0.001,-0.001
i_episode:depth_cost_weight,1.01e-05,1.38e-06,7.308,0.000,7.39e-06,1.28e-05
depth_cost_weight,0.0041,7.94e-05,51.299,0.000,0.004,0.004

0,1,2,3
Omnibus:,132405.766,Durbin-Watson:,2.014
Prob(Omnibus):,0.0,Jarque-Bera (JB):,244711.491
Skew:,1.505,Prob(JB):,0.0
Kurtosis:,4.074,Cond. No.,1190.0


### late present bias

In [11]:
formula_late_present_bias = "late_ratio ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + 1"
res = smf.ols(formula=formula_late_present_bias, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,late_ratio,R-squared:,0.063
Model:,OLS,Adj. R-squared:,0.063
Method:,Least Squares,F-statistic:,65840.0
Date:,"Wed, 18 Jan 2023",Prob (F-statistic):,0.0
Time:,15:44:59,Log-Likelihood:,-1345200.0
No. Observations:,2949808,AIC:,2690000.0
Df Residuals:,2949804,BIC:,2690000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4004,0.000,888.850,0.000,0.400,0.401
i_episode,-0.0016,7.79e-06,-207.561,0.000,-0.002,-0.002
i_episode:depth_cost_weight,1.506e-05,6.33e-07,23.797,0.000,1.38e-05,1.63e-05
depth_cost_weight,0.0063,3.6e-05,176.226,0.000,0.006,0.006

0,1,2,3
Omnibus:,718650.024,Durbin-Watson:,1.983
Prob(Omnibus):,0.0,Jarque-Bera (JB):,370383.902
Skew:,0.728,Prob(JB):,0.0
Kurtosis:,2.056,Cond. No.,1400.0


In [12]:
# 1. cogsci_learning
cogsci_learning_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'cogsci_learning']
res = smf.ols(formula=formula_late_present_bias, data=cogsci_learning_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,late_ratio,R-squared:,0.065
Model:,OLS,Adj. R-squared:,0.065
Method:,Least Squares,F-statistic:,13530.0
Date:,"Wed, 18 Jan 2023",Prob (F-statistic):,0.0
Time:,15:45:13,Log-Likelihood:,-217240.0
No. Observations:,586916,AIC:,434500.0
Df Residuals:,586912,BIC:,434500.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3773,0.001,405.586,0.000,0.375,0.379
i_episode,-0.0021,1.61e-05,-128.239,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-1.036e-05,1.2e-06,-8.623,0.000,-1.27e-05,-8.01e-06
depth_cost_weight,0.0058,6.82e-05,84.982,0.000,0.006,0.006

0,1,2,3
Omnibus:,69319.618,Durbin-Watson:,1.966
Prob(Omnibus):,0.0,Jarque-Bera (JB):,89564.895
Skew:,0.935,Prob(JB):,0.0
Kurtosis:,2.589,Cond. No.,1550.0


In [13]:
# 2.high_increasing
high_increasing_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'high_increasing']
res = smf.ols(formula=formula_late_present_bias, data=high_increasing_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,late_ratio,R-squared:,0.061
Model:,OLS,Adj. R-squared:,0.061
Method:,Least Squares,F-statistic:,12770.0
Date:,"Wed, 18 Jan 2023",Prob (F-statistic):,0.0
Time:,15:45:13,Log-Likelihood:,-208500.0
No. Observations:,590329,AIC:,417000.0
Df Residuals:,590325,BIC:,417100.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3601,0.001,396.344,0.000,0.358,0.362
i_episode,-0.0023,1.57e-05,-147.838,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-1.742e-05,1.45e-06,-12.017,0.000,-2.03e-05,-1.46e-05
depth_cost_weight,0.0061,8.16e-05,74.303,0.000,0.006,0.006

0,1,2,3
Omnibus:,80101.164,Durbin-Watson:,1.931
Prob(Omnibus):,0.0,Jarque-Bera (JB):,118423.216
Skew:,1.097,Prob(JB):,0.0
Kurtosis:,2.948,Cond. No.,1220.0


In [14]:
# 3. large_variance
large_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'large_variance']
res = smf.ols(formula=formula_late_present_bias, data=large_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,late_ratio,R-squared:,0.117
Model:,OLS,Adj. R-squared:,0.117
Method:,Least Squares,F-statistic:,27540.0
Date:,"Wed, 18 Jan 2023",Prob (F-statistic):,0.0
Time:,15:45:14,Log-Likelihood:,-321590.0
No. Observations:,621819,AIC:,643200.0
Df Residuals:,621815,BIC:,643200.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5162,0.001,493.946,0.000,0.514,0.518
i_episode,0.0002,1.81e-05,9.613,0.000,0.000,0.000
i_episode:depth_cost_weight,6.705e-05,1.24e-06,54.040,0.000,6.46e-05,6.95e-05
depth_cost_weight,0.0068,7.12e-05,95.507,0.000,0.007,0.007

0,1,2,3
Omnibus:,3469012.348,Durbin-Watson:,1.969
Prob(Omnibus):,0.0,Jarque-Bera (JB):,62476.302
Skew:,-0.132,Prob(JB):,0.0
Kurtosis:,1.47,Cond. No.,1690.0


In [15]:
# 4. mini_variance
mini_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'mini_variance']
res = smf.ols(formula=formula_late_present_bias, data=mini_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,late_ratio,R-squared:,0.058
Model:,OLS,Adj. R-squared:,0.058
Method:,Least Squares,F-statistic:,11910.0
Date:,"Wed, 18 Jan 2023",Prob (F-statistic):,0.0
Time:,15:45:15,Log-Likelihood:,-220300.0
No. Observations:,575587,AIC:,440600.0
Df Residuals:,575583,BIC:,440600.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3710,0.001,391.714,0.000,0.369,0.373
i_episode,-0.0021,1.64e-05,-128.567,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-5.298e-06,1.43e-06,-3.700,0.000,-8.1e-06,-2.49e-06
depth_cost_weight,0.0059,8.04e-05,72.758,0.000,0.006,0.006

0,1,2,3
Omnibus:,70777.706,Durbin-Watson:,2.037
Prob(Omnibus):,0.0,Jarque-Bera (JB):,95590.235
Skew:,0.982,Prob(JB):,0.0
Kurtosis:,2.646,Cond. No.,1280.0


In [16]:
# 5. zero_variance
zero_variance_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'zero_variance']
res = smf.ols(formula=formula_late_present_bias, data=zero_variance_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,late_ratio,R-squared:,0.077
Model:,OLS,Adj. R-squared:,0.077
Method:,Least Squares,F-statistic:,15960.0
Date:,"Wed, 18 Jan 2023",Prob (F-statistic):,0.0
Time:,15:45:15,Log-Likelihood:,-236300.0
No. Observations:,575157,AIC:,472600.0
Df Residuals:,575153,BIC:,472600.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3681,0.001,378.082,0.000,0.366,0.370
i_episode,-0.0019,1.68e-05,-111.634,0.000,-0.002,-0.002
i_episode:depth_cost_weight,6.866e-06,1.58e-06,4.333,0.000,3.76e-06,9.97e-06
depth_cost_weight,0.0082,9.1e-05,90.362,0.000,0.008,0.008

0,1,2,3
Omnibus:,71925.566,Durbin-Watson:,2.03
Prob(Omnibus):,0.0,Jarque-Bera (JB):,91330.82
Skew:,0.948,Prob(JB):,0.0
Kurtosis:,2.532,Cond. No.,1190.0
