In [36]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [37]:
# mapping experiment setting to variance and std on early node
mapping = {
            'cogsci_learning': [-8, -4, 4, 8],
            'mini_variance'  : [-2, -1, 1, 2],
            'zero_variance'  : [1, 1, 1, 1],
            'large_variance' : [-48, -24, 24, 48],
            'high_increasing': [-4, -2, 2, 4]
                          }

In [38]:
considered_experiment_settings = ["high_increasing", "large_variance", "cogsci_learning", "mini_variance", "zero_variance"]

In [39]:
# load data
data = pd.DataFrame()
for experiment_setting in considered_experiment_settings:
    data_in = pd.read_csv(f"data/processed/simulated/{experiment_setting}/MCL/linear_depth/search_space/1729_depth_only_baseline_null.csv")
    data_in['experiment_setting']= experiment_setting
    data = pd.concat([data, data_in])


In [40]:
data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,w,taken_paths,costs,loss,ground_truth,trial_id,i_episode,actions,...,depth_cost_weight,pid,num_early,num_middle,num_late,num_clicks,unbounded_present_bias,unbounded_loss,state,experiment_setting
0,0,0,"[0, 0, 0, 0, 0]","[1, 2, 3]",[54.0],,"[0.0, -2.0, 8.0, 48.0, -24.0, 2.0, -4.0, -48.0...",3827219843471436276,0,0,...,2.5,0,False,False,False,False,0.0,0.0,"(0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Ca...",high_increasing
1,1,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,9,...,2.5,0,True,False,False,True,8.0,-28.0,"(0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Ca...",high_increasing
2,2,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,1,...,2.5,0,True,False,False,True,8.0,-28.0,"(0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, Cat, -4...",high_increasing
3,3,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,5,...,2.5,0,True,False,False,True,8.0,-28.0,"(0, -2.0, Cat, Cat, Cat, Cat, Cat, Cat, Cat, -...",high_increasing
4,4,1,"[0.0, 0.0, 0.0, 15.5454392236338, 48.857094702...","[5, 6, 8]","[-1.0, -1.0, -1.0, 20.0]",,"[0.0, -2.0, -4.0, -24.0, -48.0, 4.0, -8.0, 24....",-4289689371375290671,1,0,...,2.5,0,False,False,False,False,8.0,-28.0,"(0, -2.0, Cat, Cat, Cat, 4.0, Cat, Cat, Cat, -...",high_increasing


In [41]:
# create a new column with variance and std of the experiment settings
data["std_early_nodes"] = data["experiment_setting"].apply(lambda experiment_setting: np.std(mapping[experiment_setting]))
data["variance_early_nodes"] = data["experiment_setting"].apply(lambda experiment_setting: np.var(mapping[experiment_setting]))

In [42]:
## normalization
# variance
data["norm_variance_early_nodes"] = (
                                    (data["variance_early_nodes"]-data["variance_early_nodes"].min())/
                                    (data["variance_early_nodes"].max()-data["variance_early_nodes"].min()))
# std
data["norm_std_early_nodes"]      = (
                                    (data["std_early_nodes"]-data["std_early_nodes"].min())/
                                    (data["std_early_nodes"].max()-data["std_early_nodes"].min()))
# data["norm_std_early_nodes"] = preprocessing.Normalizer(data["std_early_nodes"])
# data["norm_variance_early_nodes"] = preprocessing.Normalizer(data["variance_early_nodes"])

In [43]:
# group data by factors
grouped_data = data.groupby(["pid", "i_episode", "sim_cost_parameter_values", "sim_experiment_setting"], as_index=False).sum()

In [44]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,5183.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:53:03,Log-Likelihood:,-5242100.0
No. Observations:,3400000,AIC:,10480000.0
Df Residuals:,3399996,BIC:,10480000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.7834,0.001,643.347,0.000,0.781,0.786
i_episode,-0.0015,2.13e-05,-68.849,0.000,-0.002,-0.001
i_episode:depth_cost_weight,-2.859e-05,1.85e-06,-15.440,0.000,-3.22e-05,-2.5e-05
depth_cost_weight,-0.0043,0.000,-40.353,0.000,-0.004,-0.004

0,1,2,3
Omnibus:,559576.601,Durbin-Watson:,1.946
Prob(Omnibus):,0.0,Jarque-Bera (JB):,895813.867
Skew:,1.257,Prob(JB):,0.0
Kurtosis:,2.939,Cond. No.,1290.0


### 1. early_node_variance ~ experiment_setting


In [45]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + sim_experiment_setting + sim_experiment_setting:i_episode + sim_experiment_setting:i_episode:depth_cost_weight + sim_experiment_setting:depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.056
Model:,OLS,Adj. R-squared:,0.056
Method:,Least Squares,F-statistic:,10630.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:53:28,Log-Likelihood:,-5151800.0
No. Observations:,3400000,AIC:,10300000.0
Df Residuals:,3399980,BIC:,10300000.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.7766,0.003,292.018,0.000,0.771,0.782
sim_experiment_setting[T.high_increasing],-0.0534,0.004,-14.228,0.000,-0.061,-0.046
sim_experiment_setting[T.large_variance],0.2851,0.004,75.876,0.000,0.278,0.292
sim_experiment_setting[T.mini_variance],-0.0845,0.004,-22.515,0.000,-0.092,-0.077
sim_experiment_setting[T.zero_variance],-0.1292,0.004,-34.385,0.000,-0.137,-0.122
i_episode,-0.0024,4.65e-05,-51.373,0.000,-0.002,-0.002
sim_experiment_setting[T.high_increasing]:i_episode,-0.0005,6.56e-05,-8.025,0.000,-0.001,-0.000
sim_experiment_setting[T.large_variance]:i_episode,0.0050,6.57e-05,76.660,0.000,0.005,0.005
sim_experiment_setting[T.mini_variance]:i_episode,4.513e-06,6.56e-05,0.069,0.945,-0.000,0.000

0,1,2,3
Omnibus:,562601.603,Durbin-Watson:,1.916
Prob(Omnibus):,0.0,Jarque-Bera (JB):,898401.102
Skew:,1.255,Prob(JB):,0.0
Kurtosis:,3.199,Cond. No.,7580.0




### 2. early_node_variance ~ norm_variance_early_nodes

- All environment settings

In [46]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + norm_variance_early_nodes + norm_variance_early_nodes:i_episode + norm_variance_early_nodes:i_episode:depth_cost_weight + norm_variance_early_nodes:depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.119
Model:,OLS,Adj. R-squared:,0.119
Method:,Least Squares,F-statistic:,65650.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:55:06,Log-Likelihood:,-5034400.0
No. Observations:,3400000,AIC:,10070000.0
Df Residuals:,3399992,BIC:,10070000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6485,0.001,525.696,0.000,0.646,0.651
i_episode,-0.0019,2.16e-05,-88.906,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-4.081e-05,2.03e-06,-20.108,0.000,-4.48e-05,-3.68e-05
depth_cost_weight,-0.0015,0.000,-12.816,0.000,-0.002,-0.001
norm_variance_early_nodes,0.1581,0.001,294.696,0.000,0.157,0.159
norm_variance_early_nodes:i_episode,0.0004,9.37e-06,40.127,0.000,0.000,0.000
norm_variance_early_nodes:i_episode:depth_cost_weight,1.916e-05,4.3e-07,44.563,0.000,1.83e-05,2e-05
norm_variance_early_nodes:depth_cost_weight,0.0004,2.35e-05,17.179,0.000,0.000,0.000

0,1,2,3
Omnibus:,619902.746,Durbin-Watson:,1.929
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1018112.631
Skew:,1.308,Prob(JB):,0.0
Kurtosis:,3.583,Cond. No.,6770.0


- Individual experiment setting

In [47]:
# 1. cogsci_learning
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'cogsci_learning']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.53
Model:,OLS,Adj. R-squared:,0.53
Method:,Least Squares,F-statistic:,109400.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:55:18,Log-Likelihood:,-794440.0
No. Observations:,680000,AIC:,1589000.0
Df Residuals:,679992,BIC:,1589000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.3892,0.003,-120.058,0.000,-0.396,-0.383
i_episode,-0.0039,5.89e-05,-67.047,0.000,-0.004,-0.004
i_episode:depth_cost_weight,0.0004,7.36e-06,60.374,0.000,0.000,0.000
depth_cost_weight,0.0447,0.000,110.912,0.000,0.044,0.045
norm_variance_early_nodes,10.1744,0.023,434.510,0.000,10.129,10.220
norm_variance_early_nodes:i_episode,0.0002,0.000,0.486,0.627,-0.001,0.001
norm_variance_early_nodes:i_episode:depth_cost_weight,-0.0020,3.16e-05,-64.058,0.000,-0.002,-0.002
norm_variance_early_nodes:depth_cost_weight,-0.1196,0.002,-71.775,0.000,-0.123,-0.116

0,1,2,3
Omnibus:,18635.287,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,20302.514
Skew:,0.407,Prob(JB):,0.0
Kurtosis:,3.229,Cond. No.,18200.0


In [48]:
# 2. high_increasing
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'high_increasing']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.517
Model:,OLS,Adj. R-squared:,0.517
Method:,Least Squares,F-statistic:,103900.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:55:25,Log-Likelihood:,-766020.0
No. Observations:,680000,AIC:,1532000.0
Df Residuals:,679992,BIC:,1532000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.3643,0.003,-120.264,0.000,-0.370,-0.358
i_episode,-0.0033,5.48e-05,-60.347,0.000,-0.003,-0.003
i_episode:depth_cost_weight,0.0003,7.35e-06,40.544,0.000,0.000,0.000
depth_cost_weight,0.0305,0.000,74.641,0.000,0.030,0.031
norm_variance_early_nodes,40.6531,0.092,440.920,0.000,40.472,40.834
norm_variance_early_nodes:i_episode,-0.0241,0.002,-14.320,0.000,-0.027,-0.021
norm_variance_early_nodes:i_episode:depth_cost_weight,-0.0062,0.000,-44.619,0.000,-0.006,-0.006
norm_variance_early_nodes:depth_cost_weight,-0.2833,0.008,-37.605,0.000,-0.298,-0.269

0,1,2,3
Omnibus:,27193.069,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32199.455
Skew:,0.466,Prob(JB):,0.0
Kurtosis:,3.517,Cond. No.,58100.0


In [49]:
# 3. large_variance
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'large_variance']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.388
Model:,OLS,Adj. R-squared:,0.388
Method:,Least Squares,F-statistic:,61490.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:55:30,Log-Likelihood:,-933620.0
No. Observations:,680000,AIC:,1867000.0
Df Residuals:,679992,BIC:,1867000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0341,0.004,-8.548,0.000,-0.042,-0.026
i_episode,0.0009,7.33e-05,12.434,0.000,0.001,0.001
i_episode:depth_cost_weight,0.0005,8.2e-06,59.450,0.000,0.000,0.001
depth_cost_weight,0.0399,0.000,87.841,0.000,0.039,0.041
norm_variance_early_nodes,0.2572,0.001,339.751,0.000,0.256,0.259
norm_variance_early_nodes:i_episode,-9.41e-05,1.37e-05,-6.852,0.000,-0.000,-6.72e-05
norm_variance_early_nodes:i_episode:depth_cost_weight,-4.718e-05,8.93e-07,-52.810,0.000,-4.89e-05,-4.54e-05
norm_variance_early_nodes:depth_cost_weight,-0.0030,4.77e-05,-62.866,0.000,-0.003,-0.003

0,1,2,3
Omnibus:,19986.546,Durbin-Watson:,1.999
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17224.504
Skew:,0.326,Prob(JB):,0.0
Kurtosis:,2.574,Cond. No.,24300.0


In [50]:
# 4. mini_variance
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'mini_variance']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.491
Model:,OLS,Adj. R-squared:,0.491
Method:,Least Squares,F-statistic:,93660.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:55:57,Log-Likelihood:,-774420.0
No. Observations:,680000,AIC:,1549000.0
Df Residuals:,679992,BIC:,1549000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.3504,0.003,-114.473,0.000,-0.356,-0.344
i_episode,-0.0029,5.55e-05,-52.137,0.000,-0.003,-0.003
i_episode:depth_cost_weight,0.0003,7.3e-06,44.979,0.000,0.000,0.000
depth_cost_weight,0.0300,0.000,74.558,0.000,0.029,0.031
norm_variance_early_nodes,162.6127,0.380,428.399,0.000,161.869,163.357
norm_variance_early_nodes:i_episode,-0.1477,0.007,-21.535,0.000,-0.161,-0.134
norm_variance_early_nodes:i_episode:depth_cost_weight,-0.0232,0.001,-44.218,0.000,-0.024,-0.022
norm_variance_early_nodes:depth_cost_weight,-1.1500,0.028,-41.074,0.000,-1.205,-1.095

0,1,2,3
Omnibus:,22529.127,Durbin-Watson:,2.076
Prob(Omnibus):,0.0,Jarque-Bera (JB):,25382.543
Skew:,0.436,Prob(JB):,0.0
Kurtosis:,3.367,Cond. No.,245000.0


In [51]:
# 5. zero_variance
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'zero_variance']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


0,1,2,3
Dep. Variable:,num_early,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,1623.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:56:04,Log-Likelihood:,-986470.0
No. Observations:,680000,AIC:,1973000.0
Df Residuals:,679996,BIC:,1973000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6474,0.002,260.199,0.000,0.643,0.652
i_episode,-0.0021,4.34e-05,-47.357,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-0.0001,4.41e-06,-23.092,0.000,-0.000,-9.31e-05
depth_cost_weight,0.0107,0.000,42.088,0.000,0.010,0.011
norm_variance_early_nodes,0,0,,,0,0
norm_variance_early_nodes:i_episode,0,0,,,0,0
norm_variance_early_nodes:i_episode:depth_cost_weight,0,0,,,0,0
norm_variance_early_nodes:depth_cost_weight,0,0,,,0,0

0,1,2,3
Omnibus:,187883.566,Durbin-Watson:,1.94
Prob(Omnibus):,0.0,Jarque-Bera (JB):,388989.177
Skew:,1.719,Prob(JB):,0.0
Kurtosis:,4.383,Cond. No.,inf


### 3. early_node_variance ~ norm_std_early_nodes

- All experiment settings

In [52]:
formula = "num_early ~ i_episode + i_episode:depth_cost_weight + depth_cost_weight + norm_std_early_nodes + norm_std_early_nodes:i_episode + norm_std_early_nodes:i_episode:depth_cost_weight + norm_std_early_nodes:depth_cost_weight + 1"
res = smf.ols(formula=formula, data=grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.152
Model:,OLS,Adj. R-squared:,0.152
Method:,Least Squares,F-statistic:,86840.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:56:22,Log-Likelihood:,-4970300.0
No. Observations:,3400000,AIC:,9941000.0
Df Residuals:,3399992,BIC:,9941000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5810,0.001,461.654,0.000,0.579,0.583
i_episode,-0.0019,2.21e-05,-87.072,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-4.242e-05,2.11e-06,-20.117,0.000,-4.66e-05,-3.83e-05
depth_cost_weight,0.0004,0.000,3.634,0.000,0.000,0.001
norm_std_early_nodes,0.1918,0.001,356.089,0.000,0.191,0.193
norm_std_early_nodes:i_episode,0.0003,9.42e-06,28.334,0.000,0.000,0.000
norm_std_early_nodes:i_episode:depth_cost_weight,1.892e-05,4.4e-07,42.959,0.000,1.81e-05,1.98e-05
norm_std_early_nodes:depth_cost_weight,0.0003,2.4e-05,12.879,0.000,0.000,0.000

0,1,2,3
Omnibus:,605881.398,Durbin-Watson:,1.938
Prob(Omnibus):,0.0,Jarque-Bera (JB):,983329.869
Skew:,1.277,Prob(JB):,0.0
Kurtosis:,3.647,Cond. No.,7170.0


- Individual experiment setting

In [53]:
# 1. cogsci_learning
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'cogsci_learning']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.53
Model:,OLS,Adj. R-squared:,0.53
Method:,Least Squares,F-statistic:,109400.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:56:42,Log-Likelihood:,-794440.0
No. Observations:,680000,AIC:,1589000.0
Df Residuals:,679992,BIC:,1589000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.3892,0.003,-120.058,0.000,-0.396,-0.383
i_episode,-0.0039,5.89e-05,-67.047,0.000,-0.004,-0.004
i_episode:depth_cost_weight,0.0004,7.36e-06,60.374,0.000,0.000,0.000
depth_cost_weight,0.0447,0.000,110.912,0.000,0.044,0.045
norm_std_early_nodes,1.6957,0.004,434.510,0.000,1.688,1.703
norm_std_early_nodes:i_episode,3.442e-05,7.09e-05,0.486,0.627,-0.000,0.000
norm_std_early_nodes:i_episode:depth_cost_weight,-0.0003,5.26e-06,-64.058,0.000,-0.000,-0.000
norm_std_early_nodes:depth_cost_weight,-0.0199,0.000,-71.775,0.000,-0.020,-0.019

0,1,2,3
Omnibus:,18635.287,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,20302.514
Skew:,0.407,Prob(JB):,0.0
Kurtosis:,3.229,Cond. No.,6050.0


In [54]:
# 2. high_increasing
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'high_increasing']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.517
Model:,OLS,Adj. R-squared:,0.517
Method:,Least Squares,F-statistic:,103900.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:57:10,Log-Likelihood:,-766020.0
No. Observations:,680000,AIC:,1532000.0
Df Residuals:,679992,BIC:,1532000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.3643,0.003,-120.264,0.000,-0.370,-0.358
i_episode,-0.0033,5.48e-05,-60.347,0.000,-0.003,-0.003
i_episode:depth_cost_weight,0.0003,7.35e-06,40.544,0.000,0.000,0.000
depth_cost_weight,0.0305,0.000,74.641,0.000,0.030,0.031
norm_std_early_nodes,3.3878,0.008,440.920,0.000,3.373,3.403
norm_std_early_nodes:i_episode,-0.0020,0.000,-14.320,0.000,-0.002,-0.002
norm_std_early_nodes:i_episode:depth_cost_weight,-0.0005,1.15e-05,-44.619,0.000,-0.001,-0.000
norm_std_early_nodes:depth_cost_weight,-0.0236,0.001,-37.605,0.000,-0.025,-0.022

0,1,2,3
Omnibus:,27193.069,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32199.455
Skew:,0.466,Prob(JB):,0.0
Kurtosis:,3.517,Cond. No.,5900.0


In [55]:
# 3. large_variance
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'large_variance']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.388
Model:,OLS,Adj. R-squared:,0.388
Method:,Least Squares,F-statistic:,61490.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:57:11,Log-Likelihood:,-933620.0
No. Observations:,680000,AIC:,1867000.0
Df Residuals:,679992,BIC:,1867000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0341,0.004,-8.548,0.000,-0.042,-0.026
i_episode,0.0009,7.33e-05,12.434,0.000,0.001,0.001
i_episode:depth_cost_weight,0.0005,8.2e-06,59.450,0.000,0.000,0.001
depth_cost_weight,0.0399,0.000,87.841,0.000,0.039,0.041
norm_std_early_nodes,0.2572,0.001,339.751,0.000,0.256,0.259
norm_std_early_nodes:i_episode,-9.41e-05,1.37e-05,-6.852,0.000,-0.000,-6.72e-05
norm_std_early_nodes:i_episode:depth_cost_weight,-4.718e-05,8.93e-07,-52.810,0.000,-4.89e-05,-4.54e-05
norm_std_early_nodes:depth_cost_weight,-0.0030,4.77e-05,-62.866,0.000,-0.003,-0.003

0,1,2,3
Omnibus:,19986.546,Durbin-Watson:,1.999
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17224.504
Skew:,0.326,Prob(JB):,0.0
Kurtosis:,2.574,Cond. No.,24300.0


In [56]:
# 4. mini_variance
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'mini_variance']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.491
Model:,OLS,Adj. R-squared:,0.491
Method:,Least Squares,F-statistic:,93660.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:57:12,Log-Likelihood:,-774420.0
No. Observations:,680000,AIC:,1549000.0
Df Residuals:,679992,BIC:,1549000.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.3504,0.003,-114.473,0.000,-0.356,-0.344
i_episode,-0.0029,5.55e-05,-52.137,0.000,-0.003,-0.003
i_episode:depth_cost_weight,0.0003,7.3e-06,44.979,0.000,0.000,0.000
depth_cost_weight,0.0300,0.000,74.558,0.000,0.029,0.031
norm_std_early_nodes,6.7755,0.016,428.399,0.000,6.745,6.807
norm_std_early_nodes:i_episode,-0.0062,0.000,-21.535,0.000,-0.007,-0.006
norm_std_early_nodes:i_episode:depth_cost_weight,-0.0010,2.19e-05,-44.218,0.000,-0.001,-0.001
norm_std_early_nodes:depth_cost_weight,-0.0479,0.001,-41.074,0.000,-0.050,-0.046

0,1,2,3
Omnibus:,22529.127,Durbin-Watson:,2.076
Prob(Omnibus):,0.0,Jarque-Bera (JB):,25382.543
Skew:,0.436,Prob(JB):,0.0
Kurtosis:,3.367,Cond. No.,10800.0


In [57]:
# 5. zero_variance
current_grouped_data = grouped_data.loc[grouped_data['sim_experiment_setting'] == 'zero_variance']
res = smf.ols(formula=formula, data=current_grouped_data).fit(
                    missing="drop"
                )
res.summary()

0,1,2,3
Dep. Variable:,num_early,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,1623.0
Date:,"Wed, 04 Jan 2023",Prob (F-statistic):,0.0
Time:,12:57:13,Log-Likelihood:,-986470.0
No. Observations:,680000,AIC:,1973000.0
Df Residuals:,679996,BIC:,1973000.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.6474,0.002,260.199,0.000,0.643,0.652
i_episode,-0.0021,4.34e-05,-47.357,0.000,-0.002,-0.002
i_episode:depth_cost_weight,-0.0001,4.41e-06,-23.092,0.000,-0.000,-9.31e-05
depth_cost_weight,0.0107,0.000,42.088,0.000,0.010,0.011
norm_std_early_nodes,0,0,,,0,0
norm_std_early_nodes:i_episode,0,0,,,0,0
norm_std_early_nodes:i_episode:depth_cost_weight,0,0,,,0,0
norm_std_early_nodes:depth_cost_weight,0,0,,,0,0

0,1,2,3
Omnibus:,187883.566,Durbin-Watson:,1.94
Prob(Omnibus):,0.0,Jarque-Bera (JB):,388989.177
Skew:,1.719,Prob(JB):,0.0
Kurtosis:,4.383,Cond. No.,inf
