# Imports

In [None]:
# basics
import pandas as pd
import numpy as np
from scipy.stats import bootstrap


# utils
import os

# Reading datasets

In [None]:
# Model variants to compare; each name is also the directory its CSV files are read from.
models = ['beta4', 'beta3']

# Base names (without the '.csv' extension) of the result files used for every model.
# Alternative runs kept for reference, currently disabled:
#   'generate_data_mc100_m20_i100_e10000_t1000_lr1.0'
#   'generate_data_mc100_m50_i300_e10000_t1000_lr1.0'
#   'generate_data_mc100_m100_i100_e10000_t1000_lr1.0'
datasets = [
    'generate_data_mc30_m20_i100_e50000_t1000_lr1.0',
    'generate_data_mc30_m50_i300_e50000_t1000_lr1.0',
    'generate_data_mc30_m100_i100_e50000_t1000_lr1.0',
]

# Relative path of every (model, dataset) CSV, grouped by model.
URLs = [
    os.path.join(model_dir, f'{stem}.csv')
    for model_dir in models
    for stem in datasets
]

In [None]:
# Show the list of relative CSV paths (notebook cell output) as a quick check.
URLs

In [None]:
# Load every result file into a nested mapping: data[model][dataset] -> DataFrame.
# Each file is read from '<model>/<dataset>.csv'; its first column becomes the row index.
data = {
    model_dir: {
        stem: pd.read_csv(os.path.join(model_dir, f'{stem}.csv'), index_col=0)
        for stem in datasets
    }
    for model_dir in models
}

In [None]:
# Preview the first rows of one loaded result table to check it was read as expected.
data['beta4']['generate_data_mc30_m20_i100_e50000_t1000_lr1.0'].head()

# Processing

In [None]:
# Row labels for the results table, one list per index level.
datasets_shape = [
    'N = 100, M = 20',
    'N = 300, M = 50',
    'N = 100, M = 100',
]

parameters = ["theta_i", "delta_j", "a_j"]

# "beta^{3}-IRT^{*}" is an additional variant that is currently left out.
models_name = [
    "beta^{3}-IRT",
    "beta^{4}-IRT",
]

# Every (dataset shape, parameter, model) combination, as [shape, param, name]
# rows with the model name varying fastest.
multi_index = [
    [shape, param, name]
    for shape in datasets_shape
    for param in parameters
    for name in models_name
]

In [None]:
# Build an empty results frame indexed by (Dataset, Parameter, Model).
# All three columns are used as group keys, so counting leaves no data columns;
# only the MultiIndex of group keys remains. groupby sorts the keys by default,
# so the row order is the sorted label order, not the order of `multi_index`.
table = (
    pd.DataFrame(multi_index, columns=['Dataset', 'Parameter', 'Model'])
    .groupby(['Dataset', 'Parameter', 'Model'])
    .count()
)

In [None]:
# Create the two result columns as all-NaN placeholders, one value per table row.
table['RSE, 95% CI'] = np.full(len(table), np.nan)
table['rho, 95% CI'] = np.full(len(table), np.nan)

In [None]:
# Show the table with its NaN placeholder columns before it is filled.
table

# Bootstrap

In [None]:
# Keys used to look up the loaded results when bootstrapping.
# NOTE: the loops that consume these lists append their results in iteration
# order, and the results are later assigned to `table` by position, so the
# order here has to follow the row order of `table`.
_datasets = [
    "generate_data_mc30_m100_i100_e50000_t1000_lr1.0",
    "generate_data_mc30_m20_i100_e50000_t1000_lr1.0",
    "generate_data_mc30_m50_i300_e50000_t1000_lr1.0",
]
# Column-name suffixes, one per estimated parameter.
_parameters = ["_aj", "_delj", "_thi"]
# Top-level keys of `data`.
_models = ["beta3", "beta4"]

In [None]:
# Single-column check of the bootstrap call used for the results table:
# 95% percentile-bootstrap confidence interval for the MEAN of RSE_thi
# (the statistic passed below is np.mean, not the median).
# `bootstrap` takes a sequence of samples, hence the one-element tuple.
tmp = (data['beta4']['generate_data_mc30_m100_i100_e50000_t1000_lr1.0']['RSE_thi'].values,)
bootstrap_ci = bootstrap(tmp,
                         np.mean, confidence_level=0.95,
                         random_state=1,  # fixed seed so the interval is reproducible
                         method='percentile')
# Display the (low, high) bounds as the cell output.
bootstrap_ci.confidence_interval.low, bootstrap_ci.confidence_interval.high

In [None]:
def _mean_ci(sample, ndigits=4):
    """Return ``[low, high]`` of the 95% percentile-bootstrap CI for the mean.

    ``sample`` is a 1-D array of observations. Every call seeds the resampling
    with ``random_state=1``, so results are reproducible. The bounds are
    converted to built-in ``float`` before rounding: NumPy scalars stored in a
    list are rendered as ``np.float64(...)`` under NumPy >= 2 whenever the list
    is stringified (for example when the table is exported).
    """
    interval = bootstrap((sample,),
                         np.mean, confidence_level=0.95,
                         random_state=1,
                         method='percentile').confidence_interval
    return [round(float(interval.low), ndigits),
            round(float(interval.high), ndigits)]


# One [low, high] pair per (dataset, parameter, model) combination, appended in
# loop order. The lists are later assigned to `table` by position, so the
# iteration order of `_datasets`, `_parameters` and `_models` must match the
# row order of `table`.
RSE = []
RHO = []
for dataset in _datasets:
    for param in _parameters:
        for model in _models:
            frame = data[model][dataset]
            RSE.append(_mean_ci(frame['RSE' + param].values))
            RHO.append(_mean_ci(frame["corr" + param + "_to_pred" + param].values))

In [None]:
# Replace the placeholder columns with the bootstrapped [low, high] pairs.
# Plain lists carry no index, so the i-th entry lands in the i-th row of `table`:
# the order in which RSE/RHO were appended must match the table's row order.
table['RSE, 95% CI'] = RSE
table['rho, 95% CI'] = RHO

In [None]:
# Show the table now that the RSE and rho interval columns are filled.
table

In [None]:
# Print the table as LaTeX source. multirow=True asks pandas to use \multirow
# for the MultiIndex rows (the LaTeX document then needs the multirow package).
print(table.to_latex(multirow=True))

# Change sign

In [None]:
# Row labels for the second table (inverted sign / fitting time), one list per index level.
datasets_shape = [
    'N = 100, M = 20',
    'N = 300, M = 50',
    'N = 100, M = 100',
]

models_name = ["beta^{3}-IRT", "beta^{4}-IRT"]

# Every (dataset shape, model) combination as [shape, name] rows, model varying fastest.
multi_index = [
    [shape, name]
    for shape in datasets_shape
    for name in models_name
]

In [None]:
# Rebuild `table` as an empty frame indexed by (Dataset, Model).
# Both columns are group keys, so counting leaves only the MultiIndex;
# groupby sorts the keys by default, which fixes the row order.
table = (
    pd.DataFrame(multi_index, columns=['Dataset', 'Model'])
    .groupby(['Dataset', 'Model'])
    .count()
)
# Show the (still column-less) table.
table

In [None]:
# Create the two result columns as all-NaN placeholders, one value per table row.
table['Inverted sign (%)'] = np.full(len(table), np.nan)
table['Time to fit (s)'] = np.full(len(table), np.nan)

In [None]:
# Show the table with its NaN placeholder columns before it is filled.
table

In [None]:
def _bootstrap_mean_bounds(sample):
    """Return ``(low, high)`` of the 95% percentile-bootstrap CI for the mean.

    ``sample`` is a 1-D array of observations. Every call seeds the resampling
    with ``random_state=1``, so results are reproducible. The bounds are
    returned as built-in ``float``: NumPy scalars stored in a list are rendered
    as ``np.float64(...)`` under NumPy >= 2 whenever the list is stringified
    (for example when the table is exported).
    """
    interval = bootstrap((sample,),
                         np.mean, confidence_level=0.95,
                         random_state=1,
                         method='percentile').confidence_interval
    return float(interval.low), float(interval.high)


# One [low, high] pair per (dataset, model) combination, appended in loop order.
# The lists are later assigned to `table` by position, so the iteration order of
# `_datasets` and `_models` must match the row order of `table`.
SIGN = []
TIME = []
for dataset in _datasets:
    for model in _models:
        # Progress output, one entry per combination.
        print('running:')
        print(dataset + ' aj_sign_changed time_stamp ' + model)

        frame = data[model][dataset]
        sign_low, sign_high = _bootstrap_mean_bounds(frame['aj_sign_changed'].values)
        time_low, time_high = _bootstrap_mean_bounds(frame['time_stamp'].values)

        # The sign bounds are scaled by 100 so the column reads as a percentage.
        SIGN.append([round(sign_low * 100, 4), round(sign_high * 100, 4)])
        TIME.append([round(time_low, 4), round(time_high, 4)])

In [None]:
# Replace the placeholder columns with the bootstrapped [low, high] pairs.
# Plain lists carry no index, so the i-th entry lands in the i-th row of `table`:
# the order in which SIGN/TIME were appended must match the table's row order.
table['Inverted sign (%)'] = SIGN
table['Time to fit (s)'] = TIME

In [None]:
# Show the table now that the inverted-sign and fitting-time columns are filled.
table

In [None]:
# Print the table as LaTeX source. multirow=True asks pandas to use \multirow
# for the MultiIndex rows (the LaTeX document then needs the multirow package).
print(table.to_latex(multirow=True))