In [10]:
# Load the project utilities first, exactly as the original cell did.
# Presumably this file defines the helpers used further down
# (make_df(), get_alpha_beta_for_target_rate_2()) -- confirm against the file.
source("run_2_stage_util.R")

# Attach packages with library() rather than require(): library() stops with
# an error when a package is not installed, whereas require() only warns and
# returns FALSE, letting the notebook run on until a later, less obvious
# "could not find function" failure.
library(tidyverse)
library(rstan)
library(ggplot2)
library(mvtnorm)

In [14]:
# Read the saved simulation settings from disk; the individual entries are
# unpacked into separate variables in a later cell.
sim_params <- readRDS("simulation_params.RDS")

In [15]:
# Display the element names stored in sim_params, to see what is available
# before unpacking it.
names(sim_params)

In [17]:
# Unpack entries of sim_params into top-level variables.
# `$` access is kept on purpose so name lookup behaves exactly as before.

# Parameters suffixed _0 and _1; these are passed to make_df() together with
# n_0 and n_1 in the simulation cell.
mu_0 <- sim_params$mu_0
sigma_0 <- sim_params$sigma_0
mu_1 <- sim_params$mu_1
sigma_1 <- sim_params$sigma_1

# Missingness-related entries; missing_rate_numeric is the input to
# get_alpha_beta_for_target_rate_2() in the simulation cell.
mu_missing <- sim_params$mu_missing
sigma_2_missing <- sim_params$sigma_2_missing
missing_rate <- sim_params$missing_rate
missing_rate_numeric <- sim_params$missing_rate_numeric

# Remaining entries.
died <- sim_params$died
metabolites <- sim_params$metabolites

### LOAD THE MODEL
* The Stan models may need to be compiled when this cell runs, which can take some time.

In [6]:
# Let rstan write compiled models to disk so that an unchanged .stan file does
# not have to be recompiled on a later run.
rstan_options(auto_write = TRUE)

# Stan source files for the two stages.
stage_1_impute_code_file <- "stage_1_impute.stan"
stage_2_regress_code_file <- "stage_2_regress.stan"

# Compile stage 1 first, then stage 2 (same order as before).
# NOTE(review): the recorded output of this cell contains a parser Info message
# about `x_impute ~ normal(...)` possibly needing a Jacobian adjustment; which
# of the two .stan files triggers it is not visible from this notebook.
stage_1_impute_model <- stan_model(
  file = stage_1_impute_code_file,
  verbose = FALSE
)
stage_2_regress_model <- stan_model(
  file = stage_2_regress_code_file,
  verbose = FALSE
)


DIAGNOSTIC(S) FROM PARSER:
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
    x_impute ~ normal(...)



### SIMULATE DATA

In [18]:
# Obtain the alpha/beta values for a target missing rate of 0.2 from the
# numeric missing rates loaded out of sim_params.
missing_dist_params <- get_alpha_beta_for_target_rate_2(
  missing_rate_numeric,
  TARGET_MISSING_RATE = 0.2
)

# Build the simulated data set. Positional and named arguments are passed in
# the same order as before; only the layout has changed.
# NOTE(review): no RNG seed is set in this cell, so if make_df() draws random
# numbers the result will differ between runs -- confirm whether that is
# intended.
df_sim <- make_df(
  n_0 = 100, mu_0, sigma_0,
  n_1 = 100, mu_1, sigma_1,
  frac_sig = 0.5,
  censor = TRUE,
  max_missing = 0.6,
  alpha = missing_dist_params$alpha,
  beta = missing_dist_params$beta
)



#### Uncensored Data

In [20]:
# Preview the first rows of the df_sample element of the simulation result.
head(df_sim$df_sample)

Unnamed: 0_level_0,hydroxybutyrate,oxoisocaproate,X3.hydoxybutyrate,alanine,betaine,carnitine,choline,citrate,creatine,creatinine,⋯,methionine,oacetylcarnitine,ornithine,phenylalanine,proline,propyleneglycol,pyruvate,tyrosine,valine,died_90_day
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,-0.25191004,0.0943917,0.07089301,0.15473244,0.17541721,-1.4238165,-1.83272142,1.147711671,-2.7515198,-0.03548912,⋯,0.07765157,-1.45954067,-0.3256251,0.17290329,-0.09320093,-1.95178773,-0.6193418,0.5221018,0.5754503,0
2,0.007760949,-0.15921593,-0.05269317,-0.94884961,-0.26191822,-0.5082756,-2.18894986,-0.003823822,0.5745104,-0.00482917,⋯,-0.90627688,-1.46184884,-0.5278888,0.07411859,-0.41932549,0.019574,-0.1044677,-2.2352556,-1.0166284,0
3,-0.302569313,-0.36011995,0.25208341,-0.85430777,0.41994433,-1.3246501,1.40976523,-0.13980141,-0.4802372,-1.65252938,⋯,0.01007776,-0.01102202,-0.3508865,-0.38828762,0.87586427,-0.02815341,-0.1956322,-0.8043586,-0.8664301,0
4,-1.364167196,-0.24907956,0.34873039,-1.38536728,-0.8058259,-2.0945044,-0.00285199,0.612708514,-1.4502969,0.8975383,⋯,-1.54954956,-0.61408629,-1.1376746,-1.02564436,-0.93969669,-0.66105686,-1.20681,-0.9715164,-1.1861097,0
5,-0.322603386,0.06168073,0.91040493,0.04845448,-0.3605121,-1.0743505,-0.52117167,0.450628714,-0.8536158,-0.51962764,⋯,-0.47025478,-1.07995105,-0.4742309,-0.63774709,-0.70405325,-0.92095628,0.6953048,-0.8357657,-0.3628471,0
6,-0.386494876,-2.23431726,1.35315614,-0.29614738,-0.01570376,0.9053941,1.01911281,0.306965978,1.1586783,1.4604592,⋯,0.56923399,0.77715486,1.3287537,0.76245202,0.16704732,-0.33817931,-1.6079013,0.4890365,-0.155944,0


#### Censored Data

In [21]:
# Preview the first rows of the df_censored element of the simulation result.
head(df_sim$df_censored)

Unnamed: 0_level_0,hydroxybutyrate,oxoisocaproate,X3.hydoxybutyrate,alanine,betaine,carnitine,choline,citrate,creatine,creatinine,⋯,methionine,oacetylcarnitine,ornithine,phenylalanine,proline,propyleneglycol,pyruvate,tyrosine,valine,died_90_day
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,,0.0943917,0.07089301,0.15473244,0.17541721,,,1.147711671,,,⋯,0.07765157,,-0.3256251,0.17290329,-0.09320093,,-0.6193418,0.5221018,0.5754503,0
2,0.007760949,-0.15921593,-0.05269317,,-0.26191822,-0.5082756,,-0.003823822,0.5745104,,⋯,-0.90627688,,-0.5278888,0.07411859,-0.41932549,0.019574,-0.1044677,,,0
3,,-0.36011995,0.25208341,,0.41994433,,1.40976523,-0.13980141,-0.4802372,,⋯,0.01007776,-0.01102202,-0.3508865,-0.38828762,0.87586427,-0.02815341,-0.1956322,,-0.8664301,0
4,,-0.24907956,0.34873039,,,,-0.00285199,0.612708514,-1.4502969,0.8975383,⋯,,-0.61408629,,-1.02564436,,-0.66105686,,,,0
5,,0.06168073,0.91040493,0.04845448,-0.3605121,,-0.52117167,0.450628714,-0.8536158,,⋯,-0.47025478,,-0.4742309,-0.63774709,-0.70405325,-0.92095628,0.6953048,,-0.3628471,0
6,,,1.35315614,-0.29614738,-0.01570376,0.9053941,1.01911281,0.306965978,1.1586783,1.4604592,⋯,0.56923399,0.77715486,1.3287537,0.76245202,0.16704732,-0.33817931,,0.4890365,-0.155944,0


#### Naive Imputation

In [23]:
# Preview the first rows of the df_naive_impute element of the simulation
# result.
head(df_sim$df_naive_impute)

Unnamed: 0_level_0,hydroxybutyrate,oxoisocaproate,X3.hydoxybutyrate,alanine,betaine,carnitine,choline,citrate,creatine,creatinine,⋯,methionine,oacetylcarnitine,ornithine,phenylalanine,proline,propyleneglycol,pyruvate,tyrosine,valine,died_90_day
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,-0.111827391,0.0943917,0.07089301,0.15473244,0.17541721,-0.8697738,-1.15685914,1.147711671,-1.8694631,0.07411154,⋯,0.07765157,-0.88957791,-0.3256251,0.17290329,-0.09320093,-1.38220264,-0.6193418,0.5221018,0.5754503,0
2,0.007760949,-0.15921593,-0.05269317,-0.55349254,-0.26191822,-0.5082756,-1.15685914,-0.003823822,0.5745104,0.07411154,⋯,-0.90627688,-0.88957791,-0.5278888,0.07411859,-0.41932549,0.019574,-0.1044677,-0.2003839,-0.8697296,0
3,-0.111827391,-0.36011995,0.25208341,-0.55349254,0.41994433,-0.8697738,1.40976523,-0.13980141,-0.4802372,0.07411154,⋯,0.01007776,-0.01102202,-0.3508865,-0.38828762,0.87586427,-0.02815341,-0.1956322,-0.2003839,-0.8664301,0
4,-0.111827391,-0.24907956,0.34873039,-0.55349254,-0.43103804,-0.8697738,-0.00285199,0.612708514,-1.4502969,0.8975383,⋯,-0.93846252,-0.61408629,-0.8393759,-1.02564436,-0.90791381,-0.66105686,-0.8068935,-0.2003839,-0.8697296,0
5,-0.111827391,0.06168073,0.91040493,0.04845448,-0.3605121,-0.8697738,-0.52117167,0.450628714,-0.8536158,0.07411154,⋯,-0.47025478,-0.88957791,-0.4742309,-0.63774709,-0.70405325,-0.92095628,0.6953048,-0.2003839,-0.3628471,0
6,-0.111827391,-1.32362933,1.35315614,-0.29614738,-0.01570376,0.9053941,1.01911281,0.306965978,1.1586783,1.4604592,⋯,0.56923399,0.77715486,1.3287537,0.76245202,0.16704732,-0.33817931,-0.8068935,0.4890365,-0.155944,0


#### Minimum Observed Values

In [24]:
# Display the est.thresholds element of the simulation result (shown under the
# "Minimum Observed Values" heading).
df_sim$est.thresholds

In [25]:
# Display the missing_rates element of the simulation result.
df_sim$missing_rates