diff --git a/notebooks/aline/README.md b/notebooks/aline/README.md index ee81913..1017a1e 100644 --- a/notebooks/aline/README.md +++ b/notebooks/aline/README.md @@ -15,7 +15,7 @@ There are a number of prerequesites to running this code: * an installation of MIMIC-III in a PostgreSQL database * Python 2.7 with the numpy, pandas, matplotlib, and psycopg2 packages * Jupyter with a Python 2 kernel -* R with the Matching library +* R with the `Matching`, `pROC`, `MASS` libraries # Running the study diff --git a/notebooks/aline/aline_propensity_score.Rmd b/notebooks/aline/aline_propensity_score.Rmd index 5c76cc0..0ab9125 100644 --- a/notebooks/aline/aline_propensity_score.Rmd +++ b/notebooks/aline/aline_propensity_score.Rmd @@ -1,6 +1,6 @@ --- title: "aline-propensity-score" -author: "Alistair Johnson" +author: "Alistair Johnson, Jesse Raffa" date: "May 15, 2017" output: html_document --- @@ -70,60 +70,85 @@ for (col in c("weight_first","temp_first","spo2_first", } ``` +If we did not remove any missing data above, we need to subselect complete cases for analysis. + +```{r completecases, echo = FALSE} +# subselect the variables +dat = dataset[,c("aline_flag", + "age","gender","weight_first","sofa_first","service_surg", + "day_icu_intime","hour_icu_intime", + "chf_flag","afib_flag","renal_flag", + "liver_flag","copd_flag","cad_flag","stroke_flag", + "malignancy_flag","respfail_flag", + "map_first","hr_first","temp_first","spo2_first", + "bun_first","chloride_first","creatinine_first", + "hgb_first","platelet_first", + "potassium_first","sodium_first","tco2_first","wbc_first")] + +idxKeep = complete.cases(dat) +dat = dat[idxKeep,] +y <- dataset[idxKeep,"day_28_flag"] + +print(paste('Removed', sum(!idxKeep),'rows with missing data.')) +``` ## Propensity score model -Now, we build a logistic regression to predict the need for an arterial line catheter from physiology and administrative data. +Now, we build a logistic regression, using all the features, to predict the need for an arterial line catheter from physiology and administrative data. -```{r propensity} -library(Matching) +```{r glm} +# fit GLM +glm_fitted = glm(aline_flag ~ ., data=dat, family="binomial", na.action = na.exclude) +``` -set.seed(43770) +With our model fit, we now run step-wise AIC to remove features. We then plot the ROC curve, and calculate the area under the ROC curve. -# fit GLM -glm_fitted = glm(aline_flag ~ age + gender + weight_first + - # bmi + - sofa_first + service_surg + day_icu_intime + hour_icu_intime + - #icu_hour_flag + - # sedative usage - # sedative_flag + fentanyl_flag + midazolam_flag + propofol_flag + - # comorbidities - chf_flag + afib_flag + renal_flag + - liver_flag + copd_flag + cad_flag + - stroke_flag + malignancy_flag + respfail_flag + - # ards_flag + pneumonia_flag + - # vitals - map_first + hr_first + temp_first + spo2_first + - # labs - bun_first + chloride_first + creatinine_first + - hgb_first + platelet_first + potassium_first + - sodium_first + tco2_first + wbc_first, data=dataset, family="binomial", na.action = na.exclude) +```{r stepwiseAIC} +# run step-wise AIC +library(MASS); +glm_fitted <- stepAIC(glm_fitted ) X <- fitted(glm_fitted, type="response") -y <- dataset$day_28_flag -Tr <- dataset$aline_flag - -# remove the rows with missing data -idxMiss = is.na(X) -print(paste('Missing data for ', sum(idxMiss))) -X <- X[!idxMiss] -y <- y[!idxMiss] -Tr <- Tr[!idxMiss] -dat <- dataset[!idxMiss,] +Tr <- dat$aline_flag + +library("pROC") +roccurve <- roc(Tr ~ X) +plot(roccurve, col=rainbow(7), main="ROC curve", xlab="Specificity", ylab="Sensitivity") +auc(roccurve) ``` -The above shows how many rows we have excluded due to missing data (this analysis uses complete case analysis). +Our final model has a subset of features and OK AUROC. Let's plot the predictions it makes using a stacked bar chart. + +```{r stackedbar} +# plot stacked histogram of the predictions +xrange = seq(0,1,0.01) +# 3) subset your vectors to be inside xrange +g1 = subset(X,Tr==0) +g2 = subset(X,Tr==1) + +# 4) Now, use hist to compute the counts per interval +h1 = hist(g1,breaks=xrange,plot=F)$counts +h2 = hist(g2,breaks=xrange,plot=F)$counts + +barplot(rbind(h1,h2),col=3:2,names.arg=xrange[-1], + legend.text=c("No aline","Aline"),space=0,las=1,main="Stacked histogram of X") +``` + +We can see we have little support between 0-0.2, and above 0.9. We'll carry on with the knowledge that we'll have few pairs in these probability ranges. We have built the propensity score using logistic regression in the previous block. -We now use the `Matching` package to match patients with a caliper size of 0.01. +We now use the `Matching` package to match patients with a caliper size of 0.1. After matching, we'll apply McNemar's test for paired samples to determine if patients with and without an a-line have a difference in mortality. ```{r ps} -ps <- Match(Y=NULL, Tr=Tr, X=X, M=1, estimand='ATC', caliper=0.01, exact=FALSE, replace=FALSE); +library(Matching) + +set.seed(43770) -# jesse's propensity score workshop -> get pairs with treatment/outcome as cols +ps <- Match(Y=NULL, Tr=Tr, X=X, M=1, estimand='ATC', caliper=0.1, exact=FALSE, replace=FALSE); -outcome <- data.frame(aline_pt=dat[ps$index.treated,"hosp_exp_flag"], match_pt=dat[ps$index.control,"hosp_exp_flag"]) +# get pairs with treatment/outcome as cols +outcome <- data.frame(aline_pt=y[ps$index.treated], match_pt=y[ps$index.control]) head(outcome) # mcnemar's test to see if iac related to mort (test should use matched pairs) diff --git a/notebooks/aline/aline_propensity_score.html b/notebooks/aline/aline_propensity_score.html index e603f37..69050c0 100644 --- a/notebooks/aline/aline_propensity_score.html +++ b/notebooks/aline/aline_propensity_score.html @@ -9,7 +9,7 @@ - + @@ -117,7 +117,7 @@
wdpath = paste(path.expand("~"),'git/mimic-code/notebooks/aline/',sep='/')
setwd(wdpath)
dataset = read.csv(file="aline_data.csv",head=TRUE,sep=",")
+If we did not remove any missing data above, we need to subselect complete cases for analysis.
+## [1] "Removed 160 rows with missing data."
Now, we build a logistic regression to predict the need for an arterial line catheter from physiology and administrative data.
+Now, we build a logistic regression, using all the features, to predict the need for an arterial line catheter from physiology and administrative data.
+# fit GLM
+glm_fitted = glm(aline_flag ~ ., data=dat, family="binomial", na.action = na.exclude)
+With our model fit, we now run step-wise AIC to remove features. We then plot the ROC curve, and calculate the area under the ROC curve.
+# run step-wise AIC
+library(MASS);
+glm_fitted <- stepAIC(glm_fitted )
+## Start: AIC=3054.63
+## aline_flag ~ age + gender + weight_first + sofa_first + service_surg +
+## day_icu_intime + hour_icu_intime + chf_flag + afib_flag +
+## renal_flag + liver_flag + copd_flag + cad_flag + stroke_flag +
+## malignancy_flag + respfail_flag + map_first + hr_first +
+## temp_first + spo2_first + bun_first + chloride_first + creatinine_first +
+## hgb_first + platelet_first + potassium_first + sodium_first +
+## tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - hour_icu_intime 23 2977.9 3045.9
+## - day_icu_intime 6 2949.4 3051.4
+## - creatinine_first 1 2940.7 3052.7
+## - hr_first 1 2940.8 3052.8
+## - afib_flag 1 2940.8 3052.8
+## - temp_first 1 2940.8 3052.8
+## - cad_flag 1 2940.9 3052.9
+## - age 1 2941.2 3053.2
+## - chf_flag 1 2941.2 3053.2
+## - malignancy_flag 1 2941.2 3053.2
+## - hgb_first 1 2941.2 3053.2
+## - spo2_first 1 2941.2 3053.2
+## - gender 1 2941.3 3053.3
+## - respfail_flag 1 2941.6 3053.6
+## - bun_first 1 2941.8 3053.8
+## - potassium_first 1 2942.6 3054.6
+## <none> 2940.6 3054.6
+## - platelet_first 1 2943.1 3055.1
+## - weight_first 1 2943.9 3055.9
+## - renal_flag 1 2946.0 3058.0
+## - liver_flag 1 2946.1 3058.1
+## - tco2_first 1 2946.5 3058.5
+## - copd_flag 1 2948.3 3060.3
+## - map_first 1 2950.6 3062.6
+## - sodium_first 1 2962.5 3074.5
+## - sofa_first 1 2966.3 3078.3
+## - wbc_first 1 2969.6 3081.6
+## - stroke_flag 1 2979.0 3091.0
+## - chloride_first 1 2980.9 3092.9
+## - service_surg 1 2993.1 3105.1
+##
+## Step: AIC=3045.91
+## aline_flag ~ age + gender + weight_first + sofa_first + service_surg +
+## day_icu_intime + chf_flag + afib_flag + renal_flag + liver_flag +
+## copd_flag + cad_flag + stroke_flag + malignancy_flag + respfail_flag +
+## map_first + hr_first + temp_first + spo2_first + bun_first +
+## chloride_first + creatinine_first + hgb_first + platelet_first +
+## potassium_first + sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - day_icu_intime 6 2984.5 3040.5
+## - creatinine_first 1 2977.9 3043.9
+## - hr_first 1 2978.0 3044.0
+## - afib_flag 1 2978.1 3044.1
+## - age 1 2978.2 3044.2
+## - spo2_first 1 2978.2 3044.2
+## - gender 1 2978.3 3044.3
+## - temp_first 1 2978.4 3044.4
+## - cad_flag 1 2978.4 3044.4
+## - malignancy_flag 1 2978.4 3044.4
+## - chf_flag 1 2978.6 3044.6
+## - hgb_first 1 2978.6 3044.6
+## - respfail_flag 1 2978.7 3044.7
+## - bun_first 1 2979.1 3045.1
+## - potassium_first 1 2979.1 3045.1
+## <none> 2977.9 3045.9
+## - platelet_first 1 2980.8 3046.8
+## - weight_first 1 2980.9 3046.9
+## - renal_flag 1 2983.8 3049.8
+## - liver_flag 1 2984.0 3050.0
+## - tco2_first 1 2984.6 3050.6
+## - copd_flag 1 2987.2 3053.2
+## - map_first 1 2988.2 3054.2
+## - sodium_first 1 3000.7 3066.7
+## - sofa_first 1 3004.0 3070.0
+## - wbc_first 1 3006.3 3072.3
+## - stroke_flag 1 3016.5 3082.5
+## - chloride_first 1 3020.5 3086.5
+## - service_surg 1 3034.2 3100.2
+##
+## Step: AIC=3040.52
+## aline_flag ~ age + gender + weight_first + sofa_first + service_surg +
+## chf_flag + afib_flag + renal_flag + liver_flag + copd_flag +
+## cad_flag + stroke_flag + malignancy_flag + respfail_flag +
+## map_first + hr_first + temp_first + spo2_first + bun_first +
+## chloride_first + creatinine_first + hgb_first + platelet_first +
+## potassium_first + sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - creatinine_first 1 2984.5 3038.5
+## - hr_first 1 2984.6 3038.6
+## - afib_flag 1 2984.7 3038.7
+## - age 1 2984.8 3038.8
+## - spo2_first 1 2984.8 3038.8
+## - temp_first 1 2984.9 3038.9
+## - cad_flag 1 2985.0 3039.0
+## - gender 1 2985.1 3039.1
+## - malignancy_flag 1 2985.1 3039.1
+## - hgb_first 1 2985.1 3039.1
+## - chf_flag 1 2985.2 3039.2
+## - respfail_flag 1 2985.3 3039.3
+## - bun_first 1 2985.8 3039.8
+## - potassium_first 1 2985.9 3039.9
+## <none> 2984.5 3040.5
+## - platelet_first 1 2987.2 3041.2
+## - weight_first 1 2987.3 3041.3
+## - renal_flag 1 2990.4 3044.4
+## - liver_flag 1 2990.6 3044.6
+## - tco2_first 1 2991.0 3045.0
+## - copd_flag 1 2993.8 3047.8
+## - map_first 1 2994.6 3048.6
+## - sodium_first 1 3007.9 3061.9
+## - sofa_first 1 3011.4 3065.4
+## - wbc_first 1 3013.2 3067.2
+## - stroke_flag 1 3023.1 3077.1
+## - chloride_first 1 3027.6 3081.6
+## - service_surg 1 3041.1 3095.1
+##
+## Step: AIC=3038.52
+## aline_flag ~ age + gender + weight_first + sofa_first + service_surg +
+## chf_flag + afib_flag + renal_flag + liver_flag + copd_flag +
+## cad_flag + stroke_flag + malignancy_flag + respfail_flag +
+## map_first + hr_first + temp_first + spo2_first + bun_first +
+## chloride_first + hgb_first + platelet_first + potassium_first +
+## sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - hr_first 1 2984.6 3036.6
+## - afib_flag 1 2984.7 3036.7
+## - age 1 2984.8 3036.8
+## - spo2_first 1 2984.8 3036.8
+## - temp_first 1 2984.9 3036.9
+## - cad_flag 1 2985.1 3037.1
+## - gender 1 2985.1 3037.1
+## - malignancy_flag 1 2985.1 3037.1
+## - hgb_first 1 2985.1 3037.1
+## - chf_flag 1 2985.2 3037.2
+## - respfail_flag 1 2985.3 3037.3
+## - potassium_first 1 2985.9 3037.9
+## - bun_first 1 2986.4 3038.4
+## <none> 2984.5 3038.5
+## - platelet_first 1 2987.2 3039.2
+## - weight_first 1 2987.3 3039.3
+## - liver_flag 1 2990.6 3042.6
+## - renal_flag 1 2990.9 3042.9
+## - tco2_first 1 2991.1 3043.1
+## - copd_flag 1 2993.8 3045.8
+## - map_first 1 2994.6 3046.6
+## - sodium_first 1 3008.1 3060.1
+## - sofa_first 1 3012.7 3064.7
+## - wbc_first 1 3013.2 3065.2
+## - stroke_flag 1 3023.1 3075.1
+## - chloride_first 1 3028.7 3080.7
+## - service_surg 1 3041.1 3093.1
+##
+## Step: AIC=3036.62
+## aline_flag ~ age + gender + weight_first + sofa_first + service_surg +
+## chf_flag + afib_flag + renal_flag + liver_flag + copd_flag +
+## cad_flag + stroke_flag + malignancy_flag + respfail_flag +
+## map_first + temp_first + spo2_first + bun_first + chloride_first +
+## hgb_first + platelet_first + potassium_first + sodium_first +
+## tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - afib_flag 1 2984.8 3034.8
+## - age 1 2984.8 3034.8
+## - temp_first 1 2985.0 3035.0
+## - spo2_first 1 2985.0 3035.0
+## - cad_flag 1 2985.2 3035.2
+## - gender 1 2985.2 3035.2
+## - malignancy_flag 1 2985.2 3035.2
+## - hgb_first 1 2985.2 3035.2
+## - chf_flag 1 2985.3 3035.3
+## - respfail_flag 1 2985.5 3035.5
+## - potassium_first 1 2986.0 3036.0
+## - bun_first 1 2986.5 3036.5
+## <none> 2984.6 3036.6
+## - platelet_first 1 2987.3 3037.3
+## - weight_first 1 2987.4 3037.4
+## - liver_flag 1 2990.9 3040.9
+## - renal_flag 1 2990.9 3040.9
+## - tco2_first 1 2991.3 3041.3
+## - copd_flag 1 2994.1 3044.1
+## - map_first 1 2994.7 3044.7
+## - sodium_first 1 3008.5 3058.5
+## - sofa_first 1 3012.8 3062.8
+## - wbc_first 1 3013.3 3063.3
+## - stroke_flag 1 3023.8 3073.8
+## - chloride_first 1 3029.1 3079.1
+## - service_surg 1 3041.1 3091.1
+##
+## Step: AIC=3034.76
+## aline_flag ~ age + gender + weight_first + sofa_first + service_surg +
+## chf_flag + renal_flag + liver_flag + copd_flag + cad_flag +
+## stroke_flag + malignancy_flag + respfail_flag + map_first +
+## temp_first + spo2_first + bun_first + chloride_first + hgb_first +
+## platelet_first + potassium_first + sodium_first + tco2_first +
+## wbc_first
+##
+## Df Deviance AIC
+## - age 1 2984.9 3032.9
+## - temp_first 1 2985.1 3033.1
+## - spo2_first 1 2985.1 3033.1
+## - cad_flag 1 2985.3 3033.3
+## - gender 1 2985.3 3033.3
+## - malignancy_flag 1 2985.4 3033.4
+## - hgb_first 1 2985.4 3033.4
+## - chf_flag 1 2985.5 3033.5
+## - respfail_flag 1 2985.6 3033.6
+## - potassium_first 1 2986.2 3034.2
+## - bun_first 1 2986.7 3034.7
+## <none> 2984.8 3034.8
+## - platelet_first 1 2987.5 3035.5
+## - weight_first 1 2987.6 3035.6
+## - renal_flag 1 2991.0 3039.0
+## - liver_flag 1 2991.2 3039.2
+## - tco2_first 1 2991.6 3039.6
+## - copd_flag 1 2994.2 3042.2
+## - map_first 1 2994.8 3042.8
+## - sodium_first 1 3008.6 3056.6
+## - sofa_first 1 3012.8 3060.8
+## - wbc_first 1 3013.5 3061.5
+## - stroke_flag 1 3024.6 3072.6
+## - chloride_first 1 3029.2 3077.2
+## - service_surg 1 3041.1 3089.1
+##
+## Step: AIC=3032.89
+## aline_flag ~ gender + weight_first + sofa_first + service_surg +
+## chf_flag + renal_flag + liver_flag + copd_flag + cad_flag +
+## stroke_flag + malignancy_flag + respfail_flag + map_first +
+## temp_first + spo2_first + bun_first + chloride_first + hgb_first +
+## platelet_first + potassium_first + sodium_first + tco2_first +
+## wbc_first
+##
+## Df Deviance AIC
+## - temp_first 1 2985.2 3031.2
+## - spo2_first 1 2985.3 3031.3
+## - hgb_first 1 2985.4 3031.4
+## - gender 1 2985.5 3031.5
+## - cad_flag 1 2985.5 3031.5
+## - malignancy_flag 1 2985.6 3031.6
+## - chf_flag 1 2985.6 3031.6
+## - respfail_flag 1 2985.8 3031.8
+## - potassium_first 1 2986.4 3032.4
+## - bun_first 1 2986.7 3032.7
+## <none> 2984.9 3032.9
+## - platelet_first 1 2987.6 3033.6
+## - weight_first 1 2987.9 3033.9
+## - renal_flag 1 2991.1 3037.1
+## - liver_flag 1 2991.2 3037.2
+## - tco2_first 1 2991.6 3037.6
+## - copd_flag 1 2994.5 3040.5
+## - map_first 1 2995.0 3041.0
+## - sodium_first 1 3008.8 3054.8
+## - sofa_first 1 3012.9 3058.9
+## - wbc_first 1 3014.5 3060.5
+## - stroke_flag 1 3027.3 3073.3
+## - chloride_first 1 3030.0 3076.0
+## - service_surg 1 3041.1 3087.1
+##
+## Step: AIC=3031.24
+## aline_flag ~ gender + weight_first + sofa_first + service_surg +
+## chf_flag + renal_flag + liver_flag + copd_flag + cad_flag +
+## stroke_flag + malignancy_flag + respfail_flag + map_first +
+## spo2_first + bun_first + chloride_first + hgb_first + platelet_first +
+## potassium_first + sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - spo2_first 1 2985.6 3029.6
+## - hgb_first 1 2985.8 3029.8
+## - malignancy_flag 1 2985.9 3029.9
+## - gender 1 2985.9 3029.9
+## - chf_flag 1 2985.9 3029.9
+## - cad_flag 1 2986.0 3030.0
+## - respfail_flag 1 2986.1 3030.1
+## - potassium_first 1 2986.8 3030.8
+## - bun_first 1 2987.0 3031.0
+## <none> 2985.2 3031.2
+## - platelet_first 1 2987.9 3031.9
+## - weight_first 1 2988.3 3032.3
+## - renal_flag 1 2991.4 3035.4
+## - liver_flag 1 2991.5 3035.5
+## - tco2_first 1 2991.9 3035.9
+## - copd_flag 1 2995.0 3039.0
+## - map_first 1 2995.2 3039.2
+## - sodium_first 1 3009.0 3053.0
+## - sofa_first 1 3013.2 3057.2
+## - wbc_first 1 3015.5 3059.5
+## - stroke_flag 1 3027.9 3071.9
+## - chloride_first 1 3030.1 3074.1
+## - service_surg 1 3041.9 3085.9
+##
+## Step: AIC=3029.62
+## aline_flag ~ gender + weight_first + sofa_first + service_surg +
+## chf_flag + renal_flag + liver_flag + copd_flag + cad_flag +
+## stroke_flag + malignancy_flag + respfail_flag + map_first +
+## bun_first + chloride_first + hgb_first + platelet_first +
+## potassium_first + sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - hgb_first 1 2986.2 3028.2
+## - chf_flag 1 2986.3 3028.3
+## - gender 1 2986.3 3028.3
+## - malignancy_flag 1 2986.3 3028.3
+## - cad_flag 1 2986.4 3028.4
+## - respfail_flag 1 2986.6 3028.6
+## - potassium_first 1 2987.2 3029.2
+## - bun_first 1 2987.3 3029.3
+## <none> 2985.6 3029.6
+## - platelet_first 1 2988.3 3030.3
+## - weight_first 1 2988.6 3030.6
+## - renal_flag 1 2991.8 3033.8
+## - liver_flag 1 2992.0 3034.0
+## - tco2_first 1 2992.1 3034.1
+## - map_first 1 2995.5 3037.5
+## - copd_flag 1 2995.8 3037.8
+## - sodium_first 1 3009.2 3051.2
+## - sofa_first 1 3013.9 3055.9
+## - wbc_first 1 3015.5 3057.5
+## - stroke_flag 1 3028.4 3070.4
+## - chloride_first 1 3030.3 3072.3
+## - service_surg 1 3042.0 3084.0
+##
+## Step: AIC=3028.18
+## aline_flag ~ gender + weight_first + sofa_first + service_surg +
+## chf_flag + renal_flag + liver_flag + copd_flag + cad_flag +
+## stroke_flag + malignancy_flag + respfail_flag + map_first +
+## bun_first + chloride_first + platelet_first + potassium_first +
+## sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - gender 1 2986.6 3026.6
+## - malignancy_flag 1 2986.7 3026.7
+## - chf_flag 1 2986.9 3026.9
+## - cad_flag 1 2986.9 3026.9
+## - respfail_flag 1 2987.1 3027.1
+## - potassium_first 1 2987.9 3027.9
+## <none> 2986.2 3028.2
+## - bun_first 1 2988.4 3028.4
+## - platelet_first 1 2988.8 3028.8
+## - weight_first 1 2989.0 3029.0
+## - renal_flag 1 2992.2 3032.2
+## - liver_flag 1 2992.2 3032.2
+## - tco2_first 1 2994.0 3034.0
+## - map_first 1 2995.9 3035.9
+## - copd_flag 1 2996.2 3036.2
+## - sofa_first 1 3015.0 3055.0
+## - wbc_first 1 3015.6 3055.6
+## - sodium_first 1 3016.9 3056.9
+## - stroke_flag 1 3028.4 3068.4
+## - service_surg 1 3043.1 3083.1
+## - chloride_first 1 3044.2 3084.2
+##
+## Step: AIC=3026.62
+## aline_flag ~ weight_first + sofa_first + service_surg + chf_flag +
+## renal_flag + liver_flag + copd_flag + cad_flag + stroke_flag +
+## malignancy_flag + respfail_flag + map_first + bun_first +
+## chloride_first + platelet_first + potassium_first + sodium_first +
+## tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - malignancy_flag 1 2987.2 3025.2
+## - chf_flag 1 2987.2 3025.2
+## - cad_flag 1 2987.3 3025.3
+## - respfail_flag 1 2987.6 3025.6
+## - potassium_first 1 2988.2 3026.2
+## <none> 2986.6 3026.6
+## - bun_first 1 2988.8 3026.8
+## - platelet_first 1 2989.5 3027.5
+## - weight_first 1 2990.3 3028.3
+## - renal_flag 1 2992.7 3030.7
+## - liver_flag 1 2992.7 3030.7
+## - tco2_first 1 2994.3 3032.3
+## - map_first 1 2996.2 3034.2
+## - copd_flag 1 2996.7 3034.7
+## - wbc_first 1 3016.5 3054.5
+## - sofa_first 1 3016.6 3054.6
+## - sodium_first 1 3016.9 3054.9
+## - stroke_flag 1 3028.4 3066.4
+## - service_surg 1 3043.2 3081.2
+## - chloride_first 1 3044.3 3082.3
+##
+## Step: AIC=3025.15
+## aline_flag ~ weight_first + sofa_first + service_surg + chf_flag +
+## renal_flag + liver_flag + copd_flag + cad_flag + stroke_flag +
+## respfail_flag + map_first + bun_first + chloride_first +
+## platelet_first + potassium_first + sodium_first + tco2_first +
+## wbc_first
+##
+## Df Deviance AIC
+## - chf_flag 1 2987.8 3023.8
+## - cad_flag 1 2987.8 3023.8
+## - respfail_flag 1 2988.2 3024.2
+## - potassium_first 1 2988.8 3024.8
+## <none> 2987.2 3025.2
+## - bun_first 1 2989.2 3025.2
+## - platelet_first 1 2990.2 3026.2
+## - weight_first 1 2991.0 3027.0
+## - renal_flag 1 2993.1 3029.1
+## - liver_flag 1 2993.3 3029.3
+## - tco2_first 1 2994.6 3030.6
+## - map_first 1 2996.9 3032.9
+## - copd_flag 1 2997.3 3033.3
+## - sofa_first 1 3016.8 3052.8
+## - sodium_first 1 3016.9 3052.9
+## - wbc_first 1 3017.7 3053.7
+## - stroke_flag 1 3029.6 3065.6
+## - service_surg 1 3043.3 3079.3
+## - chloride_first 1 3044.4 3080.4
+##
+## Step: AIC=3023.75
+## aline_flag ~ weight_first + sofa_first + service_surg + renal_flag +
+## liver_flag + copd_flag + cad_flag + stroke_flag + respfail_flag +
+## map_first + bun_first + chloride_first + platelet_first +
+## potassium_first + sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - cad_flag 1 2988.2 3022.2
+## - respfail_flag 1 2988.7 3022.7
+## - potassium_first 1 2989.3 3023.3
+## <none> 2987.8 3023.8
+## - bun_first 1 2990.2 3024.2
+## - platelet_first 1 2990.8 3024.8
+## - weight_first 1 2991.7 3025.7
+## - renal_flag 1 2993.4 3027.4
+## - liver_flag 1 2993.9 3027.9
+## - tco2_first 1 2995.9 3029.9
+## - map_first 1 2997.4 3031.4
+## - copd_flag 1 2997.4 3031.4
+## - sofa_first 1 3017.1 3051.1
+## - sodium_first 1 3017.2 3051.2
+## - wbc_first 1 3018.2 3052.2
+## - stroke_flag 1 3029.7 3063.7
+## - service_surg 1 3043.7 3077.7
+## - chloride_first 1 3044.5 3078.5
+##
+## Step: AIC=3022.25
+## aline_flag ~ weight_first + sofa_first + service_surg + renal_flag +
+## liver_flag + copd_flag + stroke_flag + respfail_flag + map_first +
+## bun_first + chloride_first + platelet_first + potassium_first +
+## sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - respfail_flag 1 2989.1 3021.1
+## - potassium_first 1 2989.8 3021.8
+## <none> 2988.2 3022.2
+## - bun_first 1 2990.5 3022.5
+## - platelet_first 1 2991.3 3023.3
+## - weight_first 1 2992.1 3024.1
+## - liver_flag 1 2994.3 3026.3
+## - renal_flag 1 2994.7 3026.7
+## - tco2_first 1 2996.4 3028.4
+## - copd_flag 1 2998.0 3030.0
+## - map_first 1 2998.0 3030.0
+## - sodium_first 1 3017.5 3049.5
+## - sofa_first 1 3018.1 3050.1
+## - wbc_first 1 3018.9 3050.9
+## - stroke_flag 1 3029.7 3061.7
+## - service_surg 1 3044.0 3076.0
+## - chloride_first 1 3044.9 3076.9
+##
+## Step: AIC=3021.13
+## aline_flag ~ weight_first + sofa_first + service_surg + renal_flag +
+## liver_flag + copd_flag + stroke_flag + map_first + bun_first +
+## chloride_first + platelet_first + potassium_first + sodium_first +
+## tco2_first + wbc_first
+##
+## Df Deviance AIC
+## - potassium_first 1 2990.7 3020.7
+## - bun_first 1 2991.1 3021.1
+## <none> 2989.1 3021.1
+## - platelet_first 1 2992.2 3022.2
+## - weight_first 1 2992.9 3022.9
+## - liver_flag 1 2995.2 3025.2
+## - renal_flag 1 2995.6 3025.6
+## - tco2_first 1 2996.7 3026.7
+## - map_first 1 2998.8 3028.8
+## - copd_flag 1 2999.1 3029.1
+## - sofa_first 1 3018.3 3048.3
+## - sodium_first 1 3018.4 3048.4
+## - wbc_first 1 3019.5 3049.5
+## - stroke_flag 1 3032.1 3062.1
+## - chloride_first 1 3046.0 3076.0
+## - service_surg 1 3046.2 3076.2
+##
+## Step: AIC=3020.68
+## aline_flag ~ weight_first + sofa_first + service_surg + renal_flag +
+## liver_flag + copd_flag + stroke_flag + map_first + bun_first +
+## chloride_first + platelet_first + sodium_first + tco2_first +
+## wbc_first
+##
+## Df Deviance AIC
+## - bun_first 1 2992.0 3020.0
+## <none> 2990.7 3020.7
+## - weight_first 1 2994.1 3022.1
+## - platelet_first 1 2994.2 3022.2
+## - liver_flag 1 2996.9 3024.9
+## - renal_flag 1 2997.9 3025.9
+## - tco2_first 1 2998.0 3026.0
+## - map_first 1 3000.2 3028.2
+## - copd_flag 1 3001.0 3029.0
+## - sodium_first 1 3018.7 3046.7
+## - sofa_first 1 3019.3 3047.3
+## - wbc_first 1 3020.6 3048.6
+## - stroke_flag 1 3034.3 3062.3
+## - chloride_first 1 3046.4 3074.4
+## - service_surg 1 3049.0 3077.0
+##
+## Step: AIC=3020
+## aline_flag ~ weight_first + sofa_first + service_surg + renal_flag +
+## liver_flag + copd_flag + stroke_flag + map_first + chloride_first +
+## platelet_first + sodium_first + tco2_first + wbc_first
+##
+## Df Deviance AIC
+## <none> 2992.0 3020.0
+## - weight_first 1 2995.5 3021.5
+## - platelet_first 1 2995.7 3021.7
+## - renal_flag 1 2998.0 3024.0
+## - liver_flag 1 2998.3 3024.3
+## - tco2_first 1 2999.3 3025.3
+## - map_first 1 3001.6 3027.6
+## - copd_flag 1 3002.1 3028.1
+## - sodium_first 1 3019.8 3045.8
+## - wbc_first 1 3022.8 3048.8
+## - sofa_first 1 3026.8 3052.8
+## - stroke_flag 1 3035.9 3061.9
+## - chloride_first 1 3047.2 3073.2
+## - service_surg 1 3049.9 3075.9
+X <- fitted(glm_fitted, type="response")
+Tr <- dat$aline_flag
+
+library("pROC")
+## Type 'citation("pROC")' for a citation.
+##
+## Attaching package: 'pROC'
+## The following objects are masked from 'package:stats':
+##
+## cov, smooth, var
+roccurve <- roc(Tr ~ X)
+plot(roccurve, col=rainbow(7), main="ROC curve", xlab="Specificity", ylab="Sensitivity")
+auc(roccurve)
+## Area under the curve: 0.6945
+Our final model has a subset of features and OK AUROC. Let’s plot the predictions it makes using a stacked bar chart.
+# plot stacked histogram of the predictions
+xrange = seq(0,1,0.01)
+# 3) subset your vectors to be inside xrange
+g1 = subset(X,Tr==0)
+g2 = subset(X,Tr==1)
+
+# 4) Now, use hist to compute the counts per interval
+h1 = hist(g1,breaks=xrange,plot=F)$counts
+h2 = hist(g2,breaks=xrange,plot=F)$counts
+
+barplot(rbind(h1,h2),col=3:2,names.arg=xrange[-1],
+ legend.text=c("No aline","Aline"),space=0,las=1,main="Stacked histogram of X")
+We can see we have little support between 0-0.2, and above 0.9. We’ll carry on with the knowledge that we’ll have few pairs in these probability ranges.
+We have built the propensity score using logistic regression in the previous block. We now use the Matching package to match patients with a caliper size of 0.1. After matching, we’ll apply McNemar’s test for paired samples to determine if patients with and without an a-line have a difference in mortality.
library(Matching)
-## Loading required package: MASS
## ##
## ## Matching (Version 4.9-2, Build Date: 2015-12-25)
## ## See http://sekhon.berkeley.edu/matching for additional documentation.
@@ -149,69 +715,35 @@ May 15, 2017
## ##
set.seed(43770)
-# fit GLM
-glm_fitted = glm(aline_flag ~ age + gender + weight_first +
- # bmi +
- sofa_first + service_surg + day_icu_intime + hour_icu_intime +
- #icu_hour_flag +
- # sedative usage
- # sedative_flag + fentanyl_flag + midazolam_flag + propofol_flag +
- # comorbidities
- chf_flag + afib_flag + renal_flag +
- liver_flag + copd_flag + cad_flag +
- stroke_flag + malignancy_flag + respfail_flag +
- # ards_flag + pneumonia_flag +
- # vitals
- map_first + hr_first + temp_first + spo2_first +
- # labs
- bun_first + chloride_first + creatinine_first +
- hgb_first + platelet_first + potassium_first +
- sodium_first + tco2_first + wbc_first, data=dataset, family="binomial", na.action = na.exclude)
-
-X <- fitted(glm_fitted, type="response")
-y <- dataset$day_28_flag
-Tr <- dataset$aline_flag
-
-# remove the rows with missing data
-idxMiss = is.na(X)
-print(paste('Missing data for ', sum(idxMiss)))
-## [1] "Missing data for 160"
-X <- X[!idxMiss]
-y <- y[!idxMiss]
-Tr <- Tr[!idxMiss]
-dat <- dataset[!idxMiss,]
-The above shows how many rows we have excluded due to missing data (this analysis uses complete case analysis).
-We have built the propensity score using logistic regression in the previous block. We now use the Matching package to match patients with a caliper size of 0.01. After matching, we’ll apply McNemar’s test for paired samples to determine if patients with and without an a-line have a difference in mortality.
ps <- Match(Y=NULL, Tr=Tr, X=X, M=1, estimand='ATC', caliper=0.01, exact=FALSE, replace=FALSE);
-
-# jesse's propensity score workshop -> get pairs with treatment/outcome as cols
-
-outcome <- data.frame(aline_pt=dat[ps$index.treated,"hosp_exp_flag"], match_pt=dat[ps$index.control,"hosp_exp_flag"])
+ps <- Match(Y=NULL, Tr=Tr, X=X, M=1, estimand='ATC', caliper=0.1, exact=FALSE, replace=FALSE);
+
+# get pairs with treatment/outcome as cols
+outcome <- data.frame(aline_pt=y[ps$index.treated], match_pt=y[ps$index.control])
head(outcome)
## aline_pt match_pt
-## 1 1 1
-## 2 0 0
+## 1 0 1
+## 2 1 0
## 3 1 0
## 4 0 0
-## 5 0 0
+## 5 1 0
## 6 0 0
# mcnemar's test to see if iac related to mort (test should use matched pairs)
tab.match1 <- table(outcome$aline_pt,outcome$match_pt,dnn=c("Aline","Matched Control"))
tab.match1
## Matched Control
## Aline 0 1
-## 0 564 93
-## 1 86 13
+## 0 578 122
+## 1 103 19
tab.match1[1,2]/tab.match1[2,1]
-## [1] 1.081395
+## [1] 1.184466
paste("95% Confint", round(exp(c(log(tab.match1[2,1]/tab.match1[1,2]) - qnorm(0.975)*sqrt(1/tab.match1[1,2] +1/tab.match1[2,1]),log(tab.match1[2,1]/tab.match1[1,2]) + qnorm(0.975)*sqrt(1/tab.match1[1,2] +1/tab.match1[2,1])) ),2))
-## [1] "95% Confint 0.69" "95% Confint 1.24"
+## [1] "95% Confint 0.65" "95% Confint 1.1"
mcnemar.test(tab.match1) # for 1-1 pairs
##
## McNemar's Chi-squared test with continuity correction
##
## data: tab.match1
-## McNemar's chi-squared = 0.20112, df = 1, p-value = 0.6538
+## McNemar's chi-squared = 1.44, df = 1, p-value = 0.2301
The above p-value, which is > 0.05, tells us that we cannot reject the null hypothesis of the aline/non-aline groups having the same mortality rate. Assuming all assumptions of our modelling process are correct, we can infer from this that the use of an indwelling arterial catheter is not associated with a mortality benefit in these patients.