In [73]:
#install.packages("SurvRegCensCov")
#install.packages("muhaz")
#install.packages("data.table")
#install.packages("texreg")

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [74]:
library("survival")
library("arrow")
library("tidyverse")
library("feather")
library("survminer")
library("SurvRegCensCov")
library("muhaz")
library("data.table")
library("texreg")

Version:  1.38.6
Date:     2022-04-06
Author:   Philip Leifeld (University of Essex)

Consider submitting praise using the praise or praise_interactive functions.
Please cite the JSS article in your publications -- see citation("texreg").


Attaching package: ‘texreg’


The following object is masked from ‘package:tidyr’:

    extract




Cleanup steps; Identical to Survanalysis.ipynb

In [31]:
tx_df <-read_parquet("/home/project/data/students/srtr/tx_cohort.parquet")
# colnames suggested by nephrologist
tx_df <- tx_df %>% select(-c('REC_DGN','CAN_HGT_CM','CAN_WGT_KG','DONOR_ID','DON_AGE_IN_MONTHS','REC_AGE_IN_MONTHS_AT_TX','REC_AGE_IN_MONTHS_AT_TX','REC_FAIL_DT','REC_FAIL_CAUSE_TY','REC_PREV_GRAFT1_DT','REC_GRAFT_STAT','REC_HGT_WGT_DT',
                             'TX_ID','TFL_COD','REC_COD','REC_COD2','REC_COD3','TFL_COD', 'CAN_RACE'))
data <- tx_df%>%mutate(time = case_when(
  !is.na(TFL_DEATH_DT) & is.na(TFL_GRAFT_DT)~ difftime( TFL_DEATH_DT,REC_TX_DT, units = "days"),
    is.na(TFL_DEATH_DT) & !is.na(TFL_GRAFT_DT)~ difftime(TFL_GRAFT_DT,REC_TX_DT,  units = "days"),
    is.na(TFL_DEATH_DT) & is.na(TFL_GRAFT_DT)~ difftime(TFL_LAFUDATE,REC_TX_DT,  units = "days"),
  ))

# time: Survival time in days
# status: censoring status 1=censored, 2=graft failure
data$status <- 1
data$status[!is.na(data$TFL_DEATH_DT)]<-1
data$status[!is.na(data$TFL_GRAFT_DT)]<-2

# drop columns with too much missing data and factors without observations or just 1 observation
data <- data %>% select(-c('CAN_ANGINA', 'REC_ACUTE_REJ_BIOPSY_CONFIRMED','REC_ACUTE_REJ_EPISODE','REC_CREAT_DECLINE_GE25','REC_PROD_URINE_GT40_24HRS'))
data <- subset(data, data$CAN_ABO != "UNK")
data <- subset(data, data$DON_ABO != "UNK")
data <- subset(data, data$REC_MM_EQUIV_CUR != "REC_MM_EQUIV_CUR6")
data <- subset(data, data$REC_MM_EQUIV_TX != "REC_MM_EQUIV_TX6")

# correct type
cols <- c("CAN_ABO","CAN_DIAB_TY", "CAN_GENDER", "CAN_MALIG", "DON_ABO", "DON_GENDER", "REC_A_MM_EQUIV_CUR", "REC_A_MM_EQUIV_TX","REC_B_MM_EQUIV_CUR", "REC_B_MM_EQUIV_TX","REC_DR_MM_EQUIV_CUR")
data[cols] <- lapply(data[cols], factor)
cols <- c("REC_DR_MM_EQUIV_TX", "REC_DR_MM_EQUIV_CUR","REC_MM_EQUIV_CUR", "REC_MM_EQUIV_TX")
data[cols] <- lapply(data[cols], factor)

# delete "old" outcomes => now as time and status
test <- data %>% select(-c('TFL_DEATH_DT','TFL_GRAFT_DT','TFL_LAFUDATE' ))
test <- na.omit(test)

# create timeframes from variables
test$dialysistotx <- difftime(test$REC_DIAL_DT,test$REC_TX_DT, units = "days")
test$dialysistotx <- as.integer(test$dialysistotx)
test$txtodischarge <- difftime(test$REC_DISCHRG_DT,test$REC_TX_DT, units = "days")
test$txtodischarge <- as.integer(test$txtodischarge)
test$yeartx <- format(test$REC_TX_DT, format="%Y")
test$yeartx <- as.factor(test$yeartx)
test <- test %>% select(-c("REC_DIAL_DT", "REC_DISCHRG_DT", "REC_TX_DT"))

# reasonable time frame => time until 95% of transplants have failed // only extremes remain, that will still fail and are not censored; see cox file
test <- subset(test, test$time < 5099)

# drop columns that i dropen in cox based upon univariate cox regression and non significant results
test <- test %>% select(-c("REC_BMI","yeartx"))

In [32]:
head(test)
colnames(test)
test$time <- as.integer(test$time)


CAN_ABO,CAN_DIAB_TY,CAN_GENDER,CAN_MALIG,DON_ABO,DON_AGE,DON_GENDER,REC_AGE_AT_TX,REC_A_MM_EQUIV_CUR,REC_A_MM_EQUIV_TX,⋯,REC_DR_MM_EQUIV_TX,REC_MM_EQUIV_CUR,REC_MM_EQUIV_TX,REC_COLD_ISCH_TM,REC_CREAT,REC_DISCHRG_CREAT,time,status,dialysistotx,txtodischarge
<fct>,<fct>,<fct>,<fct>,<fct>,<dbl>,<fct>,<dbl>,<fct>,<fct>,⋯,<fct>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<drtn>,<dbl>,<int>,<int>
B,3,M,N,B,45,F,68,2,2,⋯,2,5,5,6.18,7.33,1.96,2198 days,2,-786,6
A,1,M,N,A,48,M,69,1,1,⋯,1,3,3,1.0,4.6,1.0,2923 days,1,-309,6
O,1,M,N,O,28,F,59,2,2,⋯,1,5,5,11.3,3.7,3.8,1781 days,2,-1312,7
O,3,M,N,O,54,F,59,2,2,⋯,2,6,6,12.78,8.25,3.17,3114 days,1,-2487,4
B,1,F,N,B,57,F,59,2,2,⋯,2,6,6,0.92,6.54,0.98,2913 days,1,-824,5
A,1,M,N,A1,47,M,35,2,2,⋯,1,5,5,14.95,10.3,5.9,2853 days,1,-3000,7


# many of the distributions don't accept 0 as a time => we added 0.0001 to account for that 

In [33]:
test$time[test$time ==0]<- 0.0001

Fit the available parametric models from survival package

In [36]:
s <- Surv(time,status) ~CAN_GENDER+CAN_ABO+CAN_DIAB_TY+CAN_MALIG+DON_ABO+DON_AGE+DON_GENDER+REC_AGE_AT_TX+REC_A_MM_EQUIV_CUR+REC_A_MM_EQUIV_TX+REC_B_MM_EQUIV_CUR+REC_B_MM_EQUIV_TX+REC_DR_MM_EQUIV_CUR+REC_DR_MM_EQUIV_TX+REC_MM_EQUIV_CUR+REC_MM_EQUIV_TX+REC_COLD_ISCH_TM+REC_CREAT+REC_DISCHRG_CREAT+dialysistotx+txtodischarge

In [39]:
fitwb <- survreg(s, data=test, dist = "weibull") 
fitexp <- survreg(s, data = test, dist = "exponential")

In [40]:
fitgaus <- survreg(s, data = test, dist = "gaussian")
fitlog <- survreg(s, data = test, dist = "logistic")

In [41]:
fitlognorm <- survreg(s, data = test, dist = "lognormal")
fitloglog <- survreg(s, data = test, dist = "loglogistic")

In [57]:
a<-extractAIC(fitwb)
b<-extractAIC(fitexp)
c<-extractAIC(fitgaus)
d<-extractAIC(fitlog)
e<-extractAIC(fitlognorm)
f<-extractAIC(fitloglog)
g <- c(52, 835166.443921923)
aicput <- rbind(a,b,c,d,e,f,g)
namemod <- c("Weibull", "Exponential", "Gaussian", "Logistic", "Lognormal", "Loglogistic", "Cox")
aicput <- cbind(aicput, namemod)

In [58]:
aicput <- data.table(aicput)
names(aicput)[1]<- "DF"
names(aicput)[2]<- "AIC"
aicput$AIC <- as.double(aicput$AIC)
aicput$AIC <- round(aicput$AIC, 2)

In [59]:
aicput

DF,AIC,namemod
<chr>,<dbl>,<chr>
56,749430.3,Weibull
55,751897.9,Exponential
56,769535.5,Gaussian
56,774098.5,Logistic
56,772046.9,Lognormal
56,752226.6,Loglogistic
52,835166.4,Cox


In [71]:
summary(fitwb)


Call:
survreg(formula = s, data = test, dist = "weibull")
                         Value Std. Error      z       p
(Intercept)           9.52e+00   4.34e-02 219.53 < 2e-16
CAN_GENDERM           1.81e-01   1.37e-02  13.24 < 2e-16
CAN_ABOA1            -1.33e-01   6.07e-02  -2.19 0.02834
CAN_ABOA1B           -1.33e-01   1.97e-01  -0.67 0.50066
CAN_ABOA2            -1.62e-01   1.43e-01  -1.13 0.25780
CAN_ABOA2B           -7.61e-01   2.79e-01  -2.73 0.00633
CAN_ABOAB             8.78e-02   4.80e-02   1.83 0.06726
CAN_ABOB              2.27e-02   4.70e-02   0.48 0.62863
CAN_ABOO             -3.28e-02   3.25e-02  -1.01 0.31352
CAN_DIAB_TY2          2.49e-01   3.93e-02   6.35 2.2e-10
CAN_DIAB_TY3          8.54e-02   1.92e-02   4.44 9.2e-06
CAN_DIAB_TY4          3.10e-01   1.18e-01   2.62 0.00876
CAN_DIAB_TY5         -3.97e-01   2.57e-02 -15.47 < 2e-16
CAN_DIAB_TY998       -3.99e-02   7.65e-02  -0.52 0.60259
CAN_MALIGU           -3.03e-01   4.32e-02  -7.01 2.4e-12
CAN_MALIGY            1.93e-0

In [77]:
extract(
  fitwb,
  include.aic = TRUE,
  include.bic = TRUE,
  include.loglik = TRUE,
  include.deviance = TRUE,
  include.nobs = TRUE,
)



                             coef.         s.e.             p
(Intercept)           9.519465e+00 4.336266e-02  0.000000e+00
CAN_GENDERM           1.812246e-01 1.368438e-02  4.941449e-40
CAN_ABOA1            -1.331660e-01 6.073630e-02  2.834146e-02
CAN_ABOA1B           -1.328334e-01 1.972441e-01  5.006632e-01
CAN_ABOA2            -1.617385e-01 1.429278e-01  2.577984e-01
CAN_ABOA2B           -7.613464e-01 2.788737e-01  6.331964e-03
CAN_ABOAB             8.779702e-02 4.797790e-02  6.725785e-02
CAN_ABOB              2.273784e-02 4.701258e-02  6.286312e-01
CAN_ABOO             -3.278117e-02 3.252510e-02  3.135155e-01
CAN_DIAB_TY2          2.494969e-01 3.929851e-02  2.170530e-10
CAN_DIAB_TY3          8.536389e-02 1.924282e-02  9.158536e-06
CAN_DIAB_TY4          3.097315e-01 1.181597e-01  8.759665e-03
CAN_DIAB_TY5         -3.970434e-01 2.566143e-02  5.329870e-54
CAN_DIAB_TY998       -3.985195e-02 7.653914e-02  6.025938e-01
CAN_MALIGU           -3.028840e-01 4.320766e-02  2.383840e-12
CAN_MA