In [23]:
library(bigrquery)
library(tidyverse)

In [24]:
# Store the project ID
projectid = "yhcr-prd-phm-bia-core"

In [25]:
# load diabetes and cvd tables
# Diabetes
sql1 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Diabetes`"
tb1 <- bq_project_query(projectid, sql1)
diabetes <-bq_table_download(tb1)

# CVD
sql2 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.All_CVD_First_Occurrence`"
tb2 <- bq_project_query(projectid, sql2)
cvd <-bq_table_download(tb2)

In [26]:
# Patient table
sql3 <- "SELECT * FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Patient_Denom`"
tb3 <- bq_project_query(projectid, sql3)
patient <- bq_table_download(tb3)
head(patient)

person_id,birth_datetime,death_datetime,gender,ethnicity,imd_decile
<int>,<date>,<date>,<int>,<int>,<int>
16813063,,,2,2,1
16832562,,,1,3,1
16769967,,,1,1,1
16832450,,,1,2,1
16814272,,,1,2,1
13715865,,,1,2,1


In [27]:
covars <- read_csv("/home/jupyter/T2D_CVD_Study/Patient_Covariates_Table.csv")
covars <- covars %>% select(-'...1', -birth_datetime, -gender, -imd_decile, -ethnicity)

[1m[22mNew names:
[36m•[39m `` -> `...1`
“[1m[22mOne or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)”
[1mRows: [22m[34m784186[39m [1mColumns: [22m[34m31[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[32mdbl[39m  (20): ...1, person_id, gender, ethnicity, imd_decile, cholesterol, bmi,...
[33mlgl[39m   (4): date_cholesterol, closest_cholesterol, date_egfr, closest_egfr
[34mdate[39m  (7): birth_datetime, death_datetime, date_bmi, date_smoking, date_bp, ...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [28]:
# join cvd and diabetes
dc <- left_join(diabetes, cvd)
dc <- dc %>%
    mutate_at(c('t1d_status', 't2d_status', 'any_diabetes', 'has_angina','has_cardiomyopathy', 'has_heart_failure', 'has_hypertension', 'has_ihd','has_mi', 'has_pvd', 'has_stroke', 'has_valve_disease'), ~replace_na(.,0))
head(dc)
sum(duplicated(dc))
sum(duplicated(dc$person_id))

[1m[22mJoining with `by = join_by(person_id)`


person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,⋯,date_ihd_diagnosis,has_ihd,date_mi_diagnosis,has_mi,date_pvd_diagnosis,has_pvd,date_stroke_diagnosis,has_stroke,date_valve_disease_diagnosis,has_valve_disease
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,⋯,<date>,<int>,<date>,<int>,<date>,<int>,<date>,<int>,<date>,<int>
13548546,60,2007-01-11,1,,,0,60,2007-01-11,1,⋯,,0,,0,,0,,0,,0
12975170,80,2020-04-28,1,,,0,80,2020-04-28,1,⋯,,0,,0,,0,2013-03-05,1,,0
13450523,36,2011-10-13,1,,,0,36,2011-10-13,1,⋯,,0,,0,,0,,0,,0
12431500,75,2011-08-16,1,,,0,75,2011-08-16,1,⋯,2012-04-03,1,,0,,0,,0,,0
13235332,47,2007-07-05,1,,,0,47,2007-07-05,1,⋯,,0,,0,,0,,0,,0
12805324,39,2008-04-08,1,,,0,39,2008-04-08,1,⋯,,0,,0,,0,,0,,0


In [29]:
# Join death datetime onto dc
dc <- left_join(dc, patient, by = "person_id")
head(dc)
sum(duplicated(dc))
sum(duplicated(dc$person_id))

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,⋯,has_pvd,date_stroke_diagnosis,has_stroke,date_valve_disease_diagnosis,has_valve_disease,birth_datetime,death_datetime,gender,ethnicity,imd_decile
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,⋯,<int>,<date>,<int>,<date>,<int>,<date>,<date>,<int>,<int>,<int>
13548546,60,2007-01-11,1,,,0,60,2007-01-11,1,⋯,0,,0,,0,1946-11-15,2008-07-15,1,1,5
12975170,80,2020-04-28,1,,,0,80,2020-04-28,1,⋯,0,2013-03-05,1,,0,1939-11-15,2020-05-15,1,1,1
13450523,36,2011-10-13,1,,,0,36,2011-10-13,1,⋯,0,,0,,0,1975-08-15,2018-12-15,1,1,4
12431500,75,2011-08-16,1,,,0,75,2011-08-16,1,⋯,0,,0,,0,1936-05-15,2013-01-15,1,1,1
13235332,47,2007-07-05,1,,,0,47,2007-07-05,1,⋯,0,,0,,0,1960-03-15,2013-09-15,1,1,1
12805324,39,2008-04-08,1,,,0,39,2008-04-08,1,⋯,0,,0,,0,1968-08-15,2017-03-15,1,2,1


In [30]:
colnames(dc)

In [31]:
# Create variable for time between:

dc <- dc %>%
    mutate(date_earliest_cvd = pmin(date_angina_diagnosis, date_cardiomyopathy_diagnosis,
                               date_heart_failure_diagnosis, date_hypertension_diagnosis,
                              date_ihd_diagnosis, date_mi_diagnosis, date_pvd_diagnosis,
                              date_stroke_diagnosis, date_valve_disease_diagnosis, na.rm = TRUE),
# Diabetes and CVD
    diab_cvd_time = abs(difftime(earliest_diabetes_diag, date_earliest_cvd, units = "days")),
# CVD and Death
          cvd_death_time = abs(difftime(date_earliest_cvd, death_datetime, units = "days")),
          has_cvd = ifelse(has_angina == 1 | has_cardiomyopathy == 1 | has_heart_failure == 1 |
                           has_hypertension == 1 | has_ihd == 1 | has_mi == 1 | has_pvd == 1 |
                           has_stroke == 1 | has_valve_disease == 1, 1,0)) %>%
        mutate(diab_cvd_time = as.integer(diab_cvd_time),
               diab_death_time = as.integer(diab_death_time),
               cvd_death_time = as.integer(cvd_death_time))

head(dc)

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,⋯,has_valve_disease,birth_datetime,death_datetime,gender,ethnicity,imd_decile,date_earliest_cvd,diab_cvd_time,cvd_death_time,has_cvd
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,⋯,<int>,<date>,<date>,<int>,<int>,<int>,<date>,<int>,<int>,<dbl>
13548546,60,2007-01-11,1,,,0,60,2007-01-11,1,⋯,0,1946-11-15,2008-07-15,1,1,5,,,,0
12975170,80,2020-04-28,1,,,0,80,2020-04-28,1,⋯,0,1939-11-15,2020-05-15,1,1,1,2013-03-05,2611.0,2628.0,1
13450523,36,2011-10-13,1,,,0,36,2011-10-13,1,⋯,0,1975-08-15,2018-12-15,1,1,4,,,,0
12431500,75,2011-08-16,1,,,0,75,2011-08-16,1,⋯,0,1936-05-15,2013-01-15,1,1,1,2012-01-04,141.0,377.0,1
13235332,47,2007-07-05,1,,,0,47,2007-07-05,1,⋯,0,1960-03-15,2013-09-15,1,1,1,,,,0
12805324,39,2008-04-08,1,,,0,39,2008-04-08,1,⋯,0,1968-08-15,2017-03-15,1,2,1,,,,0


In [32]:
sum(duplicated(covars))
sum(duplicated(covars$person_id))

In [33]:
# Join covariates to diabetes dataset
dc <- left_join(dc, covars, by = c("person_id"))

In [34]:
# Change IMD to IMD QUINTILE i.e. combine 1+2 = 1, 3+4 = 2, 5+6 = 3, 7+8 = 4, 9+10 = 5
dc <- dc %>%
    mutate(imd_quintile = case_when(
    imd_decile == 1 | imd_decile == 2 ~ 1,
    imd_decile == 3 | imd_decile == 4 ~ 2,
    imd_decile == 5 | imd_decile == 6 ~ 3,
    imd_decile == 7 | imd_decile == 8 ~ 4,
    imd_decile == 9 | imd_decile == 10 ~ 5))


In [35]:
# Total number of those with any diabetes, t1d or t2d diabetes
any_diabetes_n <- sum(dc$any_diabetes == 1, na.rm = TRUE)
t1d_n <- sum(dc$t1d_status == 1, na.rm = TRUE)
t2d_n <- sum(dc$t2d_status == 1, na.rm = TRUE)

any_diabetes_n
t1d_n
t2d_n

In [36]:
max(dc$date_t2d_diag,na.rm=TRUE)

In [15]:
# Descriptive Stats
# Age at diagnosis of any diabetes
any_mean <- floor(mean(dc$age_earliest_diab, na.rm=TRUE))
any_sd <- floor(sd(dc$age_earliest_diab, na.rm=TRUE))
any_ms <- paste0(any_mean," (",any_sd,")")
any_1 <- sum(dc$age_earliest_diab < 20, na.rm = TRUE)
any_1_p <- round((any_1/any_diabetes_n)*100,2)
any1 <- paste0(any_1," (",any_1_p,")")
any_2 <- sum(dc$age_earliest_diab >= 20 & dc$age_earliest_diab <= 24, na.rm = TRUE)
any_2_p <- round((any_2/any_diabetes_n)*100,2)
any2 <- paste0(any_2," (",any_2_p,")")
any_3 <- sum(dc$age_earliest_diab >= 25 & dc$age_earliest_diab <= 29, na.rm = TRUE)
any_3_p <- round((any_3/any_diabetes_n)*100,2)
any3 <- paste0(any_3," (",any_3_p,")")
any_4 <- sum(dc$age_earliest_diab >= 30 & dc$age_earliest_diab <= 34, na.rm = TRUE)
any_4_p <- round((any_4/any_diabetes_n)*100,2)
any4 <- paste0(any_4," (",any_4_p,")")
any_5 <- sum(dc$age_earliest_diab >= 35 & dc$age_earliest_diab <= 39, na.rm = TRUE)
any_5_p <- round((any_5/any_diabetes_n)*100,2)
any5 <- paste0(any_5," (",any_5_p,")")
any_6 <- sum(dc$age_earliest_diab >= 40 & dc$age_earliest_diab <= 44, na.rm = TRUE)
any_6_p <- round((any_6/any_diabetes_n)*100,2)
any6 <- paste0(any_6," (",any_6_p,")")
any_7 <- sum(dc$age_earliest_diab >= 45 & dc$age_earliest_diab <= 49, na.rm = TRUE)
any_7_p <- round((any_7/any_diabetes_n)*100,2)
any7 <- paste0(any_7," (",any_7_p,")")
any_8 <- sum(dc$age_earliest_diab >= 50 & dc$age_earliest_diab <= 54, na.rm = TRUE)
any_8_p <- round((any_8/any_diabetes_n)*100,2)
any8 <- paste0(any_8," (",any_8_p,")")
any_9 <- sum(dc$age_earliest_diab >= 55 & dc$age_earliest_diab <= 59, na.rm = TRUE)
any_9_p <- round((any_9/any_diabetes_n)*100,2)
any9 <- paste0(any_9," (",any_9_p,")")
any_10 <- sum(dc$age_earliest_diab >= 60 & dc$age_earliest_diab <= 64, na.rm = TRUE)
any_10_p <- round((any_10/any_diabetes_n)*100,2)
any10 <- paste0(any_10," (",any_10_p,")")
any_11 <- sum(dc$age_earliest_diab >= 65 & dc$age_earliest_diab <= 69, na.rm = TRUE)
any_11_p <- round((any_11/any_diabetes_n)*100,2)
any11 <- paste0(any_11," (",any_11_p,")")
any_12 <- sum(dc$age_earliest_diab >= 70 & dc$age_earliest_diab <= 74, na.rm = TRUE)
any_12_p <- round((any_12/any_diabetes_n)*100,2)
any12 <- paste0(any_12," (",any_12_p,")")
any_13 <- sum(dc$age_earliest_diab >= 75 & dc$age_earliest_diab <= 79, na.rm = TRUE)
any_13_p <- round((any_13/any_diabetes_n)*100,2)
any13 <- paste0(any_13," (",any_13_p,")")
any_14 <- sum(dc$age_earliest_diab >= 80 & dc$age_earliest_diab <= 84, na.rm = TRUE)
any_14_p <- round((any_14/any_diabetes_n)*100,2)
any14 <- paste0(any_14," (",any_14_p,")")
any_15 <- sum(dc$age_earliest_diab >= 85 & dc$age_earliest_diab <= 89, na.rm = TRUE)
any_15_p <- round((any_15/any_diabetes_n)*100,2)
any15 <- paste0(any_15," (",any_15_p,")")
any_16 <- sum(dc$age_earliest_diab >= 90 & dc$age_earliest_diab <= 94, na.rm = TRUE)
any_16_p <- round((any_16/any_diabetes_n)*100,2)
any16 <- paste0(any_16," (",any_16_p,")")
any_17 <- sum(dc$age_earliest_diab >= 95 & dc$age_earliest_diab <= 99, na.rm = TRUE)
any_17_p <- round((any_17/any_diabetes_n)*100,2)
any17 <- paste0(any_17," (",any_17_p,")")
any_18 <- sum(dc$age_earliest_diab >= 100, na.rm = TRUE)
any_18_p <- round((any_18/any_diabetes_n)*100,2)
any18 <- paste0(any_18," (",any_18_p,")")

# Age at diagnosis of type 1 diabetes
t1d_mean <- floor(mean(dc$age_t1d_diag, na.rm=TRUE))
t1d_sd <- floor(sd(dc$age_t1d_diag, na.rm=TRUE))
t1d_ms <- paste0(t1d_mean," (",t1d_sd,")")
t1d_1 <- sum(dc$age_t1d_diag < 20, na.rm = TRUE)
t1d_1_p <- round((t1d_1/t1d_n)*100,2)
t1d1 <- paste0(t1d_1," (",t1d_1_p,")")
t1d_2 <- sum(dc$age_t1d_diag >= 20 & dc$age_t1d_diag <= 24, na.rm = TRUE)
t1d_2_p <- round((t1d_2/t1d_n)*100,2)
t1d2 <- paste0(t1d_2," (",t1d_2_p,")")
t1d_3 <- sum(dc$age_t1d_diag >= 25 & dc$age_t1d_diag <= 29, na.rm = TRUE)
t1d_3_p <- round((t1d_3/t1d_n)*100,2)
t1d3 <- paste0(t1d_3," (",t1d_3_p,")")
t1d_4 <- sum(dc$age_t1d_diag >= 30 & dc$age_t1d_diag <= 34, na.rm = TRUE)
t1d_4_p <- round((t1d_4/t1d_n)*100,2)
t1d4 <- paste0(t1d_4," (",t1d_4_p,")")
t1d_5 <- sum(dc$age_t1d_diag >= 35 & dc$age_t1d_diag <= 39, na.rm = TRUE)
t1d_5_p <- round((t1d_5/t1d_n)*100,2)
t1d5 <- paste0(t1d_5," (",t1d_5_p,")")
t1d_6 <- sum(dc$age_t1d_diag >= 40 & dc$age_t1d_diag <= 44, na.rm = TRUE)
t1d_6_p <- round((t1d_6/t1d_n)*100,2)
t1d6 <- paste0(t1d_6," (",t1d_6_p,")")
t1d_7 <- sum(dc$age_t1d_diag >= 45 & dc$age_t1d_diag <= 49, na.rm = TRUE)
t1d_7_p <- round((t1d_7/t1d_n)*100,2)
t1d7 <- paste0(t1d_7," (",t1d_7_p,")")
t1d_8 <- sum(dc$age_t1d_diag >= 50 & dc$age_t1d_diag <= 54, na.rm = TRUE)
t1d_8_p <- round((t1d_8/t1d_n)*100,2)
t1d8 <- paste0(t1d_8," (",t1d_8_p,")")
t1d_9 <- sum(dc$age_t1d_diag >= 55 & dc$age_t1d_diag <= 59, na.rm = TRUE)
t1d_9_p <- round((t1d_9/t1d_n)*100,2)
t1d9 <- paste0(t1d_9," (",t1d_9_p,")")
t1d_10 <- sum(dc$age_t1d_diag >= 60 & dc$age_t1d_diag <= 64, na.rm = TRUE)
t1d_10_p <- round((t1d_10/t1d_n)*100,2)
t1d10 <- paste0(t1d_10," (",t1d_10_p,")")
t1d_11 <- sum(dc$age_t1d_diag >= 65 & dc$age_t1d_diag <= 69, na.rm = TRUE)
t1d_11_p <- round((t1d_11/t1d_n)*100,2)
t1d11 <- paste0(t1d_11," (",t1d_11_p,")")
t1d_12 <- sum(dc$age_t1d_diag >= 70 & dc$age_t1d_diag <= 74, na.rm = TRUE)
t1d_12_p <- round((t1d_12/t1d_n)*100,2)
t1d12 <- paste0(t1d_12," (",t1d_12_p,")")
t1d_13 <- sum(dc$age_t1d_diag >= 75 & dc$age_t1d_diag <= 79, na.rm = TRUE)
t1d_13_p <- round((t1d_13/t1d_n)*100,2)
t1d13 <- paste0(t1d_13," (",t1d_13_p,")")
t1d_14 <- sum(dc$age_t1d_diag >= 80 & dc$age_t1d_diag <= 84, na.rm = TRUE)
t1d_14_p <- round((t1d_14/t1d_n)*100,2)
t1d14 <- paste0(t1d_14," (",t1d_14_p,")")
t1d_15 <- sum(dc$age_t1d_diag >= 85 & dc$age_t1d_diag <= 89, na.rm = TRUE)
t1d_15_p <- round((t1d_15/t1d_n)*100,2)
t1d15 <- paste0(t1d_15," (",t1d_15_p,")")
t1d_16 <- sum(dc$age_t1d_diag >= 90 & dc$age_t1d_diag <= 94, na.rm = TRUE)
t1d_16_p <- round((t1d_16/t1d_n)*100,2)
t1d16 <- paste0(t1d_16," (",t1d_16_p,")")
t1d_17 <- sum(dc$age_t1d_diag >= 95 & dc$age_t1d_diag <= 99, na.rm = TRUE)
t1d_17_p <- round((t1d_17/t1d_n)*100,2)
t1d17 <- paste0(t1d_17," (",t1d_17_p,")")
t1d_18 <- sum(dc$age_t1d_diag >= 100, na.rm = TRUE)
t1d_18_p <- round((t1d_18/t1d_n)*100,2)
t1d18 <- paste0(t1d_18," (",t1d_18_p,")")

# Age at diagnosis of type 2 diabetes
t2d_mean <- floor(mean(dc$age_t2d_diag, na.rm=TRUE))
t2d_sd <- floor(sd(dc$age_t2d_diag, na.rm=TRUE))
t2d_ms <- paste0(t2d_mean," (",t2d_sd,")")
t2d_1 <- sum(dc$age_t2d_diag < 20, na.rm = TRUE)
t2d_1_p <- round((t2d_1/t2d_n)*100,2)
t2d1 <- paste0(t2d_1," (",t2d_1_p,")")
t2d_2 <- sum(dc$age_t2d_diag >= 20 & dc$age_t2d_diag <= 24, na.rm = TRUE)
t2d_2_p <- round((t2d_2/t2d_n)*100,2)
t2d2 <- paste0(t2d_2," (",t2d_2_p,")")
t2d_3 <- sum(dc$age_t2d_diag >= 25 & dc$age_t2d_diag <= 29, na.rm = TRUE)
t2d_3_p <- round((t2d_3/t2d_n)*100,2)
t2d3 <- paste0(t2d_3," (",t2d_3_p,")")
t2d_4 <- sum(dc$age_t2d_diag >= 30 & dc$age_t2d_diag <= 34, na.rm = TRUE)
t2d_4_p <- round((t2d_4/t2d_n)*100,2)
t2d4 <- paste0(t2d_4," (",t2d_4_p,")")
t2d_5 <- sum(dc$age_t2d_diag >= 35 & dc$age_t2d_diag <= 39, na.rm = TRUE)
t2d_5_p <- round((t2d_5/t2d_n)*100,2)
t2d5 <- paste0(t2d_5," (",t2d_5_p,")")
t2d_6 <- sum(dc$age_t2d_diag >= 40 & dc$age_t2d_diag <= 44, na.rm = TRUE)
t2d_6_p <- round((t2d_6/t2d_n)*100,2)
t2d6 <- paste0(t2d_6," (",t2d_6_p,")")
t2d_7 <- sum(dc$age_t2d_diag >= 45 & dc$age_t2d_diag <= 49, na.rm = TRUE)
t2d_7_p <- round((t2d_7/t2d_n)*100,2)
t2d7 <- paste0(t2d_7," (",t2d_7_p,")")
t2d_8 <- sum(dc$age_t2d_diag >= 50 & dc$age_t2d_diag <= 54, na.rm = TRUE)
t2d_8_p <- round((t2d_8/t2d_n)*100,2)
t2d8 <- paste0(t2d_8," (",t2d_8_p,")")
t2d_9 <- sum(dc$age_t2d_diag >= 55 & dc$age_t2d_diag <= 59, na.rm = TRUE)
t2d_9_p <- round((t2d_9/t2d_n)*100,2)
t2d9 <- paste0(t2d_9," (",t2d_9_p,")")
t2d_10 <- sum(dc$age_t2d_diag >= 60 & dc$age_t2d_diag <= 64, na.rm = TRUE)
t2d_10_p <- round((t2d_10/t2d_n)*100,2)
t2d10 <- paste0(t2d_10," (",t2d_10_p,")")
t2d_11 <- sum(dc$age_t2d_diag >= 65 & dc$age_t2d_diag <= 69, na.rm = TRUE)
t2d_11_p <- round((t2d_11/t2d_n)*100,2)
t2d11 <- paste0(t2d_11," (",t2d_11_p,")")
t2d_12 <- sum(dc$age_t2d_diag >= 70 & dc$age_t2d_diag <= 74, na.rm = TRUE)
t2d_12_p <- round((t2d_12/t2d_n)*100,2)
t2d12 <- paste0(t2d_12," (",t2d_12_p,")")
t2d_13 <- sum(dc$age_t2d_diag >= 75 & dc$age_t2d_diag <= 79, na.rm = TRUE)
t2d_13_p <- round((t2d_13/t2d_n)*100,2)
t2d13 <- paste0(t2d_13," (",t2d_13_p,")")
t2d_14 <- sum(dc$age_t2d_diag >= 80 & dc$age_t2d_diag <= 84, na.rm = TRUE)
t2d_14_p <- round((t2d_14/t2d_n)*100,2)
t2d14 <- paste0(t2d_14," (",t2d_14_p,")")
t2d_15 <- sum(dc$age_t2d_diag >= 85 & dc$age_t2d_diag <= 89, na.rm = TRUE)
t2d_15_p <- round((t2d_15/t2d_n)*100,2)
t2d15 <- paste0(t2d_15," (",t2d_15_p,")")
t2d_16 <- sum(dc$age_t2d_diag >= 90 & dc$age_t2d_diag <= 94, na.rm = TRUE)
t2d_16_p <- round((t2d_16/t2d_n)*100,2)
t2d16 <- paste0(t2d_16," (",t2d_16_p,")")
t2d_17 <- sum(dc$age_t2d_diag >= 95 & dc$age_t2d_diag <= 99, na.rm = TRUE)
t2d_17_p <- round((t2d_17/t2d_n)*100,2)
t2d17 <- paste0(t2d_17," (",t2d_17_p,")")
t2d_18 <- sum(dc$age_t2d_diag >= 100, na.rm = TRUE)
t2d_18_p <- round((t2d_18/t2d_n)*100,2)
t2d18 <- paste0(t2d_18," (",t2d_18_p,")")

age <- c("Mean (SD)", "<20", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90-94", "95-99", "100+")
any_diabetes <- c(any_ms, any1, any2, any3, any4, any5, any6, any7, any8, any9, any10, any11, any12, any13, any14, any15, any16, any17, any18)
t1d_diabetes <- c(t1d_ms, t1d1, t1d2, t1d3, t1d4, t1d5, t1d6, t1d7, t1d8, t1d9, t1d10, t1d11, t1d12, t1d13, t1d14, t1d15, t1d16, t1d17, t1d18)
t2d_diabetes <- c(t2d_ms, t2d1, t2d2, t2d3, t2d4, t2d5, t2d6, t2d7, t2d8, t2d9, t2d10, t2d11, t2d12, t2d13, t2d14, t2d15, t2d16, t2d17, t2d18)
age_dm <- data.frame(age, any_diabetes, t1d_diabetes, t2d_diabetes)
age_dm

age,any_diabetes,t1d_diabetes,t2d_diabetes
<chr>,<chr>,<chr>,<chr>
Mean (SD),57 (15),36 (21),58 (14)
<20,705 (1.59),557 (25.72),173 (0.41)
20-24,397 (0.9),165 (7.62),243 (0.57)
25-29,780 (1.76),175 (8.08),624 (1.47)
30-34,1466 (3.31),163 (7.53),1330 (3.12)
35-39,2318 (5.24),154 (7.11),2192 (5.15)
40-44,3434 (7.76),149 (6.88),3326 (7.81)
45-49,4258 (9.62),177 (8.17),4131 (9.7)
50-54,5032 (11.37),149 (6.88),4940 (11.6)
55-59,5322 (12.03),112 (5.17),5263 (12.36)


In [16]:
# Gender 
# males
m_any <- sum(dc$gender == 1 & !is.na(dc$any_diabetes), na.rm = TRUE)
m_any_p <- round((m_any/any_diabetes_n)*100,2)
m_any1 <- paste0(m_any," (",m_any_p,")")
m_t1d <- sum(dc$gender == 1 & dc$t1d_status == 1, na.rm = TRUE)
m_t1d_p <- round((m_t1d/t1d_n)*100,2)
m_t1d1 <- paste0(m_t1d," (",m_t1d_p,")")
m_t2d <- sum(dc$gender == 1 & dc$t2d_status == 1, na.rm = TRUE)
m_t2d_p <- round((m_t2d/t2d_n)*100,2)
m_t2d1 <- paste0(m_t2d," (",m_t2d_p,")")

# females
f_any <- sum(dc$gender == 2 & !is.na(dc$any_diabetes), na.rm = TRUE)
f_any_p <- round((f_any/any_diabetes_n)*100,2)
f_any1 <- paste0(f_any," (",f_any_p,")")
f_t1d <- sum(dc$gender == 2 & dc$t1d_status == 1, na.rm = TRUE)
f_t1d_p <- round((f_t1d/t1d_n)*100,2)
f_t1d1 <- paste0(f_t1d," (",f_t1d_p,")")
f_t2d <- sum(dc$gender == 2 & dc$t2d_status == 1, na.rm = TRUE)
f_t2d_p <- round((f_t2d/t2d_n)*100,2)
f_t2d1 <- paste0(f_t2d," (",f_t2d_p,")")

gender <- c("Males", "Females")
gender_any <- c(m_any1, f_any1)
gender_t1d <- c(m_t1d1, f_t1d1)
gender_t2d <- c(m_t2d1, f_t2d1)
gender_n <- data.frame(gender, gender_any, gender_t1d, gender_t2d)
gender_n

gender,gender_any,gender_t1d,gender_t2d
<chr>,<chr>,<chr>,<chr>
Males,23811 (53.81),1270 (58.63),22803 (53.54)
Females,20466 (46.25),896 (41.37),19786 (46.46)


In [17]:
# IMD
imd1_any <- sum(dc$imd_quintile == 1 & dc$any_diabetes == 1) 
imd1_any_p <- round((imd1_any/any_diabetes_n)*100,2)
imd1any <- paste0(imd1_any," (",imd1_any_p,")")

imd1_t1d <- sum(dc$imd_quintile == 1 & dc$t1d_status == 1) 
imd1_t1d_p <- round((imd1_t1d/t1d_n)*100,2)
imd1t1d <- paste0(imd1_t1d," (",imd1_t1d_p,")")

imd1_t2d <- sum(dc$imd_quintile == 1 & dc$t2d_status == 1) 
imd1_t2d_p <- round((imd1_t2d/t2d_n)*100,2)
imd1t2d <- paste0(imd1_t2d," (",imd1_t2d_p,")")

imd2_any <- sum(dc$imd_quintile == 2 & dc$any_diabetes == 1) 
imd2_any_p <- round((imd2_any/any_diabetes_n)*100,2)
imd2any <- paste0(imd2_any," (",imd2_any_p,")")

imd2_t1d <- sum(dc$imd_quintile == 2 & dc$t1d_status == 1) 
imd2_t1d_p <- round((imd2_t1d/t1d_n)*100,2)
imd2t1d <- paste0(imd2_t1d," (",imd2_t1d_p,")")

imd2_t2d <- sum(dc$imd_quintile == 2 & dc$t2d_status == 1) 
imd2_t2d_p <- round((imd2_t2d/t2d_n)*100,2)
imd2t2d <- paste0(imd2_t2d," (",imd2_t2d_p,")")

imd3_any <- sum(dc$imd_quintile == 3 & dc$any_diabetes == 1) 
imd3_any_p <- round((imd3_any/any_diabetes_n)*100,2)
imd3any <- paste0(imd3_any," (",imd3_any_p,")")

imd3_t1d <- sum(dc$imd_quintile == 3 & dc$t1d_status == 1) 
imd3_t1d_p <- round((imd3_t1d/t1d_n)*100,2)
imd3t1d <- paste0(imd3_t1d," (",imd3_t1d_p,")")

imd3_t2d <- sum(dc$imd_quintile == 3 & dc$t2d_status == 1) 
imd3_t2d_p <- round((imd3_t2d/t2d_n)*100,2)
imd3t2d <- paste0(imd3_t2d," (",imd3_t2d_p,")")

imd4_any <- sum(dc$imd_quintile == 4 & dc$any_diabetes == 1) 
imd4_any_p <- round((imd4_any/any_diabetes_n)*100,2)
imd4any <- paste0(imd4_any," (",imd4_any_p,")")

imd4_t1d <- sum(dc$imd_quintile == 4 & dc$t1d_status == 1) 
imd4_t1d_p <- round((imd4_t1d/t1d_n)*100,2)
imd4t1d <- paste0(imd4_t1d," (",imd4_t1d_p,")")

imd4_t2d <- sum(dc$imd_quintile == 4 & dc$t2d_status == 1) 
imd4_t2d_p <- round((imd4_t2d/t2d_n)*100,2)
imd4t2d <- paste0(imd4_t2d," (",imd4_t2d_p,")")

imd5_any <- sum(dc$imd_quintile == 5 & dc$any_diabetes == 1) 
imd5_any_p <- round((imd5_any/any_diabetes_n)*100,2)
imd5any <- paste0(imd5_any," (",imd5_any_p,")")

imd5_t1d <- sum(dc$imd_quintile == 5 & dc$t1d_status == 1) 
imd5_t1d_p <- round((imd5_t1d/t1d_n)*100,2)
imd5t1d <- paste0(imd5_t1d," (",imd5_t1d_p,")")

imd5_t2d <- sum(dc$imd_quintile == 5 & dc$t2d_status == 1) 
imd5_t2d_p <- round((imd5_t2d/t2d_n)*100,2)
imd5t2d <- paste0(imd5_t2d," (",imd5_t2d_p,")")

imd_quintile <- c("1", "2", "3", "4", "5")
imd_any <- c(imd1any, imd2any, imd3any, imd4any, imd5any)
imd_t1d <- c(imd1t1d, imd2t1d, imd3t1d, imd4t1d, imd5t1d)
imd_t2d <- c(imd1t2d, imd2t2d, imd3t2d, imd4t2d, imd5t2d)
imd <- data.frame(imd_quintile, imd_any, imd_t1d, imd_t2d)
imd

imd_quintile,imd_any,imd_t1d,imd_t2d
<chr>,<chr>,<chr>,<chr>
1,23101 (52.21),973 (44.92),22420 (52.64)
2,8901 (20.12),427 (19.71),8557 (20.09)
3,6206 (14.02),366 (16.9),5899 (13.85)
4,3961 (8.95),253 (11.68),3753 (8.81)
5,2081 (4.7),147 (6.79),1960 (4.6)


In [18]:
colnames(dc)

In [18]:
# Smoker
smoke_any <- sum(dc$smoking == 1 & dc$any_diabetes == 1, na.rm = TRUE)
smk_any_p <- round((smoke_any/any_diabetes_n)*100,2)
smkany <- paste0(smoke_any," (",smk_any_p,")")

smoke_t1d <- sum(dc$smoking == 1 & dc$t1d_status == 1, na.rm = TRUE)
smk_t1d_p <- round((smoke_t1d/t1d_n)*100,2)
smkt1d <- paste0(smoke_t1d," (",smk_t1d_p,")")

smoke_t2d <- sum(dc$smoking == 1 & dc$t2d_status == 1, na.rm = TRUE)
smk_t2d_p <- round((smoke_t2d/t2d_n)*100,2)
smkt2d <- paste0(smoke_t2d," (",smk_t2d_p,")")

exsmoke_any <- sum(dc$smoking == 2 & dc$any_diabetes == 1, na.rm = TRUE)
exsmk_any_p <- round((exsmoke_any/any_diabetes_n)*100,2)
exsmkany <- paste0(exsmoke_any," (",exsmk_any_p,")")

exsmoke_t1d <- sum(dc$smoking == 2 & dc$t1d_status == 1, na.rm = TRUE)
exsmk_t1d_p <- round((exsmoke_t1d/t1d_n)*100,2)
exsmkt1d <- paste0(exsmoke_t1d," (",exsmk_t1d_p,")")

exsmoke_t2d <- sum(dc$smoking == 2 & dc$t2d_status == 1, na.rm = TRUE)
exsmk_t2d_p <- round((exsmoke_t2d/t2d_n)*100,2)
exsmkt2d <- paste0(exsmoke_t2d," (",exsmk_t2d_p,")")

nonsmoke_any <- sum(dc$smoking == 3 & dc$any_diabetes == 1, na.rm = TRUE)
nosmk_any_p <- round((nonsmoke_any/any_diabetes_n)*100,2)
nosmkany <- paste0(nonsmoke_any," (",nosmk_any_p,")")

nonsmoke_t1d <- sum(dc$smoking == 3 & dc$t1d_status == 1, na.rm = TRUE)
nosmk_t1d_p <- round((nonsmoke_t1d/t1d_n)*100,2)
nosmkt1d <- paste0(nonsmoke_t1d," (",nosmk_t1d_p,")")

nonsmoke_t2d <- sum(dc$smoking == 3 & dc$t2d_status == 1, na.rm = TRUE)
nosmk_t2d_p <- round((nonsmoke_t2d/t2d_n)*100,2)
nosmkt2d <- paste0(nonsmoke_t2d," (",nosmk_t2d_p,")")


smoking <- c("Smoker", "Ex-Smoker", "Non-Smoker")
smoker <- c(smkany, smkt1d, smkt2d)
exsmoker <- c(exsmkany, exsmkt1d, exsmkt2d)
nonsmoker <- c(nosmkany, nosmkt1d, nosmkt2d)
smoke <- data.frame(smoking, smoker, exsmoker, nonsmoker)

smoke

smoking,smoker,exsmoker,nonsmoker
<chr>,<chr>,<chr>,<chr>
Smoker,8118 (18.35),12123 (27.4),1581 (3.57)
Ex-Smoker,409 (18.88),318 (14.68),75 (3.46)
Non-Smoker,7805 (18.33),11895 (27.93),1534 (3.6)


In [32]:
# BMI
mean(dc$bmi_value[dc$any_diabetes==1], na.rm = TRUE)
sd(dc$bmi_value[dc$any_diabetes==1], na.rm = TRUE)

mean(dc$bmi_value[dc$t1d_status==1], na.rm = TRUE)
sd(dc$bmi_value[dc$t1d_status==1], na.rm = TRUE)

mean(dc$bmi_value[dc$t2d_status==1], na.rm = TRUE)
sd(dc$bmi_value[dc$t2d_status==1], na.rm = TRUE)

bmiu_any <- sum(dc$bmi == 1 & dc$any_diabetes == 1, na.rm = TRUE)
bmiu_any_p <- round((bmiu_any/any_diabetes_n)*100,2)
bmiuany <- paste0(bmiu_any," (",bmiu_any_p,")")
bmih_any <- sum(dc$bmi == 2 & dc$any_diabetes == 1, na.rm = TRUE)
bmih_any_p <- round((bmih_any/any_diabetes_n)*100,2)
bmihany <- paste0(bmih_any," (",bmih_any_p,")")
bmiov_any <- sum(dc$bmi == 3 & dc$any_diabetes == 1, na.rm = TRUE)
bmiov_any_p <- round((bmiov_any/any_diabetes_n)*100,2)
bmiovany <- paste0(bmiov_any," (",bmiov_any_p,")")
bmiob_any <- sum(dc$bmi == 4 & dc$any_diabetes == 1, na.rm = TRUE)
bmiob_any_p <- round((bmiob_any/any_diabetes_n)*100,2)
bmiobany <- paste0(bmiob_any," (",bmiob_any_p,")")

bmiu_t1d <- sum(dc$bmi == 1 & dc$t1d_status == 1, na.rm = TRUE)
bmiu_t1d_p <- round((bmiu_t1d/t1d_n)*100,2)
bmiut1d <- paste0(bmiu_t1d," (",bmiu_t1d_p,")")
bmih_t1d <- sum(dc$bmi == 2 & dc$t1d_status == 1, na.rm = TRUE)
bmih_t1d_p <- round((bmih_t1d/t1d_n)*100,2)
bmiht1d <- paste0(bmih_t1d," (",bmih_t1d_p,")")
bmiov_t1d <- sum(dc$bmi == 3 & dc$t1d_status == 1, na.rm = TRUE)
bmiov_t1d_p <- round((bmiov_t1d/t1d_n)*100,2)
bmiovt1d <- paste0(bmiov_t1d," (",bmiov_t1d_p,")")
bmiob_t1d <- sum(dc$bmi == 4 & dc$t1d_status == 1, na.rm = TRUE)
bmiob_t1d_p <- round((bmiob_t1d/t1d_n)*100,2)
bmiobt1d <- paste0(bmiob_t1d," (",bmiob_t1d_p,")")

bmiu_t2d <- sum(dc$bmi == 1 & dc$t2d_status == 1, na.rm = TRUE)
bmiu_t2d_p <- round((bmiu_t2d/t2d_n)*100,2)
bmiut2d <- paste0(bmiu_t2d," (",bmiu_t2d_p,")")
bmih_t2d <- sum(dc$bmi == 2 & dc$t2d_status == 1, na.rm = TRUE)
bmih_t2d_p <- round((bmih_t2d/t2d_n)*100,2)
bmiht2d <- paste0(bmih_t2d," (",bmih_t2d_p,")")
bmiov_t2d <- sum(dc$bmi == 3 & dc$t2d_status == 1, na.rm = TRUE)
bmiov_t2d_p <- round((bmiov_t2d/t2d_n)*100,2)
bmiovt2d <- paste0(bmiov_t2d," (",bmiov_t2d_p,")")
bmiob_t2d <- sum(dc$bmi == 4 & dc$t2d_status == 1, na.rm = TRUE)
bmiob_t2d_p <- round((bmiob_t2d/t2d_n)*100,2)
bmiobt2d <- paste0(bmiob_t2d," (",bmiob_t2d_p,")")

bmi_cat <- c("Underweight", "Health weight", "Overweight", "Obese")
bmi_any <- c(bmiuany, bmihany, bmiovany, bmiobany)
bmi_t1d <- c(bmiut1d, bmiht1d, bmiovt1d, bmiobt1d)
bmi_t2d <- c(bmiut2d, bmiht2d, bmiovt2d, bmiobt2d)
bmi <- data.frame(bmi_cat, bmi_any, bmi_t1d, bmi_t2d)
bmi

bmi_cat,bmi_any,bmi_t1d,bmi_t2d
<chr>,<chr>,<chr>,<chr>
Underweight,390 (0.88),204 (9.42),195 (0.46)
Health weight,5408 (12.22),661 (30.52),4871 (11.44)
Overweight,12933 (29.23),541 (24.98),12544 (29.45)
Obese,21978 (49.67),416 (19.21),21735 (51.03)


In [20]:
# Blood Pressure
median(dc$sbp_value[dc$any_diabetes==1], na.rm = TRUE)
IQR(dc$sbp_value[dc$any_diabetes==1], na.rm = TRUE)
median(dc$dbp_value[dc$any_diabetes==1], na.rm = TRUE)
IQR(dc$dbp_value[dc$any_diabetes==1], na.rm = TRUE)

median(dc$sbp_value[dc$t1d_status==1], na.rm = TRUE)
IQR(dc$sbp_value[dc$t1d_status==1], na.rm = TRUE)
median(dc$dbp_value[dc$t1d_status==1], na.rm = TRUE)
IQR(dc$dbp_value[dc$t1d_status==1], na.rm = TRUE)

median(dc$sbp_value[dc$t2d_status==1], na.rm = TRUE)
IQR(dc$sbp_value[dc$t2d_status==1], na.rm = TRUE)
median(dc$dbp_value[dc$t2d_status==1], na.rm = TRUE)
IQR(dc$dbp_value[dc$t2d_status==1], na.rm = TRUE)



bpl_any <- sum(dc$blood_pressure == 1 & dc$any_diabetes == 1, na.rm = TRUE)
bpl_any_p <- round((bpl_any/any_diabetes_n)*100,2)
bplany <- paste0(bpl_any," (",bpl_any_p,")")

bpl_t1d <- sum(dc$blood_pressure == 1 & dc$t1d_status == 1, na.rm = TRUE)
bpl_t1d_p <- round((bpl_t1d/t1d_n)*100,2)
bplt1d <- paste0(bpl_t1d," (",bpl_t1d_p,")")

bpl_t2d <- sum(dc$blood_pressure == 1 & dc$t2d_status == 1, na.rm = TRUE)
bpl_t2d_p <- round((bpl_t2d/t2d_n)*100,2)
bplt2d <- paste0(bpl_t2d," (",bpl_t2d_p,")")

bpn_any <- sum(dc$blood_pressure == 2 & dc$any_diabetes == 1, na.rm = TRUE)
bpn_any_p <- round((bpn_any/any_diabetes_n)*100,2)
bpnany <- paste0(bpn_any," (",bpn_any_p,")")

bpn_t1d <- sum(dc$blood_pressure == 2 & dc$t1d_status == 1, na.rm = TRUE)
bpn_t1d_p <- round((bpn_t1d/t1d_n)*100,2)
bpnt1d <- paste0(bpn_t1d," (",bpn_t1d_p,")")

bpn_t2d <- sum(dc$blood_pressure == 2 & dc$t2d_status == 1, na.rm = TRUE)
bpn_t2d_p <- round((bpn_t2d/t2d_n)*100,2)
bpnt2d <- paste0(bpn_t2d," (",bpn_t2d_p,")")

bph_any <- sum(dc$blood_pressure == 3 & dc$any_diabetes == 1, na.rm = TRUE)
bph_any_p <- round((bph_any/any_diabetes_n)*100,2)
bphany <- paste0(bph_any," (",bph_any_p,")")

bph_t1d <- sum(dc$blood_pressure == 3 & dc$t1d_status == 1, na.rm = TRUE)
bph_t1d_p <- round((bph_t1d/t1d_n)*100,2)
bpht1d <- paste0(bph_t1d," (",bph_t1d_p,")")

bph_t2d <- sum(dc$blood_pressure == 3 & dc$t2d_status == 1, na.rm = TRUE)
bph_t2d_p <- round((bph_t2d/t2d_n)*100,2)
bpht2d <- paste0(bph_t2d," (",bph_t2d_p,")")

bp_cat <- c("Low", "Normal", "High")
bp_any <- c(bplany, bpnany, bphany)
bp_t1d <- c(bplt1d, bpnt1d, bpht1d)
bp_t2d <- c(bplt2d, bpnt2d, bpht2d)
bp <- data.frame(bp_cat, bp_any, bp_t1d, bp_t2d)
bp

bp_cat,bp_any,bp_t1d,bp_t2d
<chr>,<chr>,<chr>,<chr>
Low,209 (0.47),7 (0.32),204 (0.48)
Normal,1141 (2.58),29 (1.34),1119 (2.63)
High,827 (1.87),18 (0.83),818 (1.92)


In [21]:
# Cholesterol
chn_any <- sum(dc$cholesterol == 1 & dc$any_diabetes == 1, na.rm = TRUE)
chn_any_p <- round((chn_any/any_diabetes_n)*100,2)
chnany <- paste0(chn_any," (",chn_any_p,")")

chn_t1d <- sum(dc$cholesterol == 1 & dc$t1d_status == 1, na.rm = TRUE)
chn_t1d_p <- round((chn_t1d/t1d_n)*100,2)
chnt1d <- paste0(chn_t1d," (",chn_t1d_p,")")

chn_t2d <- sum(dc$cholesterol == 1 & dc$t2d_status == 1, na.rm = TRUE)
chn_t2d_p <- round((chn_t2d/t2d_n)*100,2)
chnt2d <- paste0(chn_t2d," (",chn_t2d_p,")")

chh_any <- sum(dc$cholesterol == 2 & dc$any_diabetes == 1, na.rm = TRUE)
chh_any_p <- round((chh_any/any_diabetes_n)*100,2)
chhany <- paste0(chh_any," (",chh_any_p,")")

chh_t1d <- sum(dc$cholesterol == 2 & dc$t1d_status == 1, na.rm = TRUE)
chh_t1d_p <- round((chh_t1d/t1d_n)*100,2)
chht1d <- paste0(chh_t1d," (",chh_t1d_p,")")

chh_t2d <- sum(dc$cholesterol == 2 & dc$t2d_status == 1, na.rm = TRUE)
chh_t2d_p <- round((chh_t2d/t2d_n)*100,2)
chht2d <- paste0(chh_t2d," (",chh_t2d_p,")")

chol_cat <- c("Healthy", "High")
chol_any <- c(chnany, chhany)
chol_t1d <- c(chnt1d, chht1d)
chol_t2d <- c(chnt2d, chht2d)
chol <- data.frame(chol_cat, chol_any, chol_t1d, chol_t2d)
chol

chol_cat,chol_any,chol_t1d,chol_t2d
<chr>,<chr>,<chr>,<chr>
Healthy,111 (0.25),7 (0.32),104 (0.24)
High,59 (0.13),2 (0.09),58 (0.14)


In [22]:
# Renal Disease
rda_any <- sum((dc$creatinine == 1 | dc$egfr == 1 | dc$acr == 1) & dc$any_diabetes == 1, na.rm = TRUE)
rda_any_p <- round((rda_any/any_diabetes_n)*100,2)
rdaany <- paste0(rda_any," (",rda_any_p,")")

rda_t1d <- sum((dc$creatinine == 1 | dc$egfr == 1 | dc$acr == 1) & dc$t1d_status == 1, na.rm = TRUE)
rda_t1d_p <- round((rda_t1d/t1d_n)*100,2)
rdat1d <- paste0(rda_t1d," (",rda_t1d_p,")")

rda_t2d <- sum((dc$creatinine == 1 | dc$egfr == 1 | dc$acr == 1) & dc$t2d_status == 1, na.rm = TRUE)
rda_t2d_p <- round((rda_t2d/t2d_n)*100,2)
rdat2d <- paste0(rda_t2d," (",rda_t2d_p,")")



crn_any <- sum(dc$creatinine == 1 & dc$any_diabetes == 1, na.rm = TRUE) 
crn_any_p <- round((crn_any/any_diabetes_n)*100,2)
crnany <- paste0(crn_any," (",crn_any_p,")")

crn_t1d <- sum(dc$creatinine == 1 & dc$t1d_status == 1, na.rm = TRUE)
crn_t1d_p <- round((crn_t1d/t1d_n)*100,2)
crnt1d <- paste0(crn_t1d," (",crn_t1d_p,")")

crn_t2d <- sum(dc$creatinine == 1 & dc$t2d_status == 1, na.rm = TRUE)
crn_t2d_p <- round((crn_t2d/t2d_n)*100,2)
crnt2d <- paste0(crn_t2d," (",crn_t2d_p,")")

crh_any <- sum(dc$creatinine == 2 & dc$any_diabetes == 1, na.rm = TRUE)
crh_any_p <- round((crh_any/any_diabetes_n)*100,2)
crhany <- paste0(crh_any," (",crh_any_p,")")

crh_t1d <- sum(dc$creatinine == 2 & dc$t1d_status == 1, na.rm = TRUE)
crh_t1d_p <- round((crh_t1d/t1d_n)*100,2)
crht1d <- paste0(crh_t1d," (",crh_t1d_p,")")

crh_t2d <- sum(dc$creatinine == 2 & dc$t2d_status == 1, na.rm = TRUE)
crh_t2d_p <- round((crh_t2d/t2d_n)*100,2)
crht2d <- paste0(crh_t2d," (",crh_t2d_p,")")

crvh_any <- sum(dc$creatinine == 3 & dc$any_diabetes == 1, na.rm = TRUE) 
crvh_any_p <- round((crvh_any/any_diabetes_n)*100,2)
crvhany <- paste0(crvh_any," (",crvh_any_p,")")

crvh_t1d <- sum(dc$creatinine == 3 & dc$t1d_status == 1, na.rm = TRUE)
crvh_t1d_p <- round((crvh_t1d/t1d_n)*100,2)
crvht1d <- paste0(crvh_t1d," (",crvh_t1d_p,")")

crvh_t2d <- sum(dc$creatinine == 3 & dc$t2d_status == 1, na.rm = TRUE)
crvh_t2d_p <- round((crvh_t2d/t2d_n)*100,2)
crvht2d <- paste0(crvh_t2d," (",crvh_t2d_p,")")



egn_any <- sum(dc$egfr == 1 & dc$any_diabetes == 1, na.rm = TRUE) 
egn_any_p <- round((egn_any/any_diabetes_n)*100,2)
egnany <- paste0(egn_any," (",egn_any_p,")")

egn_t1d <- sum(dc$egfr == 1 & dc$t1d_status == 1, na.rm = TRUE)
egn_t1d_p <- round((egn_t1d/t1d_n)*100,2)
egnt1d <- paste0(egn_t1d," (",egn_t1d_p,")")

egn_t2d <- sum(dc$egfr == 1 & dc$t2d_status == 1, na.rm = TRUE)
egn_t2d_p <- round((egn_t2d/t2d_n)*100,2)
egnt2d <- paste0(egn_t2d," (",egn_t2d_p,")")

egr_any <- sum(dc$egfr == 2 & dc$any_diabetes == 1, na.rm = TRUE) 
egr_any_p <- round((egr_any/any_diabetes_n)*100,2)
egrany <- paste0(egr_any," (",egr_any_p,")")

egr_t1d <- sum(dc$egfr == 2 & dc$t1d_status == 1, na.rm = TRUE)
egr_t1d_p <- round((egr_t1d/t1d_n)*100,2)
egrt1d <- paste0(egr_t1d," (",egr_t1d_p,")")

egr_t2d <- sum(dc$egfr == 2 & dc$t2d_status == 1, na.rm = TRUE)
egr_t2d_p <- round((egr_t2d/t2d_n)*100,2)
egrt2d <- paste0(egr_t2d," (",egr_t2d_p,")")



acrn_any <- sum(dc$acr == 1 & dc$any_diabetes == 1, na.rm = TRUE) 
acrn_any_p <- round((acrn_any/any_diabetes_n)*100,2)
acrnany <- paste0(acrn_any," (",acrn_any_p,")")

acrn_t1d <- sum(dc$acr == 1 & dc$t1d_status == 1, na.rm = TRUE)
acrn_t1d_p <- round((acrn_t1d/t1d_n)*100,2)
acrnt1d <- paste0(acrn_t1d," (",acrn_t1d_p,")")

acrn_t2d <- sum(dc$acr == 1 & dc$t2d_status == 1, na.rm = TRUE)
acrn_t2d_p <- round((acrn_t2d/t2d_n)*100,2)
acrnt2d <- paste0(acrn_t2d," (",acrn_t2d_p,")")

acrh_any <- sum(dc$acr == 2 & dc$any_diabetes == 1, na.rm = TRUE) 
acrh_any_p <- round((acrh_any/any_diabetes_n)*100,2)
acrhany <- paste0(acrh_any," (",acrh_any_p,")")

acrh_t1d <- sum(dc$acr == 2 & dc$t1d_status == 1, na.rm = TRUE)
acrh_t1d_p <- round((acrh_t1d/t1d_n)*100,2)
acrht1d <- paste0(acrh_t1d," (",acrh_t1d_p,")")

acrh_t2d <- sum(dc$acr == 2 & dc$t2d_status == 1, na.rm = TRUE)
acrh_t2d_p <- round((acrh_t2d/t2d_n)*100,2)
acrht2d <- paste0(acrh_t2d," (",acrh_t2d_p,")")

acrvh_any <- sum(dc$acr == 3 & dc$any_diabetes == 1, na.rm = TRUE) 
acrvh_any_p <- round((acrvh_any/any_diabetes_n)*100,2)
acrvhany <- paste0(acrvh_any," (",acrvh_any_p,")")

acrvh_t1d <- sum(dc$acr == 3 & dc$t1d_status == 1, na.rm = TRUE)
acrvh_t1d_p <- round((acrvh_t1d/t1d_n)*100,2)
acrvht1d <- paste0(acrvh_t1d," (",acrvh_t1d_p,")")

acrvh_t2d <- sum(dc$acr == 3 & dc$t2d_status == 1, na.rm = TRUE)
acrvh_t2d_p <- round((acrvh_t2d/t2d_n)*100,2)
acrvht2d <- paste0(acrvh_t2d," (",acrvh_t2d_p,")")

rd_cat <- c("Any", "Creatinine_Normal", "Creatinine_High", "Creatinine_Very_High", "ACR_Normal", "ACR_High", "ACR_Very_High", "eGFR_Normal", "eGFR_Reduced")
rd_any <- c(rdaany, crnany, crhany, crvhany, acrnany, acrhany, acrvhany, egnany, egrany)
rd_t1d <- c(rdat1d, crnt1d, crht1d, crvht1d, acrnt1d, acrht1d, acrvht1d, egnt1d, egrt1d)
rd_t2d <- c(rdat2d, crnt2d, crht2d, crvht2d, acrnt2d, acrht2d, acrvht2d, egnt2d, egrt2d)
rd <- data.frame(rd_cat, rd_any, rd_t1d, rd_t2d)
rd

rd_cat,rd_any,rd_t1d,rd_t2d
<chr>,<chr>,<chr>,<chr>
Any,26822 (60.61),891 (41.14),26220 (61.57)
Creatinine_Normal,459 (1.04),12 (0.55),450 (1.06)
Creatinine_High,10328 (23.34),431 (19.9),10045 (23.59)
Creatinine_Very_High,31443 (71.06),1173 (54.16),30583 (71.81)
ACR_Normal,26644 (60.21),886 (40.9),26046 (61.16)
ACR_High,26 (0.06),2 (0.09),24 (0.06)
ACR_Very_High,3745 (8.46),133 (6.14),3654 (8.58)
eGFR_Normal,3 (0.01),0 (0),3 (0.01)
eGFR_Reduced,22 (0.05),0 (0),22 (0.05)


In [24]:
table(dc$creatinine)


    0     1     2     3 
 2027   460 10337 31453 

In [23]:
# Ethnicity
# White
ethw_any <- sum(dc$ethnicity == 1 & dc$any_diabetes == 1, na.rm = TRUE)
ethw_any_p <- round((ethw_any/any_diabetes_n)*100,2)
ethwany <- paste0(ethw_any," (",ethw_any_p,")")

ethw_t1d <- sum(dc$ethnicity == 1 & dc$t1d_status == 1, na.rm = TRUE)
ethw_t1d_p <- round((ethw_t1d/t1d_n)*100,2)
ethwt1d <- paste0(ethw_t1d," (",ethw_t1d_p,")")

ethw_t2d <- sum(dc$ethnicity == 1 & dc$t2d_status == 1, na.rm = TRUE)
ethw_t2d_p <- round((ethw_t2d/t2d_n)*100,2)
ethwt2d <- paste0(ethw_t2d," (",ethw_t2d_p,")")

# Pakistani
ethp_any <- sum(dc$ethnicity == 2 & dc$any_diabetes == 1, na.rm = TRUE)
ethp_any_p <- round((ethp_any/any_diabetes_n)*100,2)
ethpany <- paste0(ethp_any," (",ethp_any_p,")")

ethp_t1d <- sum(dc$ethnicity == 2 & dc$t1d_status == 1, na.rm = TRUE)
ethp_t1d_p <- round((ethp_t1d/t1d_n)*100,2)
ethpt1d <- paste0(ethp_t1d," (",ethp_t1d_p,")")

ethp_t2d <- sum(dc$ethnicity == 2 & dc$t2d_status == 1, na.rm = TRUE)
ethp_t2d_p <- round((ethp_t2d/t2d_n)*100,2)
ethpt2d <- paste0(ethp_t2d," (",ethp_t2d_p,")")

# Other
etho_any <- sum(dc$ethnicity == 3 & dc$any_diabetes == 1, na.rm = TRUE)
etho_any_p <- round((etho_any/any_diabetes_n)*100,2)
ethoany <- paste0(etho_any," (",etho_any_p,")")

etho_t1d <- sum(dc$ethnicity == 3 & dc$t1d_status == 1, na.rm = TRUE)
etho_t1d_p <- round((etho_t1d/t1d_n)*100,2)
ethot1d <- paste0(etho_t1d," (",etho_t1d_p,")")

etho_t2d <- sum(dc$ethnicity == 3 & dc$t2d_status == 1, na.rm = TRUE)
etho_t2d_p <- round((etho_t2d/t2d_n)*100,2)
ethot2d <- paste0(etho_t2d," (",etho_t2d_p,")")

ethnicity_cat <- c("White", "Pakistani", "Other")
eth_any <- c(ethwany, ethpany, ethoany)
eth_t1d <- c(ethwt1d, ethpt1d, ethot1d)
eth_t2d <- c(ethwt2d, ethpt2d, ethot2d)
ethnicity <- data.frame(ethnicity_cat, eth_any, eth_t1d, eth_t2d)
ethnicity



ethnicity_cat,eth_any,eth_t1d,eth_t2d
<chr>,<chr>,<chr>,<chr>
White,25797 (58.3),1529 (70.59),24531 (57.6)
Pakistani,15865 (35.85),510 (23.55),15564 (36.54)
Other,2588 (5.85),127 (5.86),2494 (5.86)


In [24]:
# N (%) of CVD for each diabetes
anycvd_any <- sum(dc$has_cvd == 1 & dc$any_diabetes == 1, na.rm = TRUE)
anycvd_any_p <- round((anycvd_any/any_diabetes_n)*100,2)
anycvdany <- paste0(anycvd_any," (",anycvd_any_p,")")

anycvd_t1d <- sum(dc$has_cvd == 1 & dc$t1d_status == 1, na.rm = TRUE)
anycvd_t1d_p <- round((anycvd_t1d/t1d_n)*100,2)
anycvdt1d <- paste0(anycvd_t1d," (",anycvd_t1d_p,")")

anycvd_t2d <- sum(dc$has_cvd == 1 & dc$t2d_status == 1, na.rm = TRUE)
anycvd_t2d_p <- round((anycvd_t2d/t2d_n)*100,2)
anycvdt2d <- paste0(anycvd_t2d," (",anycvd_t2d_p,")")

ang_any <- sum(dc$has_angina == 1 & dc$any_diabetes == 1, na.rm = TRUE)
angcvd_any_p <- round((ang_any/any_diabetes_n)*100,2)
angany <- paste0(ang_any," (",angcvd_any_p,")")

ang_t1d <- sum(dc$has_angina == 1 & dc$t1d_status == 1, na.rm = TRUE)
angcvd_t1d_p <- round((ang_t1d/t1d_n)*100,2)
angt1d <- paste0(ang_t1d," (",angcvd_t1d_p,")")

ang_t2d <- sum(dc$has_angina == 1 & dc$t2d_status == 1, na.rm = TRUE)
angcvd_t2d_p <- round((ang_t2d/t2d_n)*100,2)
angt2d <- paste0(ang_t2d," (",angcvd_t2d_p,")")

cardio_any <- sum(dc$has_cardiomyopathy== 1 & dc$any_diabetes == 1, na.rm = TRUE)
cardiocvd_any_p <- round((cardio_any/any_diabetes_n)*100,2)
cardioany <- paste0(cardio_any," (",cardiocvd_any_p,")")

cardio_t1d <- sum(dc$has_cardiomyopathy== 1 & dc$t1d_status == 1, na.rm = TRUE)
cardiocvd_t1d_p <- round((cardio_t1d/t1d_n)*100,2)
cardiot1d <- paste0(cardio_t1d," (",cardiocvd_t1d_p,")")

cardio_t2d <- sum(dc$has_cardiomyopathy== 1 & dc$t2d_status == 1, na.rm = TRUE)
cardiocvd_t2d_p <- round((cardio_t2d/t2d_n)*100,2)
cardiot2d <- paste0(cardio_t2d," (",cardiocvd_t2d_p,")")


hf_any <- sum(dc$has_heart_failure== 1 & dc$any_diabetes == 1, na.rm = TRUE)
hfcvd_any_p <- round((hf_any/any_diabetes_n)*100,2)
hfany <- paste0(hf_any," (",hfcvd_any_p,")")

hf_t1d <- sum(dc$has_heart_failure== 1 & dc$t1d_status == 1, na.rm = TRUE)
hfcvd_t1d_p <- round((hf_t1d/t1d_n)*100,2)
hft1d <- paste0(hf_t1d," (",hfcvd_t1d_p,")")

hf_t2d <- sum(dc$has_heart_failure== 1 & dc$t2d_status == 1, na.rm = TRUE)
hfcvd_t2d_p <- round((hf_t2d/t2d_n)*100,2)
hft2d <- paste0(hf_t2d," (",hfcvd_t2d_p,")")


ht_any <- sum(dc$has_hypertension== 1 & dc$any_diabetes == 1, na.rm = TRUE)
htcvd_any_p <- round((ht_any/any_diabetes_n)*100,2)
htany <- paste0(ht_any," (",htcvd_any_p,")")

ht_t1d <- sum(dc$has_hypertension== 1 & dc$t1d_status == 1, na.rm = TRUE)
htcvd_t1d_p <- round((ht_t1d/t1d_n)*100,2)
htt1d <- paste0(ht_t1d," (",htcvd_t1d_p,")")

ht_t2d <- sum(dc$has_hypertension== 1 & dc$t2d_status == 1, na.rm = TRUE)
htcvd_t2d_p <- round((ht_t2d/t2d_n)*100,2)
htt2d <- paste0(ht_t2d," (",htcvd_t2d_p,")")


ihd_any <- sum(dc$has_ihd== 1 & dc$any_diabetes == 1, na.rm = TRUE)
ihdcvd_any_p <- round((ihd_any/any_diabetes_n)*100,2)
ihdany <- paste0(ihd_any," (",ihdcvd_any_p,")")

ihd_t1d <- sum(dc$has_ihd== 1 & dc$t1d_status == 1, na.rm = TRUE)
ihdcvd_t1d_p <- round((ihd_t1d/t1d_n)*100,2)
ihdt1d <- paste0(ihd_t1d," (",ihdcvd_t1d_p,")")

ihd_t2d <- sum(dc$has_ihd== 1 & dc$t2d_status == 1, na.rm = TRUE)
ihdcvd_t2d_p <- round((ihd_t2d/t2d_n)*100,2)
ihdt2d <- paste0(ihd_t2d," (",ihdcvd_t2d_p,")")


mi_any <- sum(dc$has_mi== 1 & dc$any_diabetes == 1, na.rm = TRUE)
micvd_any_p <- round((mi_any/any_diabetes_n)*100,2)
miany <- paste0(mi_any," (",micvd_any_p,")")

mi_t1d <- sum(dc$has_mi== 1 & dc$t1d_status == 1, na.rm = TRUE)
micvd_t1d_p <- round((mi_t1d/t1d_n)*100,2)
mit1d <- paste0(mi_t1d," (",micvd_t1d_p,")")

mi_t2d <- sum(dc$has_mi== 1 & dc$t2d_status == 1, na.rm = TRUE)
micvd_t2d_p <- round((mi_t2d/t2d_n)*100,2)
mit2d <- paste0(mi_t2d," (",micvd_t2d_p,")")


stroke_any <- sum(dc$has_stroke== 1 & dc$any_diabetes == 1, na.rm = TRUE)
strokecvd_any_p <- round((stroke_any/any_diabetes_n)*100,2)
strokeany <- paste0(stroke_any," (",strokecvd_any_p,")")

stroke_t1d <- sum(dc$has_stroke== 1 & dc$t1d_status == 1, na.rm = TRUE)
strokecvd_t1d_p <- round((stroke_t1d/t1d_n)*100,2)
stroket1d <- paste0(stroke_t1d," (",strokecvd_t1d_p,")")

stroke_t2d <- sum(dc$has_stroke== 1 & dc$t2d_status == 1, na.rm = TRUE)
strokecvd_t2d_p <- round((stroke_t2d/t2d_n)*100,2)
stroket2d <- paste0(stroke_t2d," (",strokecvd_t2d_p,")")


pvd_any <- sum(dc$has_pvd== 1 & dc$any_diabetes == 1, na.rm = TRUE)
pvdcvd_any_p <- round((pvd_any/any_diabetes_n)*100,2)
pvdany <- paste0(pvd_any," (",pvdcvd_any_p,")")

pvd_t1d <- sum(dc$has_pvd== 1 & dc$t1d_status == 1, na.rm = TRUE)
pvdcvd_t1d_p <- round((pvd_t1d/t1d_n)*100,2)
pvdt1d <- paste0(pvd_t1d," (",pvdcvd_t1d_p,")")

pvd_t2d <- sum(dc$has_pvd== 1 & dc$t2d_status == 1, na.rm = TRUE)
pvdcvd_t2d_p <- round((pvd_t2d/t2d_n)*100,2)
pvdt2d <- paste0(pvd_t2d," (",pvdcvd_t2d_p,")")


vd_any <- sum(dc$has_valve_disease == 1 & dc$any_diabetes == 1, na.rm = TRUE)
vdcvd_any_p <- round((vd_any/any_diabetes_n)*100,2)
vdany <- paste0(vd_any," (",vdcvd_any_p,")")

vd_t1d <- sum(dc$has_valve_disease == 1 & dc$t1d_status == 1, na.rm = TRUE)
vdcvd_t1d_p <- round((vd_t1d/t1d_n)*100,2)
vdt1d <- paste0(vd_t1d," (",vdcvd_t1d_p,")")

vd_t2d <- sum(dc$has_valve_disease == 1 & dc$t2d_status == 1, na.rm = TRUE)
vdcvd_t2d_p <- round((vd_t2d/t2d_n)*100,2)
vdt2d <- paste0(vd_t2d," (",vdcvd_t2d_p,")")

cvd_cat <- c("Any CVD", "Angina", "Cardiomyopathy", "Heart Failure", "Hypertension", "Iscaemic Heart Disease", "Myocardial Infarction", "Stroke", "Peripheral Vascular Disease", "Valve Disease")
cvd_any <- c(anycvdany, angany, cardioany, hfany, htany, ihdany, miany, strokeany, pvdany, vdany)
cvd_t1d <- c(anycvdt1d, angt1d, cardiot1d, hft1d, htt1d, ihdt1d, mit1d, stroket1d, pvdt1d, vdt1d)
cvd_t2d <- c(anycvdt2d, angt2d, cardiot2d, hft2d, htt2d, ihdt2d, mit2d, stroket2d, pvdt2d, vdt2d)
cvd <- data.frame(cvd_cat, cvd_any, cvd_t1d, cvd_t2d)
cvd

cvd_cat,cvd_any,cvd_t1d,cvd_t2d
<chr>,<chr>,<chr>,<chr>
Any CVD,16050 (36.27),392 (18.1),15822 (37.15)
Angina,4533 (10.24),92 (4.25),4483 (10.53)
Cardiomyopathy,1274 (2.88),28 (1.29),1255 (2.95)
Heart Failure,3644 (8.24),76 (3.51),3604 (8.46)
Hypertension,7545 (17.05),158 (7.29),7467 (17.53)
Iscaemic Heart Disease,2133 (4.82),42 (1.94),2110 (4.95)
Myocardial Infarction,1505 (3.4),33 (1.52),1489 (3.5)
Stroke,4908 (11.09),121 (5.59),4832 (11.35)
Peripheral Vascular Disease,1458 (3.29),79 (3.65),1409 (3.31)
Valve Disease,1535 (3.47),36 (1.66),1511 (3.55)


In [25]:

dc <- dc %>%
    mutate(deceased = ifelse(is.na(death_datetime.x),0,1))
table(dc$deceased)


    0     1 
33734 10543 

In [26]:
# Deceased
dth_any <- sum(dc$deceased == 1 & dc$any_diabetes == 1, na.rm = TRUE)
dth_any_p <- round((dth_any/any_diabetes_n)*100,2)
dthany <- paste0(dth_any," (",dth_any_p,")")

ndth_any <- sum(dc$deceased == 0 & dc$any_diabetes == 1, na.rm = TRUE)
ndth_any_p <- round((ndth_any/any_diabetes_n)*100,2)
ndthany <- paste0(ndth_any," (",ndth_any_p,")")


dth_t1d <- sum(dc$deceased == 1 & dc$t1d_status == 1, na.rm = TRUE)
dth_t1d_p <- round((dth_t1d/t1d_n)*100,2)
dtht1d <- paste0(dth_t1d," (",dth_t1d_p,")")

ndth_t1d <- sum(dc$deceased == 0 & dc$t1d_status == 1, na.rm = TRUE)
ndth_t1d_p <- round((ndth_t1d/t1d_n)*100,2)
ndtht1d <- paste0(ndth_t1d," (",ndth_t1d_p,")")


dth_t2d <- sum(dc$deceased == 1 & dc$t2d_status == 1, na.rm = TRUE)
dth_t2d_p <- round((dth_t2d/t2d_n)*100,2)
dtht2d <- paste0(dth_t2d," (",dth_t2d_p,")")

ndth_t2d <- sum(dc$deceased == 0 & dc$t2d_status == 1, na.rm = TRUE)
ndth_t2d_p <- round((ndth_t2d/t2d_n)*100,2)
ndtht2d <- paste0(ndth_t2d," (",ndth_t2d_p,")")


dd_cat <- c("Yes", "No")
dd_any <- c(dthany, ndthany)
dd_t1d <- c(dtht1d, ndtht1d)
dd_t2d <- c(dtht2d, ndtht2d)
dth <- data.frame(dd_cat, dd_any, dd_t1d, dd_t2d)
dth


dd_cat,dd_any,dd_t1d,dd_t2d
<chr>,<chr>,<chr>,<chr>
Yes,10528 (23.79),279 (12.88),10355 (24.31)
No,33722 (76.21),1887 (87.12),32234 (75.69)


In [27]:
colnames(dc)
dc %>% select(diab_death_time, cvd_death_time, diab_cvd_time) %>% slice(1:5)

diab_death_time,cvd_death_time,diab_cvd_time
<int>,<int>,<int>
551,,
17,2628.0,2611.0
2620,,
518,377.0,141.0
2264,,


In [37]:
# Change time between diagnosis and death to months
dc <- dc %>%
    mutate(diab_cvd_time_mth = (diab_cvd_time/30.4375),
          diab_death_time_mth = (diab_death_time/30.4375),
          cvd_death_time_mth = (cvd_death_time/30.4375))

In [43]:
# Time between diabetes diagnosis and cardiovascular disease
dd_mean_any <- round(mean(dc$diab_cvd_time_mth[dc$any_diabetes == 1], na.rm = TRUE), digits = 2)
dd_sd_any <- round(sd(dc$diab_cvd_time_mth[dc$any_diabetes == 1], na.rm = TRUE), digits = 2)
dd_any <- paste0(dd_mean_any," (",dd_sd_any,")")

dd_mean_t1d <- round(mean(dc$diab_cvd_time_mth[dc$t1d_status == 1], na.rm = TRUE), digits = 2)
dd_sd_t1d <- round(sd(dc$diab_cvd_time_mth[dc$t1d_status == 1], na.rm = TRUE), digits = 2)
dd_t1d <- paste0(dd_mean_t1d," (",dd_sd_t1d,")")

dd_mean_t2d <- round(mean(dc$diab_cvd_time_mth[dc$t2d_status == 1], na.rm = TRUE), digits = 2)
dd_sd_t2d <- round(sd(dc$diab_cvd_time_mth[dc$t2d_status == 1], na.rm = TRUE), digits = 2)
dd_t2d <- paste0(dd_mean_t2d," (",dd_sd_t2d,")")

dcvd_cat <- c("Diabetes diagnosis and Cardiovascular Disease")
dcvd_any <- c(dd_any)
dcvd_t1d <- c(dd_t1d)
dcvd_t2d <- c(dd_t2d)
dcvd <- data.frame(dcvd_any, dcvd_t1d, dcvd_t2d)
dcvd

dcvd_any,dcvd_t1d,dcvd_t2d
<chr>,<chr>,<chr>
66.08 (49.14),69.69 (49.11),66.01 (49.13)


In [44]:
# Time between diabetes and death
dd_mean_any <- round(mean(dc$diab_death_time_mth[dc$any_diabetes == 1], na.rm = TRUE), digits = 2)
dd_sd_any <- round(sd(dc$diab_death_time_mth[dc$any_diabetes == 1], na.rm = TRUE), digits = 2)
dd_any <- paste0(dd_mean_any," (",dd_sd_any,")")

dd_mean_t1d <- round(mean(dc$diab_death_time_mth[dc$t1d_status == 1], na.rm = TRUE), digits = 2)
dd_sd_t1d <- round(sd(dc$diab_death_time_mth[dc$t1d_status == 1], na.rm = TRUE), digits = 2)
dd_t1d <- paste0(dd_mean_t1d," (",dd_sd_t1d,")")

dd_mean_t2d <- round(mean(dc$diab_death_time_mth[dc$t2d_status == 1], na.rm = TRUE), digits = 2)
dd_sd_t2d <- round(sd(dc$diab_death_time_mth[dc$t2d_status == 1], na.rm = TRUE), digits = 2)
dd_t2d <- paste0(dd_mean_t2d," (",dd_sd_t2d,")")

dcvd_cat <- c("Diabetes diagnosis and death")
dcvd_any <- c(dd_any)
dcvd_t1d <- c(dd_t1d)
dcvd_t2d <- c(dd_t2d)
dcvd <- data.frame(dcvd_any, dcvd_t1d, dcvd_t2d)
dcvd

dcvd_any,dcvd_t1d,dcvd_t2d
<chr>,<chr>,<chr>
80.83 (50.92),75.89 (51.34),81 (50.87)


In [45]:
# Time between cardiovascular disease and death
dd_mean_any <- round(mean(dc$cvd_death_time_mth[dc$any_diabetes == 1], na.rm = TRUE), digits = 2)
dd_sd_any <- round(sd(dc$cvd_death_time_mth[dc$any_diabetes == 1], na.rm = TRUE), digits = 2)
dd_any <- paste0(dd_mean_any," (",dd_sd_any,")")

dd_mean_t1d <- round(mean(dc$cvd_death_time_mth[dc$t1d_status == 1], na.rm = TRUE), digits = 2)
dd_sd_t1d <- round(sd(dc$cvd_death_time_mth[dc$t1d_status == 1], na.rm = TRUE), digits = 2)
dd_t1d <- paste0(dd_mean_t1d," (",dd_sd_t1d,")")

dd_mean_t2d <- round(mean(dc$cvd_death_time_mth[dc$t2d_status == 1], na.rm = TRUE), digits = 2)
dd_sd_t2d <- round(sd(dc$cvd_death_time_mth[dc$t2d_status == 1], na.rm = TRUE), digits = 2)
dd_t2d <- paste0(dd_mean_t2d," (",dd_sd_t2d,")")

dcvd_cat <- c("CVD diagnosis and death")
dcvd_any <- c(dd_any)
dcvd_t1d <- c(dd_t1d)
dcvd_t2d <- c(dd_t2d)
dcvd <- data.frame(dcvd_any, dcvd_t1d, dcvd_t2d)
dcvd

dcvd_any,dcvd_t1d,dcvd_t2d
<chr>,<chr>,<chr>
100.07 (68.15),90.22 (61.28),100.23 (68.19)


In [64]:
# Replace 0s in Cholesterol, BMI, Smoking, BP, Creatinine, ACR, EGFR with NAs
dc$cholesterol[dc$cholesterol==0] <- NA
dc$smoking[dc$smoking==0] <- NA
dc$bmi[dc$bmi==0] <- NA
dc$blood_pressure[dc$blood_pressure==0] <- NA
dc$creatinine[dc$creatinine==0] <- NA
dc$acr[dc$acr==0] <- NA
dc$egfr[dc$egfr==0] <- NA

In [76]:
# Number of missing records
# Age
#sum(is.na(dc$age_earliest_diab[dc$any_diabetes == 1]))
#sum(is.na(dc$age_t1d_diag[dc$t1d_status == 1]))
#sum(is.na(dc$age_t2d_diag[dc$t2d_status == 1]))

# Gender
#sum(is.na(dc$gender[dc$any_diabetes == 1]))
#sum(is.na(dc$age_t1d_diag[dc$t1d_status == 1]))
#sum(is.na(dc$age_t2d_diag[dc$t2d_status == 1]))

# IMD
#sum(is.na(dc$imd_quintile[dc$any_diabetes == 1]))
#sum(is.na(dc$imd_quintile[dc$t1d_status == 1]))
#sum(is.na(dc$imd_quintile[dc$t2d_status == 1]))

# Ethnicity
#sum(is.na(dc$ethnicity[dc$any_diabetes == 1]))
#sum(is.na(dc$ethnicity[dc$t1d_status == 1]))
#sum(is.na(dc$ethnicity[dc$t2d_status == 1]))

# Smoking
smk_any_na <- sum(is.na(dc$smoking[dc$any_diabetes == 1]))
smk_any_na_p <- round((smk_any_na/any_diabetes_n)*100,2)
smkanyna <- paste0(smk_any_na," (",smk_any_na_p,")")
smk_t1d_na <- sum(is.na(dc$smoking[dc$t1d_status == 1]))
smk_t1d_na_p <- round((smk_t1d_na/t1d_n)*100,2)
smkt1dna <- paste0(smk_t1d_na," (",smk_t1d_na_p,")")
smk_t2d_na <- sum(is.na(dc$smoking[dc$t2d_status == 1]))
smk_t2d_na_p <- round((smk_t2d_na/t2d_n)*100,2)
smkt2dna <- paste0(smk_t2d_na," (",smk_t2d_na_p,")")
smkg_any_na <- c(smkanyna)
smkg_t1d_na <- c(smkt1dna)
smkg_t2d_na <- c(smkt2dna)
smkg_na <- data.frame(smkg_any_na, smkg_t1d_na, smkg_t2d_na)
smkg_na


# BMI
bmi_any_na <- sum(is.na(dc$bmi[dc$any_diabetes == 1]))
bmi_any_na_p <- round((bmi_any_na/any_diabetes_n)*100,2)
bmianyna <- paste0(bmi_any_na," (",bmi_any_na_p,")")
bmi_t1d_na <- sum(is.na(dc$bmi[dc$t1d_status == 1]))
bmi_t1d_na_p <- round((bmi_t1d_na/t1d_n)*100,2)
bmit1dna <- paste0(bmi_t1d_na," (",bmi_t1d_na_p,")")
bmi_t2d_na <- sum(is.na(dc$bmi[dc$t2d_status == 1]))
bmi_t2d_na_p <- round((bmi_t2d_na/t2d_n)*100,2)
bmit2dna <- paste0(bmi_t2d_na," (",bmi_t2d_na_p,")")
bmig_any_na <- c(bmianyna)
bmig_t1d_na <- c(bmit1dna)
bmig_t2d_na <- c(bmit2dna)
bmig_na <- data.frame(bmig_any_na, bmig_t1d_na, bmig_t2d_na)
bmig_na

# Blood Pressure
bp_any_na <- sum(is.na(dc$blood_pressure[dc$any_diabetes == 1]))
bp_any_na_p <- round((bp_any_na/any_diabetes_n)*100,2)
bpanyna <- paste0(bp_any_na," (",bp_any_na_p,")")
bp_t1d_na <- sum(is.na(dc$blood_pressure[dc$t1d_status == 1]))
bp_t1d_na_p <- round((bp_t1d_na/t1d_n)*100,2)
bpt1dna <- paste0(bp_t1d_na," (",bp_t1d_na_p,")")
bp_t2d_na <- sum(is.na(dc$blood_pressure[dc$t2d_status == 1]))
bp_t2d_na_p <- round((bp_t2d_na/t2d_n)*100,2)
bpt2dna <- paste0(bp_t2d_na," (",bp_t2d_na_p,")")
bpg_any_na <- c(bpanyna)
bpg_t1d_na <- c(bpt1dna)
bpg_t2d_na <- c(bpt2dna)
bpg_na <- data.frame(bpg_any_na, bpg_t1d_na, bpg_t2d_na)
bpg_na

# Cholesterol
cholesterol_any_na <- sum(is.na(dc$cholesterol[dc$any_diabetes == 1]))
cholesterol_any_na_p <- round((cholesterol_any_na/any_diabetes_n)*100,2)
cholesterolanyna <- paste0(cholesterol_any_na," (",cholesterol_any_na_p,")")
cholesterol_t1d_na <- sum(is.na(dc$cholesterol[dc$t1d_status == 1]))
cholesterol_t1d_na_p <- round((cholesterol_t1d_na/t1d_n)*100,2)
cholesterolt1dna <- paste0(cholesterol_t1d_na," (",cholesterol_t1d_na_p,")")
cholesterol_t2d_na <- sum(is.na(dc$cholesterol[dc$t2d_status == 1]))
cholesterol_t2d_na_p <- round((cholesterol_t2d_na/t2d_n)*100,2)
cholesterolt2dna <- paste0(cholesterol_t2d_na," (",cholesterol_t2d_na_p,")")
cholesterolg_any_na <- c(cholesterolanyna)
cholesterolg_t1d_na <- c(cholesterolt1dna)
cholesterolg_t2d_na <- c(cholesterolt2dna)
cholesterolg_na <- data.frame(cholesterolg_any_na, cholesterolg_t1d_na, cholesterolg_t2d_na)
cholesterolg_na

# Creatinine
creatinine_any_na <- sum(is.na(dc$creatinine[dc$any_diabetes == 1]))
creatinine_any_na_p <- round((creatinine_any_na/any_diabetes_n)*100,2)
creatinineanyna <- paste0(creatinine_any_na," (",creatinine_any_na_p,")")
creatinine_t1d_na <- sum(is.na(dc$creatinine[dc$t1d_status == 1]))
creatinine_t1d_na_p <- round((creatinine_t1d_na/t1d_n)*100,2)
creatininet1dna <- paste0(creatinine_t1d_na," (",creatinine_t1d_na_p,")")
creatinine_t2d_na <- sum(is.na(dc$creatinine[dc$t2d_status == 1]))
creatinine_t2d_na_p <- round((creatinine_t2d_na/t2d_n)*100,2)
creatininet2dna <- paste0(creatinine_t2d_na," (",creatinine_t2d_na_p,")")
creatinineg_any_na <- c(creatinineanyna)
creatinineg_t1d_na <- c(creatininet1dna)
creatinineg_t2d_na <- c(creatininet2dna)
creatinineg_na <- data.frame(creatinineg_any_na, creatinineg_t1d_na, creatinineg_t2d_na)
creatinineg_na

# ACR
acr_any_na <- sum(is.na(dc$acr[dc$any_diabetes == 1]))
acr_any_na_p <- round((acr_any_na/any_diabetes_n)*100,2)
acranyna <- paste0(acr_any_na," (",acr_any_na_p,")")
acr_t1d_na <- sum(is.na(dc$acr[dc$t1d_status == 1]))
acr_t1d_na_p <- round((acr_t1d_na/t1d_n)*100,2)
acrt1dna <- paste0(acr_t1d_na," (",acr_t1d_na_p,")")
acr_t2d_na <- sum(is.na(dc$acr[dc$t2d_status == 1]))
acr_t2d_na_p <- round((acr_t2d_na/t2d_n)*100,2)
acrt2dna <- paste0(acr_t2d_na," (",acr_t2d_na_p,")")
acrg_any_na <- c(acranyna)
acrg_t1d_na <- c(acrt1dna)
acrg_t2d_na <- c(acrt2dna)
acrg_na <- data.frame(acrg_any_na, acrg_t1d_na, acrg_t2d_na)
acrg_na

# eGFR
egfr_any_na <- sum(is.na(dc$egfr[dc$any_diabetes == 1]))
egfr_any_na_p <- round((egfr_any_na/any_diabetes_n)*100,2)
egfranyna <- paste0(egfr_any_na," (",egfr_any_na_p,")")
egfr_t1d_na <- sum(is.na(dc$egfr[dc$t1d_status == 1]))
egfr_t1d_na_p <- round((egfr_t1d_na/t1d_n)*100,2)
egfrt1dna <- paste0(egfr_t1d_na," (",egfr_t1d_na_p,")")
egfr_t2d_na <- sum(is.na(dc$egfr[dc$t2d_status == 1]))
egfr_t2d_na_p <- round((egfr_t2d_na/t2d_n)*100,2)
egfrt2dna <- paste0(egfr_t2d_na," (",egfr_t2d_na_p,")")
egfrg_any_na <- c(egfranyna)
egfrg_t1d_na <- c(egfrt1dna)
egfrg_t2d_na <- c(egfrt2dna)
egfrg_na <- data.frame(egfrg_any_na, egfrg_t1d_na, egfrg_t2d_na)
egfrg_na

smkg_any_na,smkg_t1d_na,smkg_t2d_na
<chr>,<chr>,<chr>
22428 (50.68),1364 (62.97),21355 (50.14)


bmig_any_na,bmig_t1d_na,bmig_t2d_na
<chr>,<chr>,<chr>
3062 (6.92),331 (15.28),2776 (6.52)


bpg_any_na,bpg_t1d_na,bpg_t2d_na
<chr>,<chr>,<chr>
42073 (95.08),2112 (97.51),40448 (94.97)


cholesterolg_any_na,cholesterolg_t1d_na,cholesterolg_t2d_na
<chr>,<chr>,<chr>
44080 (99.62),2157 (99.58),42427 (99.62)


creatinineg_any_na,creatinineg_t1d_na,creatinineg_t2d_na
<chr>,<chr>,<chr>
2020 (4.56),550 (25.39),1511 (3.55)


acrg_any_na,acrg_t1d_na,acrg_t2d_na
<chr>,<chr>,<chr>
13835 (31.27),1145 (52.86),12865 (30.21)


egfrg_any_na,egfrg_t1d_na,egfrg_t2d_na
<chr>,<chr>,<chr>
44225 (99.94),2166 (100),42564 (99.94)


In [None]:
# subset cvds and diabetes
# Angina
angina <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_angina_diagnosis, has_angina) %>%
    mutate(incident = ifelse(date_angina_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_angina_diagnosis < date_earliest_diabetes,1,0))

# Cardiomyopathy
cardiomyopathy <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_cardiomyopathy_diagnosis, has_cardiomyopathy) %>%
    mutate(incident = ifelse(date_cardiomyopathy_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_cardiomyopathy_diagnosis < date_earliest_diabetes,1,0))

# Heart Failure
heart_failure <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_heart_failure_diagnosis, has_heart_failure) %>%
    mutate(incident = ifelse(date_heart_failure_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_heart_failure_diagnosis < date_earliest_diabetes,1,0))

# Hypertension
hypertension <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_hypertension_diagnosis, has_hypertension) %>%
    mutate(incident = ifelse(date_hypertension_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_hypertension_diagnosis < date_earliest_diabetes,1,0))

# IHD
ihd <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_ihd_diagnosis, has_ihd) %>%
    mutate(incident = ifelse(date_ihd_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_ihd_diagnosis < date_earliest_diabetes,1,0))

# MI
mi <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_mi_diagnosis, has_mi) %>%
    mutate(incident = ifelse(date_mi_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_mi_diagnosis < date_earliest_diabetes,1,0))

# PVD
pvd <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_pvd_diagnosis, has_pvd) %>%
    mutate(incident = ifelse(date_pvd_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_pvd_diagnosis < date_earliest_diabetes,1,0))

# Stroke
stroke <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_stroke_diagnosis, has_stroke) %>%
    mutate(incident = ifelse(date_stroke_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_stroke_diagnosis < date_earliest_diabetes,1,0))

# Valve Disease
valve_disease <- dc %>%
    select(person_id, date_earliest_diabetes, any_diabetes, date_valve_disease_diagnosis, has_valve_disease) %>%
    mutate(incident = ifelse(date_valve_disease_diagnosis > date_earliest_diabetes,1,0)) %>%
    mutate(prevalent = ifelse(date_valve_disease_diagnosis < date_earliest_diabetes,1,0))

In [None]:
nrow(angina)
table(angina$incident)
table(angina$prevalent)
nrow(cardiomyopathy)
table(cardiomyopathy$incident)
table(cardiomyopathy$prevalent)
nrow(heart_failure)
table(heart_failure$incident)
table(heart_failure$prevalent)
nrow(hypertension)
table(hypertension$incident)
table(hypertension$prevalent)
nrow(ihd)
table(ihd$incident)
table(ihd$prevalent)
nrow(mi)
table(mi$incident)
table(mi$prevalent)
nrow(pvd)
table(pvd$incident)
table(pvd$prevalent)
nrow(stroke)
table(stroke$incident)
table(stroke$prevalent)
nrow(valve_disease)
table(valve_disease$incident)
table(valve_disease$prevalent)

In [48]:
sum(dc$any_diabetes == 1 & !is.na(dc$death_datetime.x))