In [31]:
library(bigrquery)
library(tidyverse)

In [32]:
# Store the project ID
projectid = "yhcr-prd-phm-bia-core"

In [33]:
# load diabetes and covariates
# Diabetes
sql1 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Diabetes`"
tb1 <- bq_project_query(projectid, sql1)
diabetes <-bq_table_download(tb1)

# Cholesterol
sql2 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Cholesterol`"
tb2 <- bq_project_query(projectid, sql2)
cholesterol <-bq_table_download(tb2)

# BMI
sql3 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.BMI`"
tb3 <- bq_project_query(projectid, sql3)
bmi <-bq_table_download(tb3)

# Smoking
sql4 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Smoking`"
tb4 <- bq_project_query(projectid, sql4)
smoking <-bq_table_download(tb4)

# Blood Pressure
sql5 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Blood_Pressure`"
tb5 <- bq_project_query(projectid, sql5)
bp <-bq_table_download(tb5)

# Renal Disease: Creatinine
sql6 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Creatinine`"
tb6 <- bq_project_query(projectid, sql6)
creatinine <-bq_table_download(tb6)

# Renal Disease: ACR
sql7 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.ACR`"
tb7 <- bq_project_query(projectid, sql7)
acr <-bq_table_download(tb7)

# Renal Disease: eGFR
sql8 <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.eGFR`"
tb8 <- bq_project_query(projectid, sql8)
egfr <-bq_table_download(tb8)

In [40]:
# drop BMI values <14 or >50
bmi <- bmi %>% filter(!bmi_value < 14) %>% filter(!bmi_value > 50)
nrow(bmi)

In [41]:
# Join diabetes and BMI tables
diab_bmi <- left_join(diabetes, bmi, by = "person_id")

# add indicator for bmi event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_bmi <- diab_bmi %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_bmi))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_bmi = ifelse(order == 1, 1,0)) %>%
    filter(closest_bmi == 1)
head(diab_bmi)
sum(duplicated(diab_bmi))

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_bmi,bmi,bmi_value,diff_date,order,closest_bmi
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<dbl>,<int>,<int>,<dbl>
147,,,0,42.0,2018-01-29,1,42,2018-01-29,1,,0,2018-01-19,3,26.07897,10,1,1
230,,,0,53.0,2018-12-12,1,53,2018-12-12,1,,0,2018-12-18,3,28.44095,6,1,1
1153,19.0,2007-02-19,1,,,0,19,2007-02-19,1,,0,2007-03-15,2,22.3027,24,1,1
1168,,,0,40.0,2007-04-25,1,40,2007-04-25,1,,0,2007-04-25,4,35.0,0,1,1
1422,,,0,61.0,2013-01-10,1,61,2013-01-10,1,,0,2012-11-14,4,36.57979,57,1,1
1535,,,0,78.0,2007-04-23,1,78,2007-04-23,1,2033.0,0,2007-04-23,4,37.2,0,1,1


In [42]:
# subset bmi 
BMI <- diab_bmi %>%
select(person_id, date_bmi, bmi, closest_bmi, bmi_value) %>%
filter(!is.na(date_bmi))
head(BMI)
sum(duplicated(BMI))

person_id,date_bmi,bmi,closest_bmi,bmi_value
<int>,<date>,<int>,<dbl>,<dbl>
147,2018-01-19,3,1,26.07897
230,2018-12-18,3,1,28.44095
1153,2007-03-15,2,1,22.3027
1168,2007-04-25,4,1,35.0
1422,2012-11-14,4,1,36.57979
1535,2007-04-23,4,1,37.2


In [44]:
nrow(BMI)
table(BMI$bmi)


    1     2     3     4 
  317  4674 11055 17944 

In [45]:
# join diabetes and chol
diab_chol <- left_join(diabetes, cholesterol, by = "person_id")
# add indicator for chol event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_chol <- diab_chol %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_cholesterol))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_cholesterol = ifelse(order == 1, 1,0)) %>%
    filter(closest_cholesterol == 1)
head(diab_chol)

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_cholesterol,cholesterol,diff_date,order,closest_cholesterol
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<int>,<int>,<dbl>
196901,23.0,2009-04-21,1,,,0,23,2009-04-21,1,,0,2009-04-16,1,5,1,1
275085,,,0,34.0,2016-06-07,1,34,2016-06-07,1,,0,2016-06-06,2,1,1,1
448093,,,0,72.0,2013-09-03,1,72,2013-09-03,1,3329.0,0,2013-09-03,1,0,1,1
512392,,,0,75.0,2010-04-13,1,75,2010-04-13,1,489.0,0,2010-05-28,1,45,1,1
605409,,,0,70.0,2014-11-24,1,70,2014-11-24,1,1421.0,0,2014-09-09,1,76,1,1
685452,,,0,42.0,2015-05-29,1,42,2015-05-29,1,,0,2015-05-26,2,3,1,1


In [46]:
# subset chol 
Chol <- diab_chol %>%
select(person_id, date_cholesterol, cholesterol, closest_cholesterol) %>%
filter(!is.na(date_cholesterol))
head(Chol)
nrow(Chol)

person_id,date_cholesterol,cholesterol,closest_cholesterol
<int>,<date>,<int>,<dbl>
196901,2009-04-16,1,1
275085,2016-06-06,2,1
448093,2013-09-03,1,1
512392,2010-05-28,1,1
605409,2014-09-09,1,1
685452,2015-05-26,2,1


In [47]:
table(Chol$cholesterol)


 1  2 
84 44 

In [48]:
# join diabetes and smoking
diab_smoke <- left_join(diabetes, smoking, by = "person_id")
# add indicator for smoke event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_smoke <- diab_smoke %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_smoking))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_smoking = ifelse(order == 1, 1,0)) %>%
    filter(closest_smoking == 1)
head(diab_smoke)

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_smoking,smoking,diff_date,order,closest_smoking
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<int>,<int>,<dbl>
1422,,,0,61,2013-01-10,1,61,2013-01-10,1,,0,2012-11-14,1,57,1,1
1535,,,0,78,2007-04-23,1,78,2007-04-23,1,2033.0,0,2007-11-02,1,193,1,1
2115,,,0,73,2008-07-22,1,73,2008-07-22,1,754.0,0,2008-08-21,2,30,1,1
2173,,,0,58,2014-09-09,1,58,2014-09-09,1,,0,2014-09-03,1,6,1,1
2369,,,0,53,2008-10-09,1,53,2008-10-09,1,,0,2008-06-12,2,119,1,1
2427,,,0,64,2017-10-20,1,64,2017-10-20,1,,0,2017-05-02,2,171,1,1


In [49]:
# subset smoking 
Smoke <- diab_smoke %>%
select(person_id, date_smoking, smoking, closest_smoking) %>%
filter(!is.na(date_smoking))
head(Smoke)
nrow(Smoke)

person_id,date_smoking,smoking,closest_smoking
<int>,<date>,<int>,<dbl>
1422,2012-11-14,1,1
1535,2007-11-02,1,1
2115,2008-08-21,2,1
2173,2014-09-03,1,1
2369,2008-06-12,2,1
2427,2017-05-02,2,1


In [50]:
table(Smoke$smoking)


    1     2     3 
 6738 10250  1436 

In [51]:
# join diabetes and BP
diab_bp <- left_join(diabetes, bp, by = "person_id")
# add indicator for bp event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_bp <- diab_bp %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_bp))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_bp = ifelse(order == 1, 1,0)) %>%
    filter(closest_bp == 1)
head(diab_bp)
sum(duplicated(diab_bp))

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_bp,blood_pressure,sbp_value,dbp_value,diff_date,order,closest_bp
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<dbl>,<dbl>,<int>,<int>,<dbl>
36414,,,0,67.0,2018-08-24,1,67,2018-08-24,1,,0,2018-08-24,2.0,130.0,69,0,1,1
43294,,,0,52.0,2007-02-15,1,52,2007-02-15,1,3256.0,0,2007-09-20,2.0,90.0,70,217,1,1
54241,6.0,2017-12-01,1,,,0,6,2017-12-01,1,,0,2018-01-10,,,70,40,1,1
55960,33.0,2009-06-23,1,43.0,2019-04-30,1,33,2009-06-23,1,,0,2009-11-12,3.0,176.0,106,142,1,1
74946,,,0,54.0,2017-01-27,1,54,2017-01-27,1,,0,2017-02-21,3.0,155.0,99,25,1,1
78568,,,0,83.0,2013-11-05,1,83,2013-11-05,1,,0,2013-02-04,3.0,176.0,74,274,1,1


In [52]:
# subset bp 
BP <- diab_bp %>%
select(person_id, date_bp, blood_pressure, sbp_value, dbp_value, closest_bp) %>%
filter(!is.na(date_bp))
head(BP)
nrow(BP)
sum(duplicated(BP))

person_id,date_bp,blood_pressure,sbp_value,dbp_value,closest_bp
<int>,<date>,<int>,<dbl>,<dbl>,<dbl>
36414,2018-08-24,2.0,130.0,69,1
43294,2007-09-20,2.0,90.0,70,1
54241,2018-01-10,,,70,1
55960,2009-11-12,3.0,176.0,106,1
74946,2017-02-21,3.0,155.0,99,1
78568,2013-02-04,3.0,176.0,74,1


In [53]:
table(BP$blood_pressure)


  1   2   3 
155 813 607 

In [12]:
# join diabetes and renal
#diab_renal <- left_join(diabetes, renal, by = "person_id")
# keep date of bmi event closest to date of diabetes diagnosis
#diab_renal <- diab_renal %>%
#    mutate(diff_date = abs(date_earliest_diabetes - date_renal)) %>%
#    group_by(person_id) %>%
#    slice_min(diff_date, with_ties = FALSE)
#head(diab_renal)


In [54]:
# Join diabetes and creatinine
diab_cr <- left_join(diabetes, creatinine, by = "person_id")

# add indicator for creatinine event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_cr <- diab_cr %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_creatinine))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_creatinine = ifelse(order == 1, 1,0)) %>%
    filter(closest_creatinine == 1)
head(diab_cr)

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_creatinine,creatinine,diff_date,order,closest_creatinine
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<int>,<int>,<dbl>
147,,,0,42.0,2018-01-29,1,42,2018-01-29,1,,0,2018-01-19,3,10,1,1
1153,19.0,2007-02-19,1,,,0,19,2007-02-19,1,,0,2007-09-26,3,219,1,1
1168,,,0,40.0,2007-04-25,1,40,2007-04-25,1,,0,2007-03-23,3,33,1,1
1188,,,0,61.0,2011-06-06,1,61,2011-06-06,1,3114.0,0,2011-07-08,3,32,1,1
1422,,,0,61.0,2013-01-10,1,61,2013-01-10,1,,0,2013-08-06,3,208,1,1
1535,,,0,78.0,2007-04-23,1,78,2007-04-23,1,2033.0,0,2007-04-23,2,0,1,1


In [55]:
# subset cr 
Creatinine <- diab_cr %>%
select(person_id, date_creatinine, creatinine, closest_creatinine) %>%
filter(!is.na(date_creatinine))
head(Creatinine)
nrow(Creatinine)

person_id,date_creatinine,creatinine,closest_creatinine
<int>,<date>,<int>,<dbl>
147,2018-01-19,3,1
1153,2007-09-26,3,1
1168,2007-03-23,3,1
1188,2011-07-08,3,1
1422,2013-08-06,3,1
1535,2007-04-23,2,1


In [56]:
table(Creatinine$creatinine)


    1     2     3 
  410  9135 25563 

In [57]:
# Join diabetes and albumin:creatinine ratio
diab_acr <- left_join(diabetes, acr, by = "person_id")

# add indicator for acr event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_acr <- diab_acr %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_acr))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_acr = ifelse(order == 1, 1,0)) %>%
    filter(closest_acr == 1)
head(diab_acr)

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_acr,acr,diff_date,order,closest_acr
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<int>,<int>,<dbl>
147,,,0,42.0,2018-01-29,1,42,2018-01-29,1,,0,2018-01-19,1,10,1,1
1153,19.0,2007-02-19,1,,,0,19,2007-02-19,1,,0,2006-08-22,1,181,1,1
1168,,,0,40.0,2007-04-25,1,40,2007-04-25,1,,0,2006-11-29,1,147,1,1
1535,,,0,78.0,2007-04-23,1,78,2007-04-23,1,2033.0,0,2007-04-23,1,0,1,1
2173,,,0,58.0,2014-09-09,1,58,2014-09-09,1,,0,2014-09-24,1,15,1,1
2427,,,0,64.0,2017-10-20,1,64,2017-10-20,1,,0,2017-10-05,1,15,1,1


In [58]:
# subset albumin:creatinine ratio
ACR <- diab_acr %>%
select(person_id, date_acr, acr, closest_acr) %>%
filter(!is.na(date_acr))
head(ACR)
nrow(ACR)

person_id,date_acr,acr,closest_acr
<int>,<date>,<int>,<dbl>
147,2018-01-19,1,1
1153,2006-08-22,1,1
1168,2006-11-29,1,1
1535,2007-04-23,1,1
2173,2014-09-24,1,1
2427,2017-10-05,1,1


In [59]:
table(ACR$acr)


    1     2     3 
22989    20  3159 

In [60]:
# Join diabetes and egfr
diab_egfr <- left_join(diabetes, egfr, by = "person_id")

# add indicator for egfr event closest to date of diabetes diagnosis, drop those that are > 365 days different
diab_egfr <- diab_egfr %>%
    mutate(diff_date = as.integer(abs(earliest_diabetes_diag - date_egfr))) %>%
    filter(!diff_date >= 366) %>%
    group_by(person_id) %>%
    arrange(person_id, diff_date) %>%
    mutate(order = row_number()) %>%
    ungroup() %>%
    mutate(closest_egfr = ifelse(order == 1, 1,0)) %>%
    filter(closest_egfr == 1)
head(diab_egfr)

person_id,age_t1d_diag,date_t1d_diag,t1d_status,age_t2d_diag,date_t2d_diag,t2d_status,age_earliest_diab,earliest_diabetes_diag,any_diabetes,diab_death_time,diabetes_meds,date_egfr,egfr,diff_date,order,closest_egfr
<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<date>,<int>,<int>,<int>,<date>,<int>,<int>,<int>,<dbl>
12414023,,,0,45,2014-12-08,1,45,2014-12-08,1,,0,2014-05-19,2,203,1,1
12429763,,,0,51,2018-02-23,1,51,2018-02-23,1,,0,2018-07-11,2,138,1,1
12583239,,,0,49,2018-01-17,1,49,2018-01-17,1,,0,2017-06-29,2,202,1,1
12764746,,,0,56,2016-07-15,1,56,2016-07-15,1,1279.0,0,2016-02-27,1,139,1,1
12787579,,,0,72,2018-03-16,1,72,2018-03-16,1,,0,2018-10-10,2,208,1,1
13005441,,,0,48,2018-10-15,1,48,2018-10-15,1,,0,2018-08-01,2,75,1,1


In [61]:
# subset egfr 
EGFR <- diab_egfr %>%
select(person_id, date_egfr, egfr, closest_egfr) %>%
filter(!is.na(date_egfr))
head(EGFR)
nrow(EGFR)

person_id,date_egfr,egfr,closest_egfr
<int>,<date>,<int>,<dbl>
12414023,2014-05-19,2,1
12429763,2018-07-11,2,1
12583239,2017-06-29,2,1
12764746,2016-02-27,1,1
12787579,2018-10-10,2,1
13005441,2018-08-01,2,1


In [62]:
table(EGFR$egfr)


1 2 
1 7 

In [None]:
# join all covariates to patient table
# import patient table
sql <- "SELECT *  FROM `yhcr-prd-phm-bia-core.CB_MYSPACE_AH.Patient_Denom`"
tb <- bq_project_query(projectid, sql)
patient_denom <-bq_table_download(tb)
head(patient_denom)

In [None]:
# join Chol, BMI, Smoke, BP and Renal_D to patient_denom
patient <- left_join(patient_denom, Chol)
patient <- left_join(patient, BMI)
patient <- left_join(patient, Smoke)
patient <- left_join(patient, BP)
patient <- left_join(patient, Creatinine)
patient <- left_join(patient, ACR)
patient <- left_join(patient, EGFR)

In [None]:
head(patient)
nrow(patient)
sum(!is.na(patient$cholesterol))
sum(!is.na(patient$bmi))
sum(!is.na(patient$smoking))
sum(!is.na(patient$blood_pressure))
sum(!is.na(patient$creatinine))
sum(!is.na(patient$acr))
sum(!is.na(patient$egfr))

In [22]:
patient_covs <- patient %>%
    mutate_at(c('cholesterol','bmi', 'smoking', 'blood_pressure', 'creatinine', 'acr', 'egfr'), ~replace_na(.,0))

In [23]:
table(patient_covs$gender)
table(patient_covs$ethnicity)
table(patient_covs$imd_decile)
table(patient_covs$cholesterol)
table(patient_covs$bmi)
table(patient_covs$smoking)
table(patient_covs$blood_pressure)
table(patient_covs$creatinine)
table(patient_covs$acr)
table(patient_covs$egfr)


     1      2 
385987 398199 


     1      2      3 
495036 202615  86535 


     1      2      3      4      5      6      7      8      9     10 
290711  77500  93078  74043  53405  63714  42832  40513  30533  17857 


     0      1      2 
784058     84     44 


     0      1      2      3      4 
750196    317   4674  11055  17944 


     0      1      2      3 
765762   6738  10250   1436 


     0      1      2      3 
782611    155    813    607 


     0      1      2      3 
749078    410   9135  25563 


     0      1      2      3 
758018  22989     20   3159 


     0      1      2 
784178      1      7 

In [24]:
rm(list=setdiff(ls(), "patient_covs"))
ls()

In [28]:
nrow(patient_covs)
table(patient_covs$t1d_status)

“Unknown or uninitialised column: `t1d_status`.”


< table of extent 0 >

In [25]:
#colnames(patient_covs)
#patient_bp_bmi <- patient_covs %>%
#    select(person_id, birth_datetime, death_datetime, gender, ethnicity, imd_decile, date_bmi, bmi, bmi_value, closest_bmi, date_bp, blood_pressure, sbp_value, dbp_value, closest_bp)
#patient_covs <- patient_covs %>%
#    select(-date_bmi, -bmi, -bmi_value, -closest_bmi, -date_bp, -blood_pressure, -sbp_value, -dbp_value, -closest_bp)

In [26]:
# save patient_covs as csv
write.csv(patient_covs, '/home/jupyter/T2D_CVD_Study/Patient_Covariates_Table.csv')

In [None]:
sum(duplicated(patient_covs))

In [None]:
bq_auth()

In [118]:
colnames(patient_covs)

In [120]:
# save new patient table
# set destination table - do this regardless of whether it already exists
dest_dataset <- bq_dataset("yhcr-prd-phm-bia-core","CB_MYSPACE_AH")
dest_table <- bq_table(dest_dataset, "Patient_Covariates_Table_1")

# create a full table spec as follows
# then pass this to bq_table_upload
dest_fields <- bq_fields(list(bq_field("person_id", "INT64", "REQUIRED"),
                              bq_field("birth_datetime", "DATE"),
                              bq_field("death_datetime", "DATE"),
                              bq_field("gender", "INT64"),
                              bq_field("ethnicity", "INT64"),
                              bq_field("imd_decile", "INT64"),
                              bq_field("date_cholesterol", "DATE"),
                              bq_field("cholesterol", "INT64"),
                              bq_field("closest_cholesterol", "INT64"),
                              bq_field("date_bmi", "DATE"),
                              bq_field("bmi", "INT64"),
                              bq_field("bmi_value", "FLOAT64"),
                              bq_field("closest_bmi", "INT64"),
                              bq_field("date_smoking", "DATE"),
                              bq_field("smoking", "INT64"),
                              bq_field("closest_smoking", "INT64"),
                              bq_field("date_bp", "DATE"),
                              bq_field("blood_pressure", "INT64"),
                              bq_field("sbp_value", "FLOAT64"),
                              bq_field("dbp_value", "FLOAT64"),
                              bq_field("closest_bp", "INT64"),
                              bq_field("date_creatinine", "DATE"),
                              bq_field("creatinine", "INT64"),
                              bq_field("closest_creatinine", "INT64"),
                              bq_field("date_acr", "DATE"),
                              bq_field("acr", "INT64"),
                              bq_field("closest_acr", "INT64"),
                              bq_field("date_egfr", "DATE"),
                              bq_field("egfr", "INT64"),
                              bq_field("closest_egfr", "INT64")))

# delete an existing table if necessary
if(bq_table_exists(dest_table)) bq_table_delete(dest_table)
# upload file_data to dest_table
bq_table_upload(dest_table, patient_covs, fields = dest_fields, create_disposition='CREATE_IF_NEEDED', write_disposition='WRITE_TRUNCATE')

ERROR: Error in rawToChar(rawConnectionValue(con)): long vectors not supported yet: raw.c:68


In [147]:
# save new patient table
# set destination table - do this regardless of whether it already exists
dest_dataset <- bq_dataset("yhcr-prd-phm-bia-core","CB_MYSPACE_AH")
dest_table <- bq_table(dest_dataset, "Patient_Covariates_Table1")

# create a full table spec as follows
# then pass this to bq_table_upload
dest_fields <- bq_fields(list(bq_field("person_id", "INT64", "REQUIRED"),
                              bq_field("birth_datetime", "DATE"),
                              bq_field("death_datetime", "DATE"),
                              bq_field("gender", "INT64"),
                              bq_field("ethnicity", "INT64"),
                              bq_field("imd_decile", "INT64"),
                              bq_field("date_cholesterol", "DATE"),
                              bq_field("cholesterol", "INT64"),
                              bq_field("closest_cholesterol", "INT64"),
                              bq_field("date_smoking", "DATE"),
                              bq_field("smoking", "INT64"),
                              bq_field("closest_smoking", "INT64"),
                              bq_field("date_creatinine", "DATE"),
                              bq_field("creatinine", "INT64"),
                              bq_field("closest_creatinine", "INT64"),
                              bq_field("date_acr", "DATE"),
                              bq_field("acr", "INT64"),
                              bq_field("closest_acr", "INT64"),
                              bq_field("date_egfr", "DATE"),
                              bq_field("egfr", "INT64"),
                              bq_field("closest_egfr", "INT64")))

# delete an existing table if necessary
if(bq_table_exists(dest_table)) bq_table_delete(dest_table)
# upload file_data to dest_table
bq_table_upload(dest_table, patient_covs, fields = dest_fields, create_disposition='CREATE_IF_NEEDED', write_disposition='WRITE_TRUNCATE')

ERROR: Error in rawToChar(rawConnectionValue(con)): long vectors not supported yet: raw.c:68


In [59]:
# save new patient table
# set destination table - do this regardless of whether it already exists
dest_dataset <- bq_dataset("yhcr-prd-phm-bia-core","CB_MYSPACE_AH")
dest_table <- bq_table(dest_dataset, "Patient_Covariates_Table_2")

# create a full table spec as follows
# then pass this to bq_table_upload
dest_fields <- bq_fields(list(bq_field("person_id", "INT64", "REQUIRED"),
                              bq_field("birth_datetime", "DATE"),
                              bq_field("death_datetime", "DATE"),
                              bq_field("gender", "INT64"),
                              bq_field("ethnicity", "INT64"),
                              bq_field("imd_decile", "INT64"),
                              bq_field("date_bmi", "DATE"),
                              bq_field("bmi", "INT64"),
                              bq_field("bmi_value", "FLOAT64"),
                              bq_field("closest_bmi", "INT64"),
                              bq_field("date_bp", "DATE"),
                              bq_field("blood_pressure", "INT64"),
                              bq_field("sbp_value", "FLOAT64"),
                              bq_field("dbp_value", "FLOAT64"),
                              bq_field("closest_bp", "INT64")))

# delete an existing table if necessary
if(bq_table_exists(dest_table)) bq_table_delete(dest_table)
# upload file_data to dest_table
bq_table_upload(dest_table, patient_bp_bmi, fields = dest_fields, create_disposition='CREATE_IF_NEEDED', write_disposition='WRITE_TRUNCATE')