In [152]:
# this notebook is perform anaylsis of MACE events in the care home hypertension cohort 

In [153]:
library(ggplot2)
library(bigrquery)
library(tidyverse)
library(ggplot2)
library(lubridate)
library(stats)
library(epitools)

ProjectId = "yhcr-prd-bradfor-bia-core"

targetdb3 <- 'yhcr-prd-bradfor-bia-core.CB_2172'
targetdb3 <- gsub(' ','',targetdb3)

In [154]:
#select events from the mace table, left join with admission dates and death dates from the care home cohrot master table
sql1 <- 
paste(
    'SELECT  
a.*, b.dod as death_date, b.episodestartdate as ch_admission_date
from ',targetdb3,'.cb_2172_mace a
left join ',targetdb3,'.care_home_cohort_v1 b 
on a.person_id = b.person_id
where a.person_id in(select person_id from ',targetdb3,'.care_home_cohort_v1) 

;',sep="")

mace_tbl <-bq_project_query(ProjectId,sql1)
mace_data <- bq_table_download(mace_tbl)

Auto-refreshing stale OAuth token.



In [155]:
#dod was a misnomer so correct to reflec the fact that it represents the date
mace_data$date.event <- as.Date(mace_data$dod)

mace_data <- mace_data %>% select(-dod)

In [156]:
#rank mace events for each person so we can identify first event
mace_data_event_ranked <- mace_data %>% 
                arrange(person_id, desc(date.event)) %>% 
                    mutate(event_id= rank(date.event))
#here is the initial data, the problem is there are some  duplicate events where the data differs
#by only a couple of days so they are counted as different events. 
#Some cardiovascular deaths also have  the cardiovascular event (e.g. myocardial evvent,
#recorded as a seperate event a few days apart). I need to remove these events so that only the cardiovascular death is recorded. 
#This is based on the assumption that where a stroke or MI has been recorded within 30 days of a cardiovascular death, 
# the stroke or MI can be considered fatal one so should be recorded as a CV death 
#rather than a non fatal MI or stroke. 

In [157]:
#remove mace events that occurred at same time as care home admission since these are assumed to have happened before care home admission
mace_data_event_ranked <- mace_data_event_ranked %>% filter(date.event > ch_admission_date)

In [158]:
# this identifies myocardial infarction events where another event occurs afterwards
# (either a duplicate event or death), so these myocardial events can be removed, 
#i will do the same for stroke as well. this ensures only non fatal myocardial infarctions 
# and strokes are captured in the MACE outcomes. fatal myocardial infarctions and stroke would fall into CV death 
mace_data_filtered_mi_cvd <- mace_data_event_ranked %>%
                                group_by(person_id) %>% 
                                filter(event_type == 'myocardial infarction' & 
                                !((date.event - lag(date.event)) >= 30 &
                                (date.event - lag(date.event)) > 0)) %>%
                                ungroup() 

mace_data_filtered_mi_cvd

event_type,person_id,death_date,ch_admission_date,date.event,event_id
<chr>,<int>,<date>,<date>,<date>,<dbl>
myocardial infarction,879366,2019-06-15,2018-08-15,2019-06-10,122.0
myocardial infarction,12394871,2021-04-15,2018-07-20,2019-03-12,104.0
myocardial infarction,12755225,2022-03-15,2018-12-05,2019-07-06,128.5
myocardial infarction,12831720,2018-01-15,2017-06-05,2018-01-09,54.0
myocardial infarction,13580091,2018-07-15,2018-04-23,2018-05-07,70.0
myocardial infarction,13580202,2017-12-15,2016-05-31,2016-06-06,1.0


In [159]:

# this identifies stroke events where another event occurs afterwards(either a duplicate event or death), so these myocardial events can be removed, i will do the same for stroke as well.
#this ensures only non fatal myocardial infarctions and strokes are captured in the MACE outcomes.
#fatal myocardial infarctions and stroke would fall into CV death 
mace_filtered_stroke_cvd <- mace_data_event_ranked %>%
                              group_by(person_id) %>%
                              filter(event_type == 'stroke' & 
                              !((date.event - lag(date.event)) >= 30 &
                              (date.event - lag(date.event)) > 0)) %>%
                              ungroup() 

mace_filtered_stroke_cvd

event_type,person_id,death_date,ch_admission_date,date.event,event_id
<chr>,<int>,<date>,<date>,<date>,<dbl>
stroke,12921291,2017-01-15,2016-04-26,2016-12-29,9
stroke,13424129,2019-05-15,2018-09-17,2019-05-13,118


In [160]:
#i then subsetted the mace_Data to remove those duplicate stroke and myocardial events identified above

In [161]:
#creat he not in function

'%notin%' <- Negate(`%in%`)

In [162]:
mace_data_filtered <- mace_data_event_ranked[mace_data_event_ranked$event_id %notin% mace_data_filtered_mi_cvd$event_id, ]

In [163]:
mace_data_filtered <- mace_data_filtered[mace_data_filtered$date.event %notin% mace_filtered_stroke_cvd$date.event,]

In [164]:
mace_data_filtered <- mace_data_filtered %>% arrange(person_id, desc(date.event))

table(mace_data_filtered$event_type)



             cv death myocardial infarction                stroke 
                   84                    12                    17 

In [165]:
# merge the care home cohort table in order to calculate the incidence density
#(need to subset by the cae_home_cohort_v1 table as cohort was updated in feb 2025, to exclude people with no follow-up 

In [166]:
sql1 <- 
paste(
    'SELECT a.person_id,first_episodestartdate,b.dod, hypertension
from 
',targetdb3,'.cb_2172_hypertension_combined a
left join ',targetdb3,'.care_home_cohort_v1 b 
on a.person_id = b.person_id
;',sep="")

bp_tbl <-bq_project_query(ProjectId,sql1)
ch_cohort <- bq_table_download(bp_tbl)

ch_cohort$person_id <- as.character(ch_cohort$person_id)
print(ch_cohort)

[90m# A tibble: 2,793 × 4[39m
   person_id first_episodestartdate dod    hypertension
   [3m[90m<chr>[39m[23m     [3m[90m<dttm>[39m[23m                 [3m[90m<date>[39m[23m [3m[90m<lgl>[39m[23m       
[90m 1[39m 13391566  2016-04-08 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 2[39m 13407837  2016-05-16 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 3[39m 13283147  2016-06-24 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 4[39m 12807410  2016-08-08 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 5[39m 13600637  2016-08-08 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 6[39m 13343064  2016-08-08 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 7[39m 13256011  2016-08-16 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 8[39m 12515587  2016-09-16 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m 9[39m 13420182  2016-09-16 [90m00:00:00[39m    [31mNA[39m     FALSE       
[90m1

In [167]:
ch_mace_merged <- merge(x =ch_cohort, y = mace_data_filtered, all.x = TRUE)

In [168]:
ch_mace_merged$first_episodestartdate <- as_date(ch_mace_merged$first_episodestartdate)

In [169]:
ch_mace_merged <- ch_mace_merged %>% mutate(exposure_time = 0)

In [170]:
 ch_mace_merged_filtered <- ch_mace_merged %>% filter(hypertension == TRUE)

In [171]:
ch_mace_merged_filtered <-ch_mace_merged_filtered %>%  select(-"event_id","first_episodestartdate","hypertension","death_date")

In [172]:
# write a for if loop so that if the event_type occurred put that as the exposure time, but if it didn't occur check whether death date occured within 12 months, if it did
# use the death to calculate exposure time if it didn't then put the expsoure time as 365
for (i in 1:nrow(ch_mace_merged_filtered)) {
    
    if(!is.na(ch_mace_merged_filtered[i,]$event_type)) {
        ch_mace_merged_filtered[i,]$exposure_time <- ch_mace_merged_filtered[i,]$date.event - ch_mace_merged_filtered[i,]$first_episodestartdate
        
    } else if (is.na(ch_mace_merged_filtered[i,]$event_type) 
               & !is.na((ch_mace_merged_filtered[i,]$dod))
               & ((ch_mace_merged_filtered[i,]$dod - ch_mace_merged_filtered[i,]$first_episodestartdate)  < 365)) {
        ch_mace_merged_filtered[i,]$exposure_time <- (ch_mace_merged_filtered[i,]$dod - ch_mace_merged_filtered[i,]$first_episodestartdate)
    } else { 
        ch_mace_merged_filtered[i,]$exposure_time <- 365
        }
    }

In [173]:
(sum(!is.na(ch_mace_merged_filtered$event_type))/(sum(ch_mace_merged_filtered$exposure_time)/365))*100

In [174]:
ch_mace_merged_filtered$event_type <- as.factor(ch_mace_merged_filtered$event_type)

In [175]:
exposure_time <- sum(ch_mace_merged_filtered$exposure_time)/365

In [176]:
mace_events_count<- sum(!is.na(ch_mace_merged_filtered$event_type))

In [177]:
pois.exact(mace_events_count, pt = exposure_time, conf.level = 0.95)

x,pt,rate,lower,upper,conf.level
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
113,1607.408,0.0702995,0.05793674,0.08451941,0.95
