# Setting up the environment

We'll load the needed libraries:


In [1]:
# Notebook display settings: limits for printed tables, plot size, console width
options(repr.matrix.max.rows=100, repr.matrix.max.cols=300)
options(repr.plot.width = 20, repr.plot.height = 15)
options(width=300)

# Number of threads handed to data.table below
numcores=8

# Packages used throughout the notebook.
# NOTE(review): packages attached later mask same-named functions from
# packages attached earlier -- confirm the masking order is the intended one.
library(tidyverse)
library(data.table)
library(fst)
library(comorbidity)
library(reshape)
library(dtplyr)
library(haven)
library(vroom)
library(dplyr)
# "not in" operator: the negation of %in%
`%!in%` = Negate(`%in%`)

# Let data.table use `numcores` threads
setDTthreads(numcores)

── [1mAttaching packages[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘data.table’


The following objects are masked from ‘package:dply

# Codes

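First, we define the code lists used throughout the analysis: ICD-9/ICD-10 diagnosis codes and Medicare codes (office-visit HCPCS codes, non-US state codes, and primary care specialty codes).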

In [2]:
# Code lists used to classify claim lines (diagnoses, visits, providers).

# HCPCS codes counted as office visits
office_visit_codes <- c(
  "99201", "99202", "99203", "99204", "99205",
  "99211", "99212", "99213", "99214", "99215"
)

# Ischemic heart disease: ICD-9 categories (numeric) and ICD-10 categories
IHD_icd_9_codes <- c(410, 411, 412, 413, 414)
IHD_icd_10_codes <- paste0("I", 20:25)

# State codes that are not US states
non_us_state_codes <- c(40, 54, 56:66, 97:99)

# Provider specialty codes treated as primary care
primary_care_specialty_codes <- c("01", "08", "11", "38")

# Hypertension
#http://www.icd9data.com/2015/Volume1/390-459/401-405/default.htm
#https://www.icd10data.com/ICD10CM/Codes/I00-I99/I10-I16
hypertension_icd_9_codes <- as.character(401:405)
hypertension_icd_10_codes <- paste0("I", c(10:13, 15L, 16L))

# Depression (the ICD-9 codes are four characters long)
#http://www.icd9data.com/2014/Volume1/290-319/295-299/296/default.htm
#https://www.icd10data.com/ICD10CM/Codes/F01-F99/F30-F39
depression_icd_9_codes <- c("2962", "2963")
depression_icd_10_codes <- c("F32", "F33")

# Diabetes
#http://www.icd9data.com/2015/Volume1/240-279/249-259/default.htm
#https://www.icd10data.com/ICD10CM/Codes/E00-E89/E08-E13
diabetes_icd_9_codes <- "250"
diabetes_icd_10_codes <- sprintf("E%02d", c(8:11, 13L))

# Arthritis
#http://www.icd9data.com/2014/Volume1/710-739/710-719/714/default.htm
#https://www.icd10data.com/ICD10CM/Codes/M00-M99/M05-M14
arthritis_icd_9_codes <- "714"
arthritis_icd_10_codes <- sprintf("M%02d", 5:14)





# Patient level calculations

## Yearly Calculators

These are the main functions that calculate yearly expenditures for patients and their corresponding physicians.

### Read data from chosen columns



In [3]:
# Carrier claim lines: first 1,000,000 rows of the fst file, as a data.table
carrier_data_all_years <- read_fst(
  path = "carrier_data_all_years.fst",
  to = 1000000,
  as.data.table = TRUE
)

ERROR: Error in read_fst("carrier_data_all_years.fst", as.data.table = T, to = 1e+06): Error opening fst file for reading, please check access rights and file availability


In [None]:
# Outpatient and inpatient claims: first 1,000,000 rows of each file,
# returned as data.tables
outpatient_data_all_years <- read_fst(
  path = "outpatient_data_all_years.fst",
  to = 1000000,
  as.data.table = TRUE
)
inpatient_data_all_years <- read_fst(
  path = "inpatient_data_all_years.fst",
  to = 1000000,
  as.data.table = TRUE
)

In [None]:
# Beneficiary summary and revenue-center tables from the shared project
# folder: first 1,000,000 rows of each file, returned as data.tables.
# NOTE(review): the mbsf path has no ".fst" extension, unlike the other
# files -- confirm that this is the real file name.
mbsf_data <- read_fst(
  path = "/work/postresearch/Shared/Projects/Data_fst/mbsf_data",
  to = 1000000,
  as.data.table = TRUE
)
revenue_center_outpatient_all_years <- read_fst(
  path = "/work/postresearch/Shared/Projects/Data_fst/revenue_center_outpatient_all_years.fst",
  to = 1000000,
  as.data.table = TRUE
)
outpatient_and_revenue_center_data <- read_fst(
  path = "/work/postresearch/Shared/Projects/Data_fst/outpatient_and_revenue_center_data.fst",
  to = 1000000,
  as.data.table = TRUE
)

In [None]:
#carrier_sample = tail(carrier_data_all_years,1000000)
#outpatient_sample = tail(outpatient_data_all_years,1000000)
#inpatient_sample = tail(inpatient_data_all_years,1000000)
#mbsf_sample = tail(mbsf_data,1000000)
#revenue_center_outpatient_sample=tail(revenue_center_outpatient_all_years,1000000)
#outpatient_and_revenue_center_data_sample=tail(outpatient_and_revenue_center_data,1000000)
#head(carrier_sample)
#head(outpatient_sample)
#head(inpatient_sample)
#head(mbsf_sample)
#head(revenue_center_outpatient_sample)
#head(outpatient_and_revenue_center_data_sample)

### Loading sample data (for pc)

In [4]:
# Load the saved list of sample tables used when working on a PC
sample_data <- readRDS("sample_data.RDS")

In [5]:
# Unpack the sample list into the table names used by the rest of the notebook.
# Element 4 of the list is not used: the beneficiary summary table is read
# from its own fst file instead.
carrier_data_all_years <- sample_data[[1]]
outpatient_data_all_years <- sample_data[[2]]
inpatient_data_all_years <- sample_data[[3]]
revenue_center_outpatient_all_years <- sample_data[[5]]
outpatient_and_revenue_center_data <- sample_data[[6]]
mbsf_data <- read_fst("mbsf_data_long.fst", as.data.table = TRUE)

### Patient yearly expenditures and use of services carrier

I will first create a function that adds conditions of interest to the data.


#### Finding conditions for each claim line

In [6]:
# Flag every claim line with the visit type, provider type and conditions of
# interest.
#
# Arguments:
#   data  claim-line table with the columns HCPCS_CD, PRVDR_SPCLTY,
#         LINE_ICD_DGNS_VRSN_CD and LINE_ICD_DGNS_CD.
#
# Returns a data.table holding the input columns plus the logical columns
# is_office_visit, is_by_primary_care_physician, is_hypertension,
# is_arthritis, is_IHD, is_diabetes and is_depression. A condition flag is NA
# when the ICD version of the line is neither 0 nor 9.
#
# Relies on the code lists defined at the top of the notebook.
yearly_calculator_patient_conditions <- function(data) {

  #requirements
  require(data.table)
  require(dtplyr)
  require(tidyverse)
  require(lubridate)

  # Match the leading characters of the diagnosis code against the code list
  # for the ICD version of the line: version 0 is compared with the ICD-10
  # list (first 3 characters), version 9 with the ICD-9 list (first
  # `icd_9_width` characters); any other version yields NA.
  flag_condition <- function(icd_version, icd_code, icd_10_codes, icd_9_codes,
                             icd_9_width = 3) {
    if_else(
      icd_version == 0,
      substr(icd_code, 1, 3) %in% icd_10_codes,
      if_else(
        icd_version == 9,
        substr(icd_code, 1, icd_9_width) %in% icd_9_codes,
        NA
      )
    )
  }

  flagged <- data %>%
    mutate(
      is_office_visit = HCPCS_CD %in% office_visit_codes,
      is_by_primary_care_physician = PRVDR_SPCLTY %in% primary_care_specialty_codes,
      is_hypertension = flag_condition(
        LINE_ICD_DGNS_VRSN_CD, LINE_ICD_DGNS_CD,
        hypertension_icd_10_codes, hypertension_icd_9_codes
      ),
      is_arthritis = flag_condition(
        LINE_ICD_DGNS_VRSN_CD, LINE_ICD_DGNS_CD,
        arthritis_icd_10_codes, arthritis_icd_9_codes
      ),
      is_IHD = flag_condition(
        LINE_ICD_DGNS_VRSN_CD, LINE_ICD_DGNS_CD,
        IHD_icd_10_codes, IHD_icd_9_codes
      ),
      is_diabetes = flag_condition(
        LINE_ICD_DGNS_VRSN_CD, LINE_ICD_DGNS_CD,
        diabetes_icd_10_codes, diabetes_icd_9_codes
      ),
      # the ICD-9 depression codes are four characters long
      is_depression = flag_condition(
        LINE_ICD_DGNS_VRSN_CD, LINE_ICD_DGNS_CD,
        depression_icd_10_codes, depression_icd_9_codes,
        icd_9_width = 4
      )
    )

  as.data.table(flagged)
}

yearly_patient_conditions_carrier <- yearly_calculator_patient_conditions(carrier_data_all_years)
head(yearly_patient_conditions_carrier)


Loading required package: lubridate


Attaching package: ‘lubridate’


The following object is masked from ‘package:reshape’:

    stamp


The following objects are masked from ‘package:data.table’:

    hour, isoweek, mday, minute, month, quarter, second, wday, week, yday, year


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




DESY_SORT_KEY,CLAIM_NO,LINE_NUM,CLM_THRU_DT,LINE_PLACE_OF_SRVC_CD,HCPCS_CD,LINE_ICD_DGNS_VRSN_CD,LINE_ICD_DGNS_CD,LINE_ALOWD_CHRG_AMT,PRF_PHYSN_NPI,PRVDR_SPCLTY,PRVDR_STATE_CD,date,year,month_year,is_office_visit,is_by_primary_care_physician,is_hypertension,is_arthritis,is_IHD,is_diabetes,is_depression
<int>,<int>,<int>,<int>,<int>,<chr>,<int>,<chr>,<dbl>,<chr>,<chr>,<int>,<date>,<dbl>,<chr>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
100000015,2,1,20130425,22,94375,9,496,15.26,1073503884,29,22,2013-04-25,2013,2013-04,False,False,False,False,False,False,False
100000015,2,2,20130425,22,94726,9,496,13.54,1073503884,29,22,2013-04-25,2013,2013-04,False,False,False,False,False,False,False
100000015,2,3,20130425,22,94729,9,496,9.95,1073503884,29,22,2013-04-25,2013,2013-04,False,False,False,False,False,False,False
100000015,3,1,20130528,11,99214,9,41400,114.64,1285600932,11,22,2013-05-28,2013,2013-05,True,True,False,False,True,False,False
100000015,3,2,20130528,11,93000,9,41400,20.08,1285600932,11,22,2013-05-28,2013,2013-05,False,True,False,False,True,False,False
100000015,4,1,20130719,22,99213,9,496,51.76,1659344091,29,22,2013-07-19,2013,2013-07,True,False,False,False,False,False,False


#### Summarizing patient data
I will now summarise the data for each patient.


In [7]:
# Summarise flagged claim lines to one row per patient (DESY_SORT_KEY) and year.
#
# Arguments:
#   data        claim-line table carrying the is_* flag columns together with
#               PRF_PHYSN_NPI and LINE_ICD_DGNS_VRSN_CD.
#   time_frame  not used by this function; kept so existing calls still work.
#
# Returns a data.table with, per patient-year:
#   distinct_clinicians               number of distinct PRF_PHYSN_NPI values
#   distinct_primary_care_physicians  number of distinct PRF_PHYSN_NPI values
#                                     on lines flagged as primary care
#   hypertension, arthritis, IHD,
#   diabetes, depression              TRUE if any line carries the flag
#   icd_9_pure                        TRUE if no line has ICD version 0
#   icd_10_pure                       TRUE if every line has ICD version 0
summarise_carrier <- function(data, time_frame = 365) {

  data %>%
    group_by(DESY_SORT_KEY, year) %>%
    summarise(
      #tot_allowed_carrier = sum(na.rm = T, LINE_ALOWD_CHRG_AMT),

      #office_visit_count = sum(na.rm = T, is_office_visit),

      #office_visit_cost_carrier = sum(na.rm = T, LINE_ALOWD_CHRG_AMT * is_office_visit),

      distinct_clinicians = length(unique(PRF_PHYSN_NPI)),

      # Subset the group's own NPI vector. The magrittr placeholder `.` must
      # not be used here: inside summarise() it is the whole piped table, not
      # the rows of the current patient-year.
      distinct_primary_care_physicians =
        length(unique(PRF_PHYSN_NPI[is_by_primary_care_physician])),

      hypertension = sum(is_hypertension, na.rm = TRUE) > 0,

      arthritis = sum(is_arthritis, na.rm = TRUE) > 0,

      IHD = sum(is_IHD, na.rm = TRUE) > 0,

      diabetes = sum(is_diabetes, na.rm = TRUE) > 0,

      depression = sum(is_depression, na.rm = TRUE) > 0,

      # no line coded with version 0 (missing versions are ignored)
      icd_9_pure = all(LINE_ICD_DGNS_VRSN_CD != 0, na.rm = TRUE),

      # every line coded with version 0 (missing versions are ignored)
      icd_10_pure = all(LINE_ICD_DGNS_VRSN_CD == 0, na.rm = TRUE)
    ) %>%
    as.data.table()
}


summary_patient_by_year <- summarise_carrier(yearly_patient_conditions_carrier)
head(summary_patient_by_year)


`summarise()` has grouped output by 'DESY_SORT_KEY'. You can override using the `.groups` argument.


DESY_SORT_KEY,year,distinct_clinicians,distinct_primary_care_physicians,hypertension,arthritis,IHD,diabetes,depression,icd_9_pure,icd_10_pure
<int>,<dbl>,<int>,<int>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
100000015,2013,7,1,False,False,True,False,False,True,False
100000019,2013,27,2,True,False,False,False,False,True,False
100000053,2013,6,0,False,False,False,False,False,True,False
100000099,2013,29,5,True,False,False,False,False,True,False
100000129,2013,1,0,False,False,False,False,False,True,False
100000203,2013,21,4,True,False,False,False,False,True,False


In [8]:
# Attach beneficiary characteristics to the yearly patient summary.
#
# Arguments:
#   mbsf_data     beneficiary table with DESY_SORT_KEY, year and DATE_OF_DEATH.
#   summary_data  patient-year summary with DESY_SORT_KEY and year.
#
# Returns a data.table: every row of summary_data with the matching mbsf_data
# columns (left join on DESY_SORT_KEY and year) plus year_of_death, the first
# four characters of DATE_OF_DEATH.
add_patient_characteristics <- function(mbsf_data, summary_data) {
  require(dtplyr)
  require(lubridate)
  require(tidyverse)

  joined <- as.data.frame(
    left_join(summary_data, mbsf_data, by = c("DESY_SORT_KEY", "year"))
  )

  with_death_year <- mutate(joined, year_of_death = substr(DATE_OF_DEATH, 1, 4))

  as.data.table(with_death_year)
}

summary_with_patient_characteristics <- add_patient_characteristics(
  mbsf_data,
  summary_patient_by_year
)
head(summary_with_patient_characteristics)

DESY_SORT_KEY,year,distinct_clinicians,distinct_primary_care_physicians,hypertension,arthritis,IHD,diabetes,depression,icd_9_pure,icd_10_pure,REFERENCE_YEAR,STATE_CODE,COUNTY_CODE,SEX_CODE,RACE_CODE,AGE,ORIG_REASON_FOR_ENTITLEMENT,CURR_REASON_FOR_ENTITLEMENT,ENTITLEMENT_BUY_IN_IND01,ENTITLEMENT_BUY_IN_IND02,ENTITLEMENT_BUY_IN_IND03,ENTITLEMENT_BUY_IN_IND04,ENTITLEMENT_BUY_IN_IND05,ENTITLEMENT_BUY_IN_IND06,ENTITLEMENT_BUY_IN_IND07,ENTITLEMENT_BUY_IN_IND08,ENTITLEMENT_BUY_IN_IND09,ENTITLEMENT_BUY_IN_IND10,ENTITLEMENT_BUY_IN_IND11,ENTITLEMENT_BUY_IN_IND12,HMO_INDICATOR01,HMO_INDICATOR02,HMO_INDICATOR03,HMO_INDICATOR04,HMO_INDICATOR05,HMO_INDICATOR06,HMO_INDICATOR07,HMO_INDICATOR08,HMO_INDICATOR09,HMO_INDICATOR10,HMO_INDICATOR11,HMO_INDICATOR12,VALID_DATE_OF_DEATH_SWITCH,DATE_OF_DEATH,year_of_death
<int>,<dbl>,<int>,<int>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>
100000015,2013,7,1,False,False,True,False,False,True,False,13,22,160,1,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,
100000019,2013,27,2,True,False,False,False,False,True,False,13,7,50,2,4,76,0,0,C,C,C,C,C,C,C,C,C,C,C,C,0,0,0,0,0,0,0,0,0,0,0,0,,,
100000053,2013,6,0,False,False,False,False,False,True,False,13,33,331,1,2,77,0,0,C,C,C,C,C,C,C,C,C,C,C,C,0,0,0,0,0,0,0,0,0,0,0,0,,,
100000099,2013,29,5,True,False,False,False,False,True,False,13,10,350,1,1,82,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,
100000129,2013,1,0,False,False,False,False,False,True,False,13,7,10,1,1,52,1,1,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,,,
100000203,2013,21,4,True,False,False,False,False,True,False,13,33,510,2,1,65,1,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,


### Most common physicians for each patient

Now, we will find the most common physician and the most common primary care physician for each patient.


In [9]:
#adding most common physicians
# For every patient-year, find the physician (and the primary care physician)
# who billed the most office visits, and attach them to the summary table.
#
# Arguments:
#   data          claim-line data.table with DESY_SORT_KEY, year, HCPCS_CD,
#                 PRF_PHYSN_NPI and PRVDR_SPCLTY.
#   summary_data  patient-year summary with DESY_SORT_KEY, year, icd_9_pure
#                 and icd_10_pure.
#   time_frame    not used by this function; kept so existing calls still work.
#
# Returns summary_data as a data.table with the added columns
# most_common_physician_{year, PRF_PHYSN_NPI, n, PRVDR_SPCLTY} and
# most_common_primary_care_physician_{year, PRF_PHYSN_NPI, n, PRVDR_SPCLTY};
# they are NA for patient-years without a matching office visit.
#
# Notes:
#   * Ties in the visit count are broken by row order.
#   * An NPI that appears with several specialties contributes one row per
#     specialty, so the reported specialty of the most common physician is one
#     of them.
add_patient_NPI <- function(data, summary_data, time_frame = 365) {

  # Claim lines of the patient-years present in the summary table
  comorbidity_and_phys_data <-
    inner_join(
      data,
      summary_data[, c("DESY_SORT_KEY", "year", "icd_9_pure", "icd_10_pure")],
      by = c("DESY_SORT_KEY", "year")
    ) %>%
    as.data.table()

  # Office visits per patient, year and physician (physicians with no office
  # visit are dropped)
  patient_NPI_count_finder <- function(data) {
    data %>%
      mutate(is_office_visit = HCPCS_CD %in% office_visit_codes) %>%
      group_by(DESY_SORT_KEY, year, PRF_PHYSN_NPI) %>%
      summarise(n = sum(is_office_visit, na.rm = TRUE)) %>%
      filter(n > 0) %>%
      arrange(.by_group = TRUE, desc(n))
  }

  # First row per patient-year after sorting by descending visit count
  find_most_common <- function(data) {
    data %>%
      group_by(DESY_SORT_KEY, year) %>%
      arrange(.by_group = TRUE, desc(n)) %>%
      slice(1) %>%
      as.data.table()
  }

  # Same as find_most_common(), restricted to the given specialty codes
  find_most_common_by_specialty <- function(data, specialty_code) {
    find_most_common(filter(data, PRVDR_SPCLTY %in% specialty_code))
  }

  # Prefix every column, then add an unprefixed copy of the year that serves
  # only as a join key (join keys of the right-hand table are not repeated in
  # the join result, so the output columns stay as before).
  prefix_columns <- function(tbl, prefix) {
    tbl <- data.frame(tbl) %>%
      rename_with(~ paste0(prefix, .x))
    tbl$year <- tbl[[paste0(prefix, "year")]]
    tbl
  }

  patient_NPI_counts <- patient_NPI_count_finder(comorbidity_and_phys_data)

  patient_NPI_counts <- left_join(
    patient_NPI_counts,
    distinct(data[, .(PRF_PHYSN_NPI, PRVDR_SPCLTY)]),
    by = "PRF_PHYSN_NPI"
  )

  most_common_physician <- prefix_columns(
    find_most_common(patient_NPI_counts),
    "most_common_physician_"
  )
  most_common_primary_care_physician <- prefix_columns(
    find_most_common_by_specialty(
      patient_NPI_counts,
      specialty_code = c("01", "08", "11", "38")
    ),
    "most_common_primary_care_physician_"
  )

  # Join on patient AND year. The lookup tables hold one row per patient-year,
  # so joining on the patient alone would repeat every summary row once per
  # year of data and attach physicians from other years.
  summary_data <- left_join(
    summary_data,
    most_common_physician,
    by = c("DESY_SORT_KEY" = "most_common_physician_DESY_SORT_KEY",
           "year" = "year")
  )
  summary_data <- left_join(
    summary_data,
    most_common_primary_care_physician,
    by = c("DESY_SORT_KEY" = "most_common_primary_care_physician_DESY_SORT_KEY",
           "year" = "year")
  )

  as.data.table(summary_data)
}

summary_with_npi <- add_patient_NPI(
  data = carrier_data_all_years,
  summary_data = summary_with_patient_characteristics
)
head(summary_with_npi)


`summarise()` has grouped output by 'DESY_SORT_KEY', 'year'. You can override using the `.groups` argument.


DESY_SORT_KEY,year,distinct_clinicians,distinct_primary_care_physicians,hypertension,arthritis,IHD,diabetes,depression,icd_9_pure,icd_10_pure,REFERENCE_YEAR,STATE_CODE,COUNTY_CODE,SEX_CODE,RACE_CODE,AGE,ORIG_REASON_FOR_ENTITLEMENT,CURR_REASON_FOR_ENTITLEMENT,ENTITLEMENT_BUY_IN_IND01,ENTITLEMENT_BUY_IN_IND02,ENTITLEMENT_BUY_IN_IND03,ENTITLEMENT_BUY_IN_IND04,ENTITLEMENT_BUY_IN_IND05,ENTITLEMENT_BUY_IN_IND06,ENTITLEMENT_BUY_IN_IND07,ENTITLEMENT_BUY_IN_IND08,ENTITLEMENT_BUY_IN_IND09,ENTITLEMENT_BUY_IN_IND10,ENTITLEMENT_BUY_IN_IND11,ENTITLEMENT_BUY_IN_IND12,HMO_INDICATOR01,HMO_INDICATOR02,HMO_INDICATOR03,HMO_INDICATOR04,HMO_INDICATOR05,HMO_INDICATOR06,HMO_INDICATOR07,HMO_INDICATOR08,HMO_INDICATOR09,HMO_INDICATOR10,HMO_INDICATOR11,HMO_INDICATOR12,VALID_DATE_OF_DEATH_SWITCH,DATE_OF_DEATH,year_of_death,most_common_physician_year,most_common_physician_PRF_PHYSN_NPI,most_common_physician_n,most_common_physician_PRVDR_SPCLTY,most_common_primary_care_physician_year,most_common_primary_care_physician_PRF_PHYSN_NPI,most_common_primary_care_physician_n,most_common_primary_care_physician_PRVDR_SPCLTY
<int>,<dbl>,<int>,<int>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<int>,<chr>,<dbl>,<chr>,<int>,<chr>
100000015,2013,7,1,False,False,True,False,False,True,False,13,22,160,1,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1285600932.0,2.0,11.0,2013.0,1285600932.0,2.0,11.0
100000019,2013,27,2,True,False,False,False,False,True,False,13,7,50,2,4,76,0,0,C,C,C,C,C,C,C,C,C,C,C,C,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1609857119.0,14.0,90.0,2013.0,1558465849.0,3.0,11.0
100000053,2013,6,0,False,False,False,False,False,True,False,13,33,331,1,2,77,0,0,C,C,C,C,C,C,C,C,C,C,C,C,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1417030412.0,5.0,48.0,,,,
100000099,2013,29,5,True,False,False,False,False,True,False,13,10,350,1,1,82,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1598867251.0,13.0,11.0,2013.0,1598867251.0,13.0,11.0
100000129,2013,1,0,False,False,False,False,False,True,False,13,7,10,1,1,52,1,1,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,,,,,,,,,,,
100000203,2013,21,4,True,False,False,False,False,True,False,13,33,510,2,1,65,1,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1134153430.0,9.0,9.0,2013.0,1770541260.0,4.0,11.0


## Physician integration status

Here, I will find which physicians are integrated.


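### A method of finding codes that are exclusive to hospital or to non-hospital settings
We identify HCPCS codes that are billed almost exclusively in hospital settings or almost exclusively in non-hospital settings. These codes can then be excluded, so that only codes used in both settings are counted.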

In [10]:
# Find HCPCS codes that are billed (almost) only in facility settings or
# (almost) only outside them.
#
# Arguments:
#   data                               claim-line table with HCPCS_CD and
#                                      LINE_PLACE_OF_SRVC_CD.
#   threshold                          a code is exclusive when its facility
#                                      share is below `threshold` or above
#                                      `1 - threshold`.
#   integrated_place_of_service_codes  place-of-service codes counted as
#                                      facility settings.
#   all_place_of_service_codes         place-of-service codes considered at
#                                      all; other lines are ignored.
#
# Returns the vector of exclusive HCPCS codes.
exclusive_hospital_code_finder <- function(data,
                                           threshold = 0.05,
                                           integrated_place_of_service_codes = c("19", "22"),
                                           all_place_of_service_codes = c("11", "19", "22")) {
  require(dtplyr)
  require(tidyverse)

  facility_share_by_code <- data %>%
    filter(LINE_PLACE_OF_SRVC_CD %in% all_place_of_service_codes) %>%
    group_by(HCPCS_CD) %>%
    summarise(
      # Share of this code's lines billed in a facility setting. The mean of
      # the group's own logical vector is used; the magrittr placeholder `.`
      # is the whole piped table, not the lines of the current code.
      prp_in_facility = mean(
        LINE_PLACE_OF_SRVC_CD %in% integrated_place_of_service_codes
      )
    ) %>%
    as.data.table()

  exclusive_codes <- facility_share_by_code[
    prp_in_facility < threshold | prp_in_facility > (1 - threshold),
    HCPCS_CD
  ]

  return(exclusive_codes)
}

exclusive_hospital_codes <- exclusive_hospital_code_finder(carrier_data_all_years)

In [11]:
# Print the HCPCS codes returned by exclusive_hospital_code_finder()
exclusive_hospital_codes

### A function to find integrated docs

In [12]:
#calculate and add physician integration data
#this only uses visits to see if a physician is integrated or not (code list)
#
# Arguments:
#   data                               claim-line table with PRF_PHYSN_NPI,
#                                      year, HCPCS_CD and LINE_PLACE_OF_SRVC_CD.
#   integrated_place_of_service_codes  place-of-service codes counted as
#                                      facility settings.
#   all_place_of_service_codes         place-of-service codes that make up the
#                                      denominators.
#   office_code_list                   HCPCS codes counted as office visits.
#   exclusive_hospital_codes           HCPCS codes to leave out of the
#                                      "non exclusive" counts.
#
# Returns (invisibly) a data.table with one row per physician and year: the
# line counts in facility settings and in all considered settings for office
# visits, for non-exclusive HCPCS codes and for all lines, plus the three
# corresponding facility proportions (NaN when the denominator is 0).
physician_integration_finder <- function(data,
                                         integrated_place_of_service_codes = c("19", "22"),
                                         all_place_of_service_codes = c("11", "19", "22"),
                                         #integration_threshold = 0.5,
                                         office_code_list = c(
                                           "99201", "99202", "99203", "99204", "99205",
                                           "99211", "99212", "99213", "99214", "99215"
                                         ),
                                         exclusive_hospital_codes) {
  require(dtplyr)
  require(tidyverse)

  #data = subset(data, HCPCS_CD %in% code_list)

  # Line-level indicators
  flagged_lines <- mutate(
    data,
    is_facility = LINE_PLACE_OF_SRVC_CD %in% integrated_place_of_service_codes,
    is_all = LINE_PLACE_OF_SRVC_CD %in% all_place_of_service_codes,
    is_office_visit = HCPCS_CD %in% office_code_list,
    has_non_exclusive_code = !(HCPCS_CD %in% exclusive_hospital_codes)
  )

  # Counts per physician and year
  yearly_counts <- flagged_lines %>%
    group_by(PRF_PHYSN_NPI, year) %>%
    summarise(
      in_facility_visits_count = sum(is_facility & is_office_visit, na.rm = TRUE),
      in_all_visits_count = sum(is_all & is_office_visit, na.rm = TRUE),
      in_facility_non_exclusive_HCPCS_count = sum(is_facility & has_non_exclusive_code, na.rm = TRUE),
      in_all_non_exclusive_HCPCS_count = sum(is_all & has_non_exclusive_code, na.rm = TRUE),
      in_facility_count = sum(is_facility, na.rm = TRUE),
      in_all_count = sum(is_all, na.rm = TRUE)
    )

  # Facility share for each of the three count pairs
  with_proportions <- mutate(
    yearly_counts,
    in_facility_visits_prp = in_facility_visits_count / in_all_visits_count,
    in_facility_non_exclusive_HCPCS_prp = in_facility_non_exclusive_HCPCS_count / in_all_non_exclusive_HCPCS_count,
    in_facility_prp = in_facility_count / in_all_count
  )

  invisible(as.data.table(with_proportions))
}

physician_integration_stats <- physician_integration_finder(
  carrier_data_all_years,
  exclusive_hospital_codes = exclusive_hospital_codes
)

`summarise()` has grouped output by 'PRF_PHYSN_NPI'. You can override using the `.groups` argument.


In [13]:
# Show the last rows of the per-physician, per-year integration table
tail(physician_integration_stats)

PRF_PHYSN_NPI,year,in_facility_visits_count,in_all_visits_count,in_facility_non_exclusive_HCPCS_count,in_all_non_exclusive_HCPCS_count,in_facility_count,in_all_count,in_facility_visits_prp,in_facility_non_exclusive_HCPCS_prp,in_facility_prp
<chr>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>
1992997522,2013,2,2,10,10,10,10,1.0,1.0,1.0
1992998157,2013,0,0,0,0,0,0,,,
1992998207,2013,0,0,0,0,0,5,,,0.0
1992999122,2013,0,8,0,8,0,8,0.0,0.0,0.0
9999999991,2013,0,1,0,2,0,2,0.0,0.0,0.0
9999999992,2013,0,3,0,16,0,71,0.0,0.0,0.0


### Add integration status of physicians
This function will add the integration status of most common physicians to each patient's summary data.

In [None]:
#rename columns
# Rename the last `how_many` columns of `data` in place.
#
# Arguments:
#   data       table whose columns are renamed by reference through
#              data.table's setnames().
#   how_many   number of trailing columns to rename (0 leaves `data` as is).
#   new_names  the new names, one per renamed column, in column order.
#
# Returns the value of setnames().
rename_last <- function(data, how_many, new_names) {
  total_cols <- ncol(data)
  stopifnot(
    length(how_many) == 1,
    how_many >= 0,
    how_many <= total_cols,
    length(new_names) == how_many
  )
  # seq_len() yields an empty index when how_many is 0, whereas
  # (total_cols - how_many + 1):total_cols would count backwards.
  last_cols <- total_cols - how_many + seq_len(how_many)
  setnames(data, last_cols, new_names)
}
# Attach the integration statistics of each patient's most common physician
# and most common primary care physician to the patient-year summary.
#
# Arguments:
#   data                         patient-year data.table with DESY_SORT_KEY,
#                                year, most_common_physician_PRF_PHYSN_NPI and
#                                most_common_primary_care_physician_PRF_PHYSN_NPI.
#   physician_integration_stats  table whose first two columns are
#                                PRF_PHYSN_NPI and year; all later columns are
#                                statistics.
#
# Returns `data` as a data.table with every statistic added twice, prefixed
# with "most_common_physician_" and "most_common_primary_care_physician_".
add_integration_status <- function(data, physician_integration_stats) {

  # Patient-year keys plus the two physician identifiers to look up
  data_selected <- data[, c("DESY_SORT_KEY",
                            "year",
                            "most_common_physician_PRF_PHYSN_NPI",
                            "most_common_primary_care_physician_PRF_PHYSN_NPI")]

  # The statistics are all columns after PRF_PHYSN_NPI and year
  stat_count <- ncol(physician_integration_stats) - 2
  stat_names <- colnames(physician_integration_stats)[3:ncol(physician_integration_stats)]

  # Look up the statistics of the physician in `npi_column` (same year),
  # prefix the statistic columns and drop both physician identifier columns.
  stats_for <- function(npi_column, other_npi_column, prefix) {
    joined <- as.data.table(
      left_join(
        data_selected,
        physician_integration_stats,
        by = setNames(c("PRF_PHYSN_NPI", "year"), c(npi_column, "year"))
      )
    )
    joined <- joined[, setdiff(colnames(joined), other_npi_column), with = FALSE]
    # the joined statistics are the trailing columns, so rename by position
    rename_last(joined, stat_count, paste(prefix, stat_names, sep = ""))
    joined[, setdiff(colnames(joined), npi_column), with = FALSE]
  }

  most_common_physician <- stats_for(
    "most_common_physician_PRF_PHYSN_NPI",
    "most_common_primary_care_physician_PRF_PHYSN_NPI",
    "most_common_physician_"
  )
  most_common_primary_care <- stats_for(
    "most_common_primary_care_physician_PRF_PHYSN_NPI",
    "most_common_physician_PRF_PHYSN_NPI",
    "most_common_primary_care_physician_"
  )

  physician_data <- full_join(
    most_common_physician,
    most_common_primary_care,
    by = c("DESY_SORT_KEY", "year")
  )

  result <- as.data.table(
    full_join(data, physician_data, by = c("DESY_SORT_KEY", "year"))
  )

  return(result)
}

summary_with_physician_integration_stats <- add_integration_status(
  data = summary_with_npi,
  physician_integration_stats = physician_integration_stats
)






In [15]:
# Show the first rows of the patient summary with physician integration columns
head(summary_with_physician_integration_stats)

DESY_SORT_KEY,year,distinct_clinicians,distinct_primary_care_physicians,hypertension,arthritis,IHD,diabetes,depression,icd_9_pure,icd_10_pure,REFERENCE_YEAR,STATE_CODE,COUNTY_CODE,SEX_CODE,RACE_CODE,AGE,ORIG_REASON_FOR_ENTITLEMENT,CURR_REASON_FOR_ENTITLEMENT,ENTITLEMENT_BUY_IN_IND01,ENTITLEMENT_BUY_IN_IND02,ENTITLEMENT_BUY_IN_IND03,ENTITLEMENT_BUY_IN_IND04,ENTITLEMENT_BUY_IN_IND05,ENTITLEMENT_BUY_IN_IND06,ENTITLEMENT_BUY_IN_IND07,ENTITLEMENT_BUY_IN_IND08,ENTITLEMENT_BUY_IN_IND09,ENTITLEMENT_BUY_IN_IND10,ENTITLEMENT_BUY_IN_IND11,ENTITLEMENT_BUY_IN_IND12,HMO_INDICATOR01,HMO_INDICATOR02,HMO_INDICATOR03,HMO_INDICATOR04,HMO_INDICATOR05,HMO_INDICATOR06,HMO_INDICATOR07,HMO_INDICATOR08,HMO_INDICATOR09,HMO_INDICATOR10,HMO_INDICATOR11,HMO_INDICATOR12,VALID_DATE_OF_DEATH_SWITCH,DATE_OF_DEATH,year_of_death,most_common_physician_year,most_common_physician_PRF_PHYSN_NPI,most_common_physician_n,most_common_physician_PRVDR_SPCLTY,most_common_primary_care_physician_year,most_common_primary_care_physician_PRF_PHYSN_NPI,most_common_primary_care_physician_n,most_common_primary_care_physician_PRVDR_SPCLTY,most_common_physician_in_facility_visits_count,most_common_physician_in_all_visits_count,most_common_physician_in_facility_non_exclusive_HCPCS_count,most_common_physician_in_all_non_exclusive_HCPCS_count,most_common_physician_in_facility_count,most_common_physician_in_all_count,most_common_physician_in_facility_visits_prp,most_common_physician_in_facility_non_exclusive_HCPCS_prp,most_common_physician_in_facility_prp,most_common_primary_care_physician_in_facility_visits_count,most_common_primary_care_physician_in_all_visits_count,most_common_primary_care_physician_in_facility_non_exclusive_HCPCS_count,most_common_primary_care_physician_in_all_non_exclusive_HCPCS_count,most_common_primary_care_physician_in_facility_count,most_common_primary_care_physician_in_all_count,most_common_primary_care_physician_in_facility_visits_prp,most_common_primary_care_physician_in_facility_non_exclusive_HC
PCS_prp,most_common_primary_care_physician_in_facility_prp
<int>,<dbl>,<int>,<int>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<int>,<chr>,<dbl>,<chr>,<int>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>
100000015,2013,7,1,False,False,True,False,False,True,False,13,22,160,1,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1285600932.0,2.0,11.0,2013.0,1285600932.0,2.0,11.0,0.0,2.0,0.0,2.0,0.0,4.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,4.0,0.0,0.0,0.0
100000019,2013,27,2,True,False,False,False,False,True,False,13,7,50,2,4,76,0,0,C,C,C,C,C,C,C,C,C,C,C,C,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1609857119.0,14.0,90.0,2013.0,1558465849.0,3.0,11.0,0.0,16.0,0.0,30.0,0.0,168.0,0.0,0.0,0.0,0.0,3.0,0.0,5.0,0.0,47.0,0.0,0.0,0.0
100000053,2013,6,0,False,False,False,False,False,True,False,13,33,331,1,2,77,0,0,C,C,C,C,C,C,C,C,C,C,C,C,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1417030412.0,5.0,48.0,,,,,0.0,5.0,0.0,5.0,0.0,5.0,0.0,0.0,0.0,,,,,,,,,
100000099,2013,29,5,True,False,False,False,False,True,False,13,10,350,1,1,82,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1598867251.0,13.0,11.0,2013.0,1598867251.0,13.0,11.0,0.0,16.0,0.0,18.0,0.0,28.0,0.0,0.0,0.0,0.0,16.0,0.0,18.0,0.0,28.0,0.0,0.0,0.0
100000129,2013,1,0,False,False,False,False,False,True,False,13,7,10,1,1,52,1,1,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
100000203,2013,21,4,True,False,False,False,False,True,False,13,33,510,2,1,65,1,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,,,2013.0,1134153430.0,9.0,9.0,2013.0,1770541260.0,4.0,11.0,0.0,10.0,0.0,18.0,0.0,81.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,6.0,0.0,0.0,0.0


# Comparisons and analyses

## Reading the patient and physician integration results

In [92]:
# Saved patient-level results and MI dates, read back as data.tables
results_dir <- "results_may"

yearly_calculations_stable_angina <- read_fst(
  file.path(results_dir, "yearly_calculations_stable_angina_with_integration.fst"),
  as.data.table = TRUE
)
yearly_calculations_unstable_angina <- read_fst(
  file.path(results_dir, "yearly_calculations_unstable_angina_with_integration.fst"),
  as.data.table = TRUE
)
MI_dates <- read_fst(
  file.path(results_dir, "MI_DATES.fst"),
  as.data.table = TRUE
)
#physician_integration_stats = 
#read_fst("results_feb/physician_integration_stats.fst"
#        ,as.data.table = T)
#melted_physician_integration_stats=
#read_fst("results_feb/melted_physician_integration_stats.fst"
#        ,as.data.table = T)

## Adding metropolitan status
I will add the metropolitan statuses of the patient counties using the USDA data

In [97]:
# Rural-urban codes per county (Excel file), as a data.table
rural_urban_data <- as.data.table(
  readxl::read_xls("physician_data/ruralurbancodes2013.xls")
)
head(rural_urban_data)
# County crosswalk holding the SSACD and FIPS.County.Code columns,
# as a data.table
cross_walk_rural_urban <- as.data.table(
  read.csv(file = "physician_data/xwalk2018.csv")
)
head(cross_walk_rural_urban)

FIPS,State,County_Name,Population_2010,RUCC_2013,Description
<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>
1001,AL,Autauga County,54571,2,"Metro - Counties in metro areas of 250,000 to 1 million population"
1003,AL,Baldwin County,182265,3,"Metro - Counties in metro areas of fewer than 250,000 population"
1005,AL,Barbour County,27457,6,"Nonmetro - Urban population of 2,500 to 19,999, adjacent to a metro area"
1007,AL,Bibb County,22915,1,Metro - Counties in metro areas of 1 million population or more
1009,AL,Blount County,57322,1,Metro - Counties in metro areas of 1 million population or more
1011,AL,Bullock County,10914,6,"Nonmetro - Urban population of 2,500 to 19,999, adjacent to a metro area"


County.Name,State,SSACD,FIPS.County.Code,CBSA,CBSA.Name
<chr>,<chr>,<int>,<int>,<int>,<chr>
AUTAUGA,AL,1000,1001,33860.0,"Montgomery, AL"
BALDWIN,AL,1010,1003,19300.0,"Daphne-Fairhope-Foley, AL"
BARBOUR,AL,1020,1005,,
BIBB,AL,1030,1007,13820.0,"Birmingham-Hoover, AL"
BLOUNT,AL,1040,1009,13820.0,"Birmingham-Hoover, AL"
BULLOCK,AL,1050,1011,,


In [98]:
# Add the 2013 Rural-Urban Continuum Code (RUCC_2013) of each patient's county.
#
# 1. Build the combined state + county code (SSACD) as an integer.
#    NOTE(review): this assumes the county code is stored zero-padded to a
#    fixed width; confirm against the source data.
yearly_calculations_stable_angina[
  , SSACD := as.integer(paste0(state_code_at_diagnosis, county_code_at_diagnosis))
]

# 2. The crosswalk stores the county FIPS code as an integer, while the
#    rural-urban table stores it as text. Compare both as integers so that a
#    text value with leading zeros (e.g. "01001") still matches integer 1001.
rural_urban_lookup <- copy(rural_urban_data)[, FIPS := as.integer(FIPS)]

# 3. SSACD -> FIPS through the crosswalk, then FIPS -> rural-urban code.
yearly_calculations_stable_angina <-
  left_join(yearly_calculations_stable_angina, cross_walk_rural_urban, by = "SSACD") %>%
  left_join(rural_urban_lookup, by = c("FIPS.County.Code" = "FIPS")) %>%
  as.data.table()

# 4. Drop the helper columns brought in by the two lookups; RUCC_2013 stays.
yearly_calculations_stable_angina[
  , c("SSACD", "County.Name", "State.x", "FIPS.County.Code", "CBSA", "CBSA.Name",
      "State.y", "County_Name", "Population_2010", "Description") := NULL
]
head(yearly_calculations_stable_angina)

“NAs introduced by coercion”


DESY_SORT_KEY,first_diagnosis,tot_allowed_carrier,office_visit_count,office_visit_cost_carrier,cardiology_visit_count,distinct_clinicians,distinct_cardiologists,distinct_primary_care_physicians,distinct_surgeons,distinct_other_specialties,catheterization_count,catheterization_cost_carrier,ecg_count,ecg_cost_carrier,cardiac_ct_count,cardiac_ct_cost_carrier,cardiac_mri_count,cardiac_mri_cost_carrier,stress_test_count,stress_test_cost_carrier,echocardiography_count,echocardiography_cost_carrier,angioplasty_count,angioplasty_cost_carrier,CABG_count,CABG_cost_carrier,stable_angina,unstable_angina,MI,cardiac_arrest,stroke,hypertension,copd,obesity,depression,diabetes,icd_9_pure,icd_10_pure,catheterization_date,angioplasty_date,catheterization_doc_NPI,angioplasty_doc_NPI,diagnosing_doc_NPI,year_first_diagnosed,time_difference_angio_cath,angioplasty_with_cath,angioplasty_during_year_after_cath,tot_allowed_carrier_cardiology_related,office_visit_count_cardiology_related,office_visit_cost_carrier_cardiology_related,cardiology_visit_count_cardiology_related,distinct_clinicians_cardiology_related,distinct_cardiologists_cardiology_related,distinct_primary_care_physicians_cardiology_related,distinct_surgeons_cardiology_related,distinct_other_specialties_cardiology_related,catheterization_count_cardiology_related,catheterization_cost_carrier_cardiology_related,ecg_count_cardiology_related,ecg_cost_carrier_cardiology_related,cardiac_ct_count_cardiology_related,cardiac_ct_cost_carrier_cardiology_related,cardiac_mri_count_cardiology_related,cardiac_mri_cost_carrier_cardiology_related,stress_test_count_cardiology_related,stress_test_cost_carrier_cardiology_related,echocardiography_count_cardiology_related,echocardiography_cost_carrier_cardiology_related,angioplasty_count_cardiology_related,angioplasty_cost_carrier_cardiology_related,CABG_count_cardiology_related,CABG_cost_carrier_cardiology_related,office_visit_cost_outpatient,catheterization_cost_outpatient,ecg_cost_outpatient,cardia
c_ct_cost_outpatient,cardiac_mri_cost_outpatient,stress_test_cost_outpatient,echocardiography_cost_outpatient,angioplasty_cost_outpatient,CABG_cost_outpatient,office_visit_cost_outpatient_cardiology_related,catheterization_cost_outpatient_cardiology_related,ecg_cost_outpatient_cardiology_related,cardiac_ct_cost_outpatient_cardiology_related,cardiac_mri_cost_outpatient_cardiology_related,stress_test_cost_outpatient_cardiology_related,echocardiography_cost_outpatient_cardiology_related,angioplasty_cost_outpatient_cardiology_related,CABG_cost_outpatient_cardiology_related,office_visit_cost,catheterization_cost,ecg_cost,cardiac_ct_cost,cardiac_mri_cost,stress_test_cost,echocardiography_cost,angioplasty_cost,CABG_cost,office_visit_cost_cardiology_related,catheterization_cost_cardiology_related,ecg_cost_cardiology_related,cardiac_ct_cost_cardiology_related,cardiac_mri_cost_cardiology_related,stress_test_cost_cardiology_related,echocardiography_cost_cardiology_related,angioplasty_cost_cardiology_related,CABG_cost_cardiology_related,tot_cheap_prcdr_cost,tot_expensive_prcdr_cost,tot_cheap_prcdr_cost_cardiology_related,tot_expensive_prcdr_cost_cardiology_related,tot_cheap_prcdr_count,tot_expensive_prcdr_count,tot_cheap_prcdr_count_cardiology_related,tot_expensive_prcdr_count_cardiology_related,REFERENCE_YEAR_2013,STATE_CODE_2013,COUNTY_CODE_2013,SEX_CODE_2013,RACE_CODE_2013,AGE_2013,ORIG_REASON_FOR_ENTITLEMENT_2013,CURR_REASON_FOR_ENTITLEMENT_2013,ENTITLEMENT_BUY_IN_IND01_2013,ENTITLEMENT_BUY_IN_IND02_2013,ENTITLEMENT_BUY_IN_IND03_2013,ENTITLEMENT_BUY_IN_IND04_2013,ENTITLEMENT_BUY_IN_IND05_2013,ENTITLEMENT_BUY_IN_IND06_2013,ENTITLEMENT_BUY_IN_IND07_2013,ENTITLEMENT_BUY_IN_IND08_2013,ENTITLEMENT_BUY_IN_IND09_2013,ENTITLEMENT_BUY_IN_IND10_2013,ENTITLEMENT_BUY_IN_IND11_2013,ENTITLEMENT_BUY_IN_IND12_2013,HMO_INDICATOR01_2013,HMO_INDICATOR02_2013,HMO_INDICATOR03_2013,HMO_INDICATOR04_2013,HMO_INDICATOR05_2013,HMO_INDICATOR06_2013,HMO_INDICATOR07_2013,HMO_INDICATOR08_2013,HMO_INDICA
TOR09_2013,HMO_INDICATOR10_2013,HMO_INDICATOR11_2013,HMO_INDICATOR12_2013,VALID_DATE_OF_DEATH_SWITCH_2013,⋯,ENTITLEMENT_BUY_IN_IND05_2020,ENTITLEMENT_BUY_IN_IND06_2020,ENTITLEMENT_BUY_IN_IND07_2020,ENTITLEMENT_BUY_IN_IND08_2020,ENTITLEMENT_BUY_IN_IND09_2020,ENTITLEMENT_BUY_IN_IND10_2020,ENTITLEMENT_BUY_IN_IND11_2020,ENTITLEMENT_BUY_IN_IND12_2020,HMO_INDICATOR01_2020,HMO_INDICATOR02_2020,HMO_INDICATOR03_2020,HMO_INDICATOR04_2020,HMO_INDICATOR05_2020,HMO_INDICATOR06_2020,HMO_INDICATOR07_2020,HMO_INDICATOR08_2020,HMO_INDICATOR09_2020,HMO_INDICATOR10_2020,HMO_INDICATOR11_2020,HMO_INDICATOR12_2020,VALID_DATE_OF_DEATH_SWITCH_2020,DATE_OF_DEATH_2020,deate_of_death_collapsed,death_validity_collapsed,date_of_death_collapsed,state_code_at_diagnosis,county_code_at_diagnosis,sex_code_at_diagnosis,race_code_at_diagnosis,age_at_diagnosis,ENTITLEMENT_BUY_IN_IND_sum,HMO_INDICATOR_sum,died_in_one_year_after_diagnosis,died_in_two_years_after_diagnosis,date_difference_diagnosis_death,score,index,wscore,windex,most_common_physician_PRF_PHYSN_NPI,most_common_physician_n,most_common_physician_PRVDR_SPCLTY,most_common_primary_care_physician_PRF_PHYSN_NPI,most_common_primary_care_physician_n,most_common_primary_care_physician_PRVDR_SPCLTY,most_common_cardiologist_PRF_PHYSN_NPI,most_common_cardiologist_n,most_common_cardiologist_PRVDR_SPCLTY,most_common_physician_2013_PRF_PHYSN_NPI,most_common_physician_2013_n,most_common_physician_2013_PRVDR_SPCLTY,most_common_primary_care_physician_2013_PRF_PHYSN_NPI,most_common_primary_care_physician_2013_n,most_common_primary_care_physician_2013_PRVDR_SPCLTY,most_common_cardiologist_2013_PRF_PHYSN_NPI,most_common_cardiologist_2013_n,most_common_cardiologist_2013_PRVDR_SPCLTY,tot_allowed_outpatient,tot_allowed_inpatient,number_of_hospitalizations,was_hospitalized,tot_allowed_outpatient_cardiology_related,tot_allowed_inpatient_cardiology_related,number_of_hospitalizations_cardiology_related,was_hospitalized_cardiology_related,total_exp,total_exp_cardiolog
y_related,most_common_physician_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,most_common_physician_prp_in_2013_05_non_exclusive_HCPCS,most_common_physician_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,most_common_physician_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,most_common_physician_prp_in_2013_03_non_exclusive_HCPCS,most_common_physician_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,most_common_physician_avg_in_year_of_diagnosis_non_exclusive_HCPCS,most_common_physician_avg_in_2013_non_exclusive_HCPCS,most_common_physician_avg_in_year_before_diagnosis_non_exclusive_HCPCS,most_common_primary_care_physician_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,most_common_primary_care_physician_prp_in_2013_05_non_exclusive_HCPCS,most_common_primary_care_physician_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,most_common_primary_care_physician_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,most_common_primary_care_physician_prp_in_2013_03_non_exclusive_HCPCS,most_common_primary_care_physician_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,most_common_primary_care_physician_avg_in_year_of_diagnosis_non_exclusive_HCPCS,most_common_primary_care_physician_avg_in_2013_non_exclusive_HCPCS,most_common_primary_care_physician_avg_in_year_before_diagnosis_non_exclusive_HCPCS,most_common_cardiologist_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,most_common_cardiologist_prp_in_2013_05_non_exclusive_HCPCS,most_common_cardiologist_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,most_common_cardiologist_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,most_common_cardiologist_prp_in_2013_03_non_exclusive_HCPCS,most_common_cardiologist_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,most_common_cardiologist_avg_in_year_of_diagnosis_non_exclusive_HCPCS,most_common_cardiologist_avg_in_2013_non_exclusive_HCPCS,most_common_cardiologist_avg_in_year_before_diagnosis_non_exclusive_HCPCS,diagnosing_doc_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,d
iagnosing_doc_prp_in_2013_05_non_exclusive_HCPCS,diagnosing_doc_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,diagnosing_doc_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,diagnosing_doc_prp_in_2013_03_non_exclusive_HCPCS,diagnosing_doc_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,diagnosing_doc_avg_in_year_of_diagnosis_non_exclusive_HCPCS,diagnosing_doc_avg_in_2013_non_exclusive_HCPCS,diagnosing_doc_avg_in_year_before_diagnosis_non_exclusive_HCPCS,catheterization_doc_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,catheterization_doc_prp_in_2013_05_non_exclusive_HCPCS,catheterization_doc_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,catheterization_doc_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,catheterization_doc_prp_in_2013_03_non_exclusive_HCPCS,catheterization_doc_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,catheterization_doc_avg_in_year_of_diagnosis_non_exclusive_HCPCS,catheterization_doc_avg_in_2013_non_exclusive_HCPCS,catheterization_doc_avg_in_year_before_diagnosis_non_exclusive_HCPCS,angioplasty_doc_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,angioplasty_doc_prp_in_2013_05_non_exclusive_HCPCS,angioplasty_doc_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,angioplasty_doc_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,angioplasty_doc_prp_in_2013_03_non_exclusive_HCPCS,angioplasty_doc_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,angioplasty_doc_avg_in_year_of_diagnosis_non_exclusive_HCPCS,angioplasty_doc_avg_in_2013_non_exclusive_HCPCS,angioplasty_doc_avg_in_year_before_diagnosis_non_exclusive_HCPCS,most_common_physician_2013_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,most_common_physician_2013_prp_in_2013_05_non_exclusive_HCPCS,most_common_physician_2013_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,most_common_physician_2013_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,most_common_physician_2013_prp_in_2013_03_non_exclusive_HCPCS,most_common_physician_2013_prp_in_year_before_diagnosis_03_non_e
xclusive_HCPCS,most_common_physician_2013_avg_in_year_of_diagnosis_non_exclusive_HCPCS,most_common_physician_2013_avg_in_2013_non_exclusive_HCPCS,most_common_physician_2013_avg_in_year_before_diagnosis_non_exclusive_HCPCS,most_common_primary_care_physician_2013_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,most_common_primary_care_physician_2013_prp_in_2013_05_non_exclusive_HCPCS,most_common_primary_care_physician_2013_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,most_common_primary_care_physician_2013_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,most_common_primary_care_physician_2013_prp_in_2013_03_non_exclusive_HCPCS,most_common_primary_care_physician_2013_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,most_common_primary_care_physician_2013_avg_in_year_of_diagnosis_non_exclusive_HCPCS,most_common_primary_care_physician_2013_avg_in_2013_non_exclusive_HCPCS,most_common_primary_care_physician_2013_avg_in_year_before_diagnosis_non_exclusive_HCPCS,most_common_cardiologist_2013_prp_in_year_of_diagnosis_05_non_exclusive_HCPCS,most_common_cardiologist_2013_prp_in_2013_05_non_exclusive_HCPCS,most_common_cardiologist_2013_prp_in_year_before_diagnosis_05_non_exclusive_HCPCS,most_common_cardiologist_2013_prp_in_year_of_diagnosis_03_non_exclusive_HCPCS,most_common_cardiologist_2013_prp_in_2013_03_non_exclusive_HCPCS,most_common_cardiologist_2013_prp_in_year_before_diagnosis_03_non_exclusive_HCPCS,most_common_cardiologist_2013_avg_in_year_of_diagnosis_non_exclusive_HCPCS,most_common_cardiologist_2013_avg_in_2013_non_exclusive_HCPCS,most_common_cardiologist_2013_avg_in_year_before_diagnosis_non_exclusive_HCPCS,MI_date,RUCC_2013
<int>,<date>,<dbl>,<int>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<date>,<date>,<chr>,<chr>,<chr>,<dbl>,<int>,<lgl>,<lgl>,<dbl>,<int>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<date>,<chr>,<date>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<int>,<dbl>,<fct>,<dbl>,<fct>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<dbl>,<dbl>,<int>,<lgl>,<dbl>,<dbl>,<int>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>
100004643,2018-07-24,2164.96,2,209.35,2,5,1,0,0,4,0,0.0,1,16.26,0,0,0,0,1,60.06,0,0.0,0,0.0,0,0,True,False,False,False,False,True,False,False,False,False,False,True,,,,,1922000801,2018,,,,1098.55,1,104.3,1,5,1,0,0,4,0,0.0,1,16.26,0,0,0,0,0,0.0,0,0.0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,209.35,0.0,16.26,0,0,60.06,0.0,0.0,0,104.3,0.0,16.26,0,0,0.0,0.0,0.0,0,269.41,0.0,104.3,0.0,3,0,1,0,13,34,240,2,1,77,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,⋯,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,False,,19,270,2,1,82,12,12,False,False,,1,1-2,1,1-2,1922000801.0,2.0,6.0,,,,1922000801.0,2.0,6.0,1639140775,2,11,1639140775,2,11,,,,0.0,0.0,0,False,0.0,0.0,0,False,2164.96,1098.55,0.0,0.0,0.0,0.0,0.0,0.0,0.04925054,0.06811989,0.07415254,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.04925054,0.06811989,0.07415254,0.0,0.0,0.0,0.0,0.0,0.0,0.04925054,0.06811989,0.07415254,,,,,,,,,,,,,,,,,,,0.0,0.08333333,0.0,0.0,0.1666667,0.0,0.0,0.12643678,0.0,0.0,0.08333333,0,0.0,0.1666667,0,0.0,0.12643678,0,,,,,,,,,,,2.0
100005675,2020-07-06,4598.06,0,0.0,0,20,5,1,1,13,1,166.66,5,44.6,0,0,0,0,0,0.0,2,154.9,0,0.0,0,0,True,False,False,False,False,False,False,False,False,False,False,True,2020-07-13,,1538172747.0,,1477715811,2020,,,,2699.46,0,0.0,0,10,3,0,1,6,1,166.66,1,8.92,0,0,0,0,0,0.0,2,154.9,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0,166.66,44.6,0,0,0.0,154.9,0.0,0,0.0,166.66,8.92,0,0,0.0,154.9,0.0,0,154.9,166.66,154.9,166.66,2,1,2,1,13,22,10,2,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,⋯,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,V,20200715.0,2020-07-15,True,2020-07-15,22,10,2,1,83,1,6,,,9.0,2,1-2,2,1-2,,,,,,,,,,1558377663,5,11,1558377663,5,11,,,,0.0,250422.85,1,True,0.0,250422.9,1,True,255020.91,253122.31,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,1.0,1.0,,1.0,0.84615385,,0.85416667,1.0,0.1428571,1.0,1.0,0.2857143,1.0,1.0,0.2,1.0,,,,,,,,,,,0.0,0.0,,0.0,0.0,,0.05555556,0.0,,0.0,0,,0.0,0,,0.05555556,0,,,,,,,,,,,3.0
100007109,2013-12-26,2257.14,19,1460.8,1,10,1,2,3,4,0,0.0,1,19.98,0,0,0,0,0,0.0,0,0.0,0,0.0,0,0,True,False,False,False,False,True,False,False,False,True,True,False,,,,,1922185792,2013,,,,871.19,8,761.97,1,4,1,2,0,1,0,0.0,1,19.98,0,0,0,0,0,0.0,0,0.0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,1460.8,0.0,19.98,0,0,0.0,0.0,0.0,0,761.97,0.0,19.98,0,0,0.0,0.0,0.0,0,1460.8,0.0,761.97,0.0,19,0,8,0,13,7,40,1,1,76,0,0,B,B,B,B,B,B,B,B,B,B,B,B,0,0,0,0,0,0,0,0,0,0,0,0,,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,V,20200306.0,2020-03-06,True,2020-03-06,7,40,1,1,76,0,12,False,False,2262.0,2,1-2,3,3-4,1922185792.0,10.0,11.0,1922185792.0,10.0,11.0,1235111857.0,1.0,6.0,1922185792,6,11,1922185792,6,11,1356323349.0,2.0,6.0,1078.5,0.0,0,False,0.0,0.0,0,False,3335.64,871.19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08333333,0.08333333,0.0,0.08333333,0.08333333,0.0,0.06306306,0.06306306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,,0.0,0.0,,0.0,0.0,,0.0,0.0,,
100008015,2013-02-18,13622.46,39,1909.86,28,26,3,5,4,14,0,0.0,6,92.64,0,0,0,0,1,83.54,0,0.0,0,0.0,0,0,True,False,False,False,False,True,False,False,False,False,True,False,,,,,1205894102,2013,,,,2126.04,6,576.64,4,5,2,1,0,2,0,0.0,4,75.81,0,0,0,0,1,83.54,0,0.0,0,0.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,1909.86,0.0,92.64,0,0,83.54,0.0,0.0,0,576.64,0.0,75.81,0,0,83.54,0.0,0.0,0,1993.4,0.0,660.18,0.0,40,0,7,0,13,22,130,1,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,⋯,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,False,,22,130,1,1,76,12,12,False,False,,0,0,0,0,1013994185.0,28.0,6.0,1417934522.0,2.0,11.0,1013994185.0,28.0,6.0,1013994185,27,6,1417934522,2,11,1013994185.0,27.0,6.0,7092.44,18795.06,1,True,0.0,0.0,0,False,39509.96,2126.04,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.08108108,0.05263158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.08108108,0.05263158,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.08108108,0.05263158,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.08108108,0.05263158,,1.0
100008543,2014-04-16,2149.56,12,869.89,2,22,4,5,1,12,0,0.0,3,25.29,0,0,0,0,0,0.0,0,0.0,0,0.0,0,0,True,False,False,False,False,False,False,False,False,False,True,False,,,,,1033107701,2014,,,,178.71,2,146.81,1,4,3,1,0,0,0,0.0,0,0.0,0,0,0,0,0,0.0,0,0.0,0,0.0,0,0,0,0,490.65,0,0,0,0,0,0,0,0,126.3,0,0,0,0,0,0,869.89,0.0,515.94,0,0,0.0,0.0,0.0,0,146.81,0.0,126.3,0,0,0.0,0.0,0.0,0,869.89,0.0,146.81,0.0,12,0,2,0,13,49,90,1,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,V,20200206.0,2020-02-06,True,2020-02-06,49,90,1,1,77,12,12,False,False,2122.0,0,0,0,0,1033107701.0,4.0,8.0,1033107701.0,4.0,8.0,1700860954.0,2.0,6.0,1033107701,7,8,1033107701,7,8,1700860954.0,3.0,6.0,6337.03,0.0,0,False,372.6,0.0,0,False,8486.59,551.31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.86,0.84375,0.77906977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,1.0,1.0,1.0,1.0,1.0,1.0,0.86,0.84375,0.77906977,,2.0
100010653,2014-02-14,16861.51,16,1376.9,4,49,9,7,2,31,1,345.44,12,114.88,0,0,0,0,0,0.0,2,133.1,2,991.67,0,0,True,True,False,False,False,False,True,False,False,False,True,False,2014-02-14,2014-02-24,1790758688.0,1114990025.0,1790758688,2014,10.0,False,True,3309.14,3,258.31,2,13,7,2,1,3,1,345.44,5,36.15,0,0,0,0,0,0.0,1,66.55,2,991.67,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,1376.9,345.44,114.88,0,0,0.0,133.1,991.67,0,258.31,345.44,36.15,0,0,0.0,66.55,991.67,0,1510.0,1337.11,324.86,1337.11,18,3,4,3,13,33,740,2,1,76,0,0,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,,⋯,,,,,,,,,,,,,,,,,,,,,,,2018-05-29,True,2018-05-29,33,740,2,1,77,12,12,False,False,1565.0,3,3-4,4,3-4,1801899877.0,4.0,6.0,1518020502.0,2.0,11.0,1801899877.0,4.0,6.0,1023073103,3,29,1518020502,2,11,,,,176109.78,311264.38,4,True,0.0,0.0,0,False,504235.67,3309.14,0.08333333,0.0,0.0,0.08333333,0.25,0.1666667,0.08031088,0.15428571,0.10191083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08333333,0.0,0.0,0.08333333,0.25,0.16666667,0.08031088,0.15428571,0.10191083,0.1666667,0.0,0.0,0.1666667,0.0,0.0,0.11578947,0.04214559,0.04489796,0.1666667,0.0,0.0,0.1666667,0.0,0.0,0.1157895,0.04214559,0.04489796,0.0,0.0,0.0,0.1666667,0.0,0.0,0.0776699,0.1003521,0.118705,0.25,0.45454545,0.4166667,0.25,0.6363636,0.5833333,0.3769231,0.43103448,0.35211268,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,,,,,,,,,,,3.0


## Adding sex, race, and state names from codes

In [99]:
#' Attach demographic lookups, an age band and an urban flag to a cohort table.
#'
#' Left-joins three lookup tables that must already exist in the calling
#' environment (`census_and_state_codes`, `race_codes`, `sex_codes`) onto
#' `data`, then derives two columns:
#'   * `age_group`: "65-74", "75-84" or "85+" from `age_at_diagnosis`
#'     (NA when the age is missing or not above 64, because `case_when()`
#'     has no fallback branch);
#'   * `urban`: `RUCC_2013 <= 3` (NA when `RUCC_2013` is missing).
#'
#' @param data A data frame / data.table containing the columns
#'   `state_code_at_diagnosis`, `race_code_at_diagnosis`,
#'   `sex_code_at_diagnosis`, `age_at_diagnosis` and `RUCC_2013`.
#' @return A data.table holding the columns of `data`, the columns brought in
#'   by the three joins, and the new `age_group` and `urban` columns.
add_personal_details <- function(data) {
  # library() instead of require(): a missing package now stops right here
  # with a clear error, rather than require() returning FALSE and the
  # pipeline failing later with an unrelated "could not find function".
  library(tidyverse)
  library(dtplyr)
  library(lubridate)

  result <- data %>%
    # The first column of the state lookup is dropped before joining.
    left_join(
      census_and_state_codes[, -1],
      by = c("state_code_at_diagnosis" = "state_code")
    ) %>%
    left_join(race_codes, by = c("race_code_at_diagnosis" = "race_code")) %>%
    left_join(sex_codes, by = c("sex_code_at_diagnosis" = "sex_code")) %>%
    mutate(
      age_group = case_when(
        age_at_diagnosis < 75 & age_at_diagnosis > 64 ~ "65-74",
        age_at_diagnosis > 74 & age_at_diagnosis < 85 ~ "75-84",
        age_at_diagnosis > 84 ~ "85+"
      ),
      urban = (RUCC_2013 <= 3)
    ) %>%
    as.data.table()

  result
}

In [100]:
# Enrich the stable-angina table in place (overwrites the variable) with the
# joined lookups, age band and urban flag. The unstable-angina call below is
# left disabled, as in the original notebook.
yearly_calculations_stable_angina <- add_personal_details(yearly_calculations_stable_angina)
#yearly_calculations_unstable_angina=add_personal_details(yearly_calculations_unstable_angina)

In [101]:
# Show the first 10 rows flagged as died within one year of diagnosis, with
# the days from diagnosis to death and that gap in whole 30-day blocks.
# The last expression is deliberately unnamed, so data.table labels it V5.
head(
  yearly_calculations_stable_angina[
    died_in_one_year_after_diagnosis == TRUE,
    .(
      DESY_SORT_KEY,
      died_in_one_year_after_diagnosis,
      HMO_INDICATOR_sum,
      date_difference_diagnosis_death,
      floor(date_difference_diagnosis_death / 30)
    )
  ],
  10
)

DESY_SORT_KEY,died_in_one_year_after_diagnosis,HMO_INDICATOR_sum,date_difference_diagnosis_death,V5
<int>,<lgl>,<dbl>,<int>,<dbl>
100025905,True,9,59,1
100031159,True,11,183,6
100046407,True,9,25,0
100078509,True,12,215,7
100088049,True,12,302,10
100103847,True,8,127,4
100116211,True,8,39,1
100147445,True,4,6,0
100193467,True,10,57,1
100209373,True,10,44,1


## Filtering the data for analysis

In [102]:
# Keep only the rows of `data` that satisfy every modelling inclusion rule.
#
# Each argument to filter() below is one rule; filter() keeps a row only when
# all of them evaluate to TRUE, so a rule that evaluates to NA drops the row
# (this includes rows whose died_in_one_year_after_diagnosis flag is NA).
# Relies on `%!in%` and `non_us_state_codes` being defined in the calling
# environment. Returns a data.table.
data_for_modelling_filter <- function(data) {
  library(tidyverse)
  library(dtplyr)

  kept_rows <- filter(
    data,
    # State at diagnosis is not one of the excluded codes.
    state_code_at_diagnosis %!in% non_us_state_codes,
    age_at_diagnosis >= 65,
    # HMO indicator sum: all 12 for rows not flagged as dying within a year;
    # for rows flagged as dying, at least floor(days to death / 30) - 1.
    # NOTE(review): presumably this sum counts qualifying months - confirm.
    (died_in_one_year_after_diagnosis == TRUE &
      HMO_INDICATOR_sum >= floor(date_difference_diagnosis_death / 30) - 1) |
      (died_in_one_year_after_diagnosis == FALSE & HMO_INDICATOR_sum == 12),
    # Same rule applied to the entitlement/buy-in indicator sum.
    (died_in_one_year_after_diagnosis == TRUE &
      ENTITLEMENT_BUY_IN_IND_sum >= floor(date_difference_diagnosis_death / 30) - 1) |
      (died_in_one_year_after_diagnosis == FALSE & ENTITLEMENT_BUY_IN_IND_sum == 12),
    year_first_diagnosed != 2013,
    !is.na(most_common_cardiologist_PRF_PHYSN_NPI),
    # A recorded catheterization must precede the MI date when both exist.
    is.na(MI_date) |
      is.na(catheterization_date) |
      catheterization_date < MI_date,
    # A recorded angioplasty must precede the MI date when both exist.
    is.na(MI_date) |
      is.na(angioplasty_date) |
      angioplasty_date < MI_date
  )

  as.data.table(kept_rows)
}



In [103]:
# Replace the stable-angina table with its modelling subset. The
# unstable-angina call below is left disabled, as in the original notebook.
yearly_calculations_stable_angina <- data_for_modelling_filter(yearly_calculations_stable_angina)
#yearly_calculations_unstable_angina=data_for_modelling_filter(yearly_calculations_unstable_angina)

In [104]:
# Number of distinct DESY_SORT_KEY values in the stable-angina table.
uniqueN(yearly_calculations_stable_angina$DESY_SORT_KEY)

In [105]:
# Number of distinct DESY_SORT_KEY values in the unstable-angina table.
# NOTE(review): the add_personal_details / data_for_modelling_filter calls for
# this table are commented out in the cells above - confirm this count is
# meant to be taken on the table as it was built earlier.
uniqueN(yearly_calculations_unstable_angina$DESY_SORT_KEY)

In [106]:
# Back-fill missing angioplasty flags with FALSE, updating both cohort tables
# by reference (data.table `:=`); rows that already hold a value are untouched.
yearly_calculations_stable_angina[
  is.na(angioplasty_with_cath),
  angioplasty_with_cath := FALSE
]
yearly_calculations_unstable_angina[
  is.na(angioplasty_with_cath),
  angioplasty_with_cath := FALSE
]
yearly_calculations_stable_angina[
  is.na(angioplasty_during_year_after_cath),
  angioplasty_during_year_after_cath := FALSE
]
yearly_calculations_unstable_angina[
  is.na(angioplasty_during_year_after_cath),
  angioplasty_during_year_after_cath := FALSE
]