In [2]:
library(here)
library(here)
library(readr, warn.conflicts = FALSE)
library(arrow, warn.conflicts = FALSE)
library(magrittr, warn.conflicts = FALSE)
library(stringr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(rlang, warn.conflicts = FALSE)
library(data.table, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
library(truveta.study)
library(dbplyr)

here() starts at /home/jovyan/studyrepositories/hypertensioncohorts-is5fyzbqztbebenalfbuo4boem

Loading required package: DBI


Attaching package: ‘dbplyr’


The following objects are masked from ‘package:dplyr’:

    ident, sql




In [3]:
# Set up the handles every later cell relies on: the connection, the study,
# its population, and the snapshot that the SQL queries run against.
# (Descriptions follow the helper names from truveta.study -- confirm there.)
con <- create_connection()
study <- con %>% get_study(title = "Hypertension Cohort")
population <- con %>% get_population(study, title = "Study Cohort")
snapshot <- con %>% get_latest_snapshot(population, finished = TRUE)

To sign in to Truveta, use a web browser to open the page https://login.truveta.com/activate?user_code=GKRF-RMVJ.

Signed in successfully.



In [32]:
# Marital status: distinct patients per concept, plus each concept's share.
# The inner join keeps only Person rows whose MaritalStatusConceptId matches
# a row in Concept.
sql <- "select p.MaritalStatusConceptId, c. ConceptName, count(distinct p.Id) as PatientCount
from Person p join Concept c on (p.MaritalStatusConceptId = c.ConceptId)
group by MaritalStatusConceptId, ConceptName
order by PatientCount desc;"

marital_summary <- load_sql_table(con, snapshot, query = sql) %>%
  collect() %>%
  mutate(
    # Share of the total across all returned rows, in percent (3 decimals).
    Percentage = round(PatientCount / sum(PatientCount) * 100, digits = 3)
  )
marital_summary

MaritalStatusConceptId,ConceptName,PatientCount,Percentage
<int>,<chr>,<int>,<dbl>
1065583,Married,498928,41.339
1065588,Unmarried,199607,16.539
1065591,Widowed,140373,11.631
1067557,Field is not present in source,115502,9.57
1065580,Divorced,92386,7.655
1065579,Civil Union,74246,6.152
1067556,Masked,61147,5.066
1067554,Unknown,10390,0.861
1065582,Legally Separated,7576,0.628
1065587,Domestic partner,5656,0.469


In [33]:
# Religion: distinct patients per concept, plus each concept's share.
sql <- "select p.ReligionConceptId, c.ConceptName, count(distinct p.Id) as PatientCount
from Person p join Concept c on (p.ReligionConceptId = c.ConceptId)
group by ReligionConceptId, ConceptName
order by PatientCount desc;"

# Pull the aggregated rows into memory first; the percentage is computed
# locally over the collected counts.
religion_summary <- mutate(
  collect(load_sql_table(con, snapshot, query = sql)),
  Percentage = round(PatientCount / sum(PatientCount) * 100, digits = 3)
)
religion_summary

ReligionConceptId,ConceptName,PatientCount,Percentage
<int>,<chr>,<int>,<dbl>
1067556,Masked,1206910,100


In [None]:
# Preferred language: distinct patients per concept, plus each concept's share.
sql <- "select p.PreferredLanguageConceptId, c.ConceptName, count(distinct p.Id) as PatientCount
from Person p join Concept c on (p.PreferredLanguageConceptId = c.ConceptId)
group by PreferredLanguageConceptId, ConceptName
order by PatientCount desc;"

language_summary <- load_sql_table(con, snapshot, query = sql) %>%
  collect() %>%
  mutate(
    # Percent of the summed PatientCount, rounded to 3 decimals.
    Percentage = round(PatientCount / sum(PatientCount) * 100, digits = 3)
  )
language_summary

PreferredLanguageConceptId,ConceptName,PatientCount,Percentage
<int>,<chr>,<int>,<dbl>
1067556,Masked,1206910,100


In [36]:
# Ethnicity summary collapsed towards the OMB standard
# (Hispanic or Latino / Not Hispanic or Latino).
sql <- "select p.EthnicityConceptId, c.ConceptName, count(distinct p.Id) as PatientCount
from Person p join Concept c on (p.EthnicityConceptId = c.ConceptId)
group by EthnicityConceptId, ConceptName
order by PatientCount desc"

# local() keeps the helper vectors out of the global environment.
ethnicity_summary <- local({
  # Concept IDs folded into "Unknown".
  unknown_ids <- c(1067556, 1067558, 3056585, 1067561, 1067555)
  # Concept IDs whose ConceptName is kept verbatim -- presumably the two OMB
  # ethnicity concepts; confirm against the Concept table.
  omb_ids <- c(1065401, 1065359)

  load_sql_table(con, snapshot, query = sql) %>%
    collect() %>%
    mutate(
      Ethnicity = case_when(
        EthnicityConceptId %in% unknown_ids ~ "Unknown",
        EthnicityConceptId %in% omb_ids ~ ConceptName,
        # Anything else is labelled with an "Other-" prefix.
        TRUE ~ paste("Other-", ConceptName)
      )
    ) %>%
    group_by(Ethnicity) %>%
    summarise(Count = sum(PatientCount)) %>%
    arrange(desc(Count)) %>%
    mutate(Percentage = round(Count / sum(Count) * 100, 3))
})
ethnicity_summary

Ethnicity,Count,Percentage
<chr>,<int>,<dbl>
Not Hispanic or Latino,1040618,86.222
Unknown,93030,7.708
Hispanic or Latino,73247,6.069
Other- Colombian,15,0.001


In [37]:
# Concept IDs that carry no usable demographic value. The race and gender
# cells recode rows with these IDs as "Unknown".
useless <- c(
  1067556, # "Masked" in the marital-status output
  1067558,
  3056585,
  1067561,
  1067555
)

In [42]:
# Race summary rolled up to the five OMB standard categories.
sql <- "select p.RaceConceptId, c.ConceptName, count(distinct p.PersonId) as PatientCount
from PersonRace p join Concept c on (p.RaceConceptId = c.ConceptId)
group by RaceConceptId, ConceptName
order by PatientCount desc"
races <- collect(load_sql_table(con, snapshot, query = sql))

# Target category labels; names that end up here are reported unchanged.
OMBRace <- c(
  "Black or African American",
  "Native Hawaiian or Other Pacific Islander",
  "American Indian or Alaska Native",
  "Asian",
  "White"
)

# NOTE(review): PatientCount is distinct per RaceConceptId, so a person listed
# under two concepts that merge into one category would be summed twice --
# confirm whether that is acceptable for these percentages.
race_summary <- races %>%
  mutate(
    # Step 1: fold detailed concept names into their OMB parent category.
    CombinedRaceName = case_when(
      ConceptName %in% c("African American", "Black", "African") ~
        "Black or African American",
      ConceptName %in% c("Other Pacific Islander", "Native Hawaiian") ~
        "Native Hawaiian or Other Pacific Islander",
      ConceptName %in% c("American Indian", "Alaska Native") ~
        "American Indian or Alaska Native",
      ConceptName %in% c("Asian Indian", "Korean", "Filipino") ~
        "Asian",
      TRUE ~ ConceptName
    ),
    # Step 2: OMB categories pass through, `useless` IDs become "Unknown",
    # and everything else gets an "Other-" prefix.
    Race = case_when(
      CombinedRaceName %in% OMBRace ~ CombinedRaceName,
      RaceConceptId %in% useless ~ "Unknown",
      TRUE ~ paste("Other-", CombinedRaceName)
    )
  ) %>%
  group_by(Race) %>%
  summarise(Count = sum(PatientCount)) %>%
  arrange(desc(Count)) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 3))
race_summary

Race,Count,Percentage
<chr>,<int>,<dbl>
White,888147,73.781
Black or African American,184790,15.351
Other- Other Race,42685,3.546
Asian,33820,2.81
Unknown,33703,2.8
Other- Declined to answer,7459,0.62
American Indian or Alaska Native,5395,0.448
Other- Multiracial,4270,0.355
Native Hawaiian or Other Pacific Islander,3447,0.286
Other- Middle Eastern or North African,23,0.002


In [43]:
# Gender summary: distinct patients per gender, with `useless` concept IDs
# pooled into a single "Unknown" row.
sql <- "SELECT p.GenderConceptId, c.ConceptName, count(distinct Id) as PatientCount
from Person p join Concept c on (p.GenderConceptId = c.ConceptId)
group by GenderConceptId, ConceptName
order by PatientCount desc;"

gender_summary <- load_sql_table(con, snapshot, query = sql) %>%
  collect() %>%
  mutate(
    Gender = if_else(GenderConceptId %in% useless, "Unknown", ConceptName)
  ) %>%
  group_by(Gender) %>%
  summarise(Count = sum(PatientCount)) %>%
  arrange(desc(Count)) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 3))
gender_summary

Gender,Count,Percentage
<chr>,<int>,<dbl>
Female,637959,52.859
Male,565946,46.892
Unknown,2998,0.248
Other,7,0.001
