## R script for OMOP GIS use cases

### Setup

In [1]:
library(DBI)

# Make the database connection.
# For the format of the database file see db/env/db_conf.txt
db <- read.delim("../../db/env/local.txt", header = TRUE, sep = " ")

# Every credential is taken from the first row of the config. toString()
# collapses a multi-element vector into "a, b", so an un-indexed column from a
# file with more than one row would yield a malformed user or password.
con <- dbConnect(
  RPostgres::Postgres(),
  dbname = toString(db$database[1]),
  host = toString(db$host[1]),
  port = 5432,
  user = toString(db$user[1]),
  password = toString(db$pass[1])
)

# Check the connection by listing the available tables.
dbListTables(con)

### Examples

### 1 - get all patients within a particular census tract
UTM 17N - EPSG 32617 (meters)

In [None]:
# Persons whose location lies within census tract 12086009100.
sql <- "SELECT row_number() over () AS _uid_,* FROM (
          SELECT PS.*,L.geom_local FROM persons PS 
          JOIN location_history LH ON PS.person_id = LH.entity_id 
          JOIN locations L ON L.location_id = LH.location_id 
          JOIN geo_miamidade_census_tract_2018 CT ON ST_Within(L.geom_local, CT.geom_local)
          WHERE CT.source_id_value = '12086009100'
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

### 2 - get all patients within a census tract with annual income below $15,000

**Use Case:** Patients with demographic criteria X that live within area Y, for duration Z  
**Example:** Patients who are over 100 years old that live within the TMC hospital service area for over 5 years  
**Functionality Requirements:** 3a, 3b, 4b  

**note:** this does not have the 'duration' component 

UTM 17N - EPSG 32617 (meters)

In [None]:
# Persons located in any census tract that has an ACS attribute value
# below 15000.
# NOTE(review): the join does not restrict attr_florida_acs to a specific
# attribute; presumably the table only holds the income attribute - confirm.
sql <- "SELECT row_number() over () AS _uid_,* FROM (
          SELECT PS.*,L.geom_local from persons PS 
          JOIN location_history LH ON PS.person_id = LH.entity_id 
          JOIN locations L on L.location_id = LH.location_id 
          JOIN geo_miamidade_census_tract_2018 CT ON ST_Within(L.geom_local, CT.geom_local)
          JOIN attr_florida_acs ATT ON ATT.geo_record_id = CT.geo_record_id
          WHERE ATT.value_as_number < 15000
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

### 3 - get all patients within 1km of a carcinogen emitter (TRI data)

**Use Case:** Patients who live within distance X of non-clinical feature Y  
**Example:** Patients who live within 5 miles of a paper mill  
**Functionality Requirements:** 3a, 4a, 6a/6b, 3e 


UTM 17N - EPSG 32617 (meters) - ohdsi schema

In [None]:
# Build 1000-unit buffers (metres in the local UTM projection) around TRI
# sites whose qualifier_concept_id is 1 or 3, then return the persons whose
# location lies inside any of those buffers.
sql <- "WITH buffers AS (
            SELECT HP.name, ST_Buffer(HP.geom_local,1000) AS geom_local FROM attr_florida_tri_2018 HA
            JOIN geo_florida_tri_2018 HP ON HA.geo_record_id = HP.geo_record_id
            WHERE (HA.qualifier_concept_id = 1 or HA.qualifier_concept_id = 3)
        ) 
        SELECT PS.*,L.geom_local FROM persons PS 
        JOIN location_history LH ON PS.person_id = LH.entity_id 
        JOIN locations L ON L.location_id = LH.location_id 
        JOIN buffers ON ST_Within(L.geom_local, buffers.geom_local)"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

Same query without a WITH clause. As originally written this was very slow (it never completed on a local machine).

In [None]:
# Persons within 1000 units (metres in the local UTM projection) of a TRI site
# whose qualifier_concept_id is 1 or 3, written as a single join.
# The earlier form of this join, ST_Within(L.geom_local, ST_Buffer(...)),
# never completed locally. ST_DWithin is the PostGIS-recommended radius test:
# it compares distances directly instead of building a buffer polygon per site.
# NOTE(review): expected to be faster, but not benchmarked here.
sql <- "SELECT row_number() over () AS _uid_,* FROM (
            SELECT PS.*,L.geom_local FROM persons PS 
            JOIN location_history LH ON PS.person_id = LH.entity_id 
            JOIN locations L ON L.location_id = LH.location_id 
            JOIN geo_florida_tri_2018 HP ON ST_DWithin(L.geom_local, HP.geom_local, 1000)
            JOIN attr_florida_tri_2018 HA ON HP.geo_record_id = HA.geo_record_id
            WHERE HA.qualifier_concept_id = 1 or HA.qualifier_concept_id = 3
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

### 4 - get all patients within a census tract that contains a carcinogen emitter (TRI data)

**Use Case:** Patients who live in an area that contains a non-clinical feature Y  
**Example:** Paper mill in same census tract as residence  
**Functionality Requirements:** 3a, 4a, 4b

UTM 17N - EPSG 32617 (meters)

In [None]:
# CG: TRI sites whose qualifier_concept_id is 1 or 3.
# CT: census tracts that contain at least one CG site.
# Result: persons whose location lies within any CT tract.
sql <- "SELECT row_number() OVER () AS _uid_,* FROM (
          WITH CG AS (
            SELECT HZP.name, HZP.geom_local FROM attr_florida_tri_2018 ATT
            INNER JOIN geo_florida_tri_2018 HZP ON ATT.geo_record_id = HZP.geo_record_id
            WHERE ATT.qualifier_concept_id = 1 or ATT.qualifier_concept_id = 3
          ), 
          CT AS (
            SELECT DD.source_id_value, DD.geom_local FROM geo_miamidade_census_tract_2018 DD
            JOIN CG ON ST_Contains(DD.geom_local,CG.geom_local)
          ) 
          SELECT PS.*,L.geom_local FROM persons PS 
          INNER JOIN location_history LH ON PS.person_id = LH.entity_id 
          INNER JOIN locations L ON L.location_id = LH.location_id 
          JOIN CT ON ST_Within(L.geom_local, CT.geom_local) 
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

### 5 - get all patients within 1km of network care sites

**Use Case:** Given a list of care sites, patients who lived within Y miles on date range Z  
**Example:** For a list of in-network care sites, which patients live within 10 miles  
**Functionality Requirements:** 3a, 3b, 4a, 6b

UTM 17N - EPSG 32617 (meters)

In [None]:
# CB: 1000-unit buffers (metres in the local UTM projection) around care sites
#     with place_of_service_concept_id = 0.
# Result: persons whose location lies inside any CB buffer.
sql <- "SELECT row_number() OVER () AS _uid_,* FROM (
          WITH CB AS (
            SELECT CS.care_site_name, ST_Buffer(CSL.geom_local,1000) AS geom_local FROM care_site CS
            INNER JOIN site_history SH ON SH.entity_id = CS.care_site_id
            INNER JOIN sites CSL ON SH.site_id = CSL.site_id
            WHERE CS.place_of_service_concept_id = 0
          ) 
          SELECT PS.*,L.geom_local FROM persons PS 
          JOIN location_history LH ON PS.person_id = LH.entity_id 
          JOIN locations L ON L.location_id = LH.location_id 
          JOIN CB ON ST_Within(L.geom_local, CB.geom_local)
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

### 6 - get all patients who are more than 5 kilometers from a care site

**Use Case:** Patients who traveled over Y miles for primary care visit  
**Example:** Patients who must travel greater than 50 miles to PCP  
**Functionality Requirements:** 3a, 3b, 6a

UTM 17N - EPSG 32617 (meters)

In [None]:
# CB: 5000-unit buffers (metres in the local UTM projection) around care sites
#     with place_of_service_concept_id = 0.
# Result: persons (with their locations) who do NOT have any location inside a
# CB buffer. person_id is qualified with its table in the outer query so the
# reference cannot become ambiguous if a joined table also carries a
# person_id column.
sql <- "SELECT row_number() OVER () AS _uid_,* FROM (
          WITH CB AS (
            SELECT CS.care_site_name, ST_Buffer(CSL.geom_local,5000) AS geom_local FROM care_site CS
            INNER JOIN site_history SH ON SH.entity_id = CS.care_site_id
            INNER JOIN sites CSL ON SH.site_id = CSL.site_id
            WHERE CS.place_of_service_concept_id = 0
          ) 
          SELECT persons.*,L.geom_local FROM persons 
          INNER JOIN location_history LH ON persons.person_id = LH.entity_id 
          INNER JOIN locations L ON L.location_id = LH.location_id
          WHERE persons.person_id NOT IN (
            SELECT PS.person_id FROM persons PS
            INNER JOIN location_history LH ON PS.person_id = LH.entity_id 
            INNER JOIN locations L ON L.location_id = LH.location_id 
            JOIN CB ON ST_Within(L.geom_local, CB.geom_local)
          )
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

### 7 - get avg BMI from patient measures and avg attr from ACS

get centroids for bivariate mapping?

In [None]:
# One row per census tract / ACS attribute value: the mean of
# measurement.value_as_number over persons located in the tract, alongside the
# tract's ACS value and geometry.
# NOTE(review): neither the measurement nor the ACS attribute is filtered by
# concept here; presumably the tables hold only BMI and income - confirm.
sql <- "SELECT row_number() over () AS _uid_,* FROM (
          SELECT 
            AVG(MS.value_as_number) as avg_bmi,
            ATT.value_as_number as avg_income,
            DD.name,
            DD.geom_local
          FROM persons PS 
          JOIN location_history LH ON PS.person_id = LH.entity_id 
          JOIN measurement MS ON PS.person_id = MS.person_id
          JOIN locations L ON L.location_id = LH.location_id 
          JOIN geo_miamidade_census_tract_2018 DD ON ST_Within(L.geom_local, DD.geom_local)
          JOIN attr_florida_acs ATT ON ATT.geo_record_id = DD.geo_record_id
          GROUP BY DD.name, ATT.value_as_number, DD.geom_local
        ) _subq1"
# dbGetQuery() sends the query, fetches every row and always frees the result
# set, so nothing is left open if the fetch fails part-way.
dbGetQuery(con, sql)

## libraries for leaflet visualization

In [2]:
library(leaflet)
library(sf)
library(rgdal)

"package 'sf' was built under R version 3.6.2"
Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0

"package 'rgdal' was built under R version 3.6.2"
Loading required package: sp

"package 'sp' was built under R version 3.6.2"
rgdal: version: 1.5-16, (SVN revision 1050)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
Path to GDAL shared files: /Users/tibben/Library/R/3.6/library/rgdal/gdal
GDAL binary built with GEOS: FALSE 
Loaded PROJ runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
Path to PROJ shared files: /Users/tibben/Library/R/3.6/library/rgdal/proj
Linking to sp version:1.4-2
Overwritten PROJ_LIB was /Users/tibben/Library/R/3.6/library/rgdal/proj



## Quick leaflet visualization of point data

In [3]:
# Point features: up to 100 persons located in census tract 12086009100,
# returned with their WGS84 geometry for mapping.
sql <- "SELECT row_number() over () AS _uid_,* FROM (
          SELECT PS.*,L.geom_WGS84 FROM persons PS 
          JOIN location_history LH ON PS.person_id = LH.entity_id 
          JOIN locations L ON L.location_id = LH.location_id 
          JOIN geo_miamidade_census_tract_2018 CT ON ST_Within(L.geom_local, CT.geom_local)
          WHERE CT.source_id_value = '12086009100'
          LIMIT 100
        ) _subq1"
# Read the query result straight into an sf object.
pftrs <- st_read(dsn = con, query = sql)

In [4]:
# Polygon features: every census tract with its identifier and WGS84 geometry.
sql <- "SELECT source_id_value,geom_wgs84 from geo_miamidade_census_tract_2018"
# Read the query result straight into an sf object.
ctftrs <- st_read(dsn = con, query = sql)

In [None]:
# plot(pftrs$geom_wgs84)

In [5]:
# Map of the person points (markers) over the census tract outlines.
m <- leaflet(data = pftrs$geom_wgs84) %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addMarkers() %>% # Add default markers from data
  addPolygons(
    data = ctftrs,
    # `stroke` is a logical on/off switch; the outline colour belongs in
    # `color`. Passing the hex string to `stroke` left the outline in the
    # default colour.
    stroke = TRUE,
    color = "#690E0E",
    weight = 0.7,
    fillColor = "#e41a1c",
    fillOpacity = 0.0,
    label = ~source_id_value,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "13px",
      direction = "auto"
    ),
    group = "census_tracts",
    # Full argument name instead of relying on partial matching of `highlight`.
    highlightOptions = highlightOptions(
      color = "black",
      weight = 4,
      bringToFront = TRUE
    )
  )

In [6]:
# Display the leaflet map stored in `m`.
m

## quick leaflet visualization of bivariate polygon data

In [8]:
# Centroid features: one point per census tract (centroid of the WGS84
# geometry) carrying avg BMI (UHealth) and avg income (ACS).
sql <- "SELECT row_number() over () AS _uid_,* FROM (
          SELECT 
            AVG(MS.value_as_number) as avg_bmi,
            ATT.value_as_number as avg_income,
            DD.source_id_value,
            ST_Centroid(DD.geom_wgs84)
          FROM persons PS 
          JOIN location_history LH ON PS.person_id = LH.entity_id 
          JOIN measurement MS ON PS.person_id = MS.person_id
          JOIN locations L ON L.location_id = LH.location_id 
          JOIN geo_miamidade_census_tract_2018 DD ON ST_Within(L.geom_local, DD.geom_local)
          JOIN attr_florida_acs ATT ON ATT.geo_record_id = DD.geo_record_id
          GROUP BY DD.source_id_value, ATT.value_as_number, DD.geom_wgs84
        ) _subq1"
# Read the query result straight into an sf object.
ctftrs <- st_read(dsn = con, query = sql)

In [38]:
# Colour function for avg_bmi: the "magma" palette split into 7 quantile
# classes. Two earlier alternatives were assigned to the same name and
# overwritten before use; they are kept here for reference only:
#   colorNumeric(palette = "magma", domain = ctftrs$avg_bmi)
#   colorBin("magma", ctftrs$avg_bmi, 9, pretty = TRUE)
bmi_pal <- colorQuantile(palette = "magma", domain = ctftrs$avg_bmi, n = 7)

In [39]:
# Bivariate circle map: radius scales with sqrt(avg_income) and the fill
# colour comes from the BMI palette. Written as nested calls, innermost
# first: base map -> default OpenStreetMap tiles -> circles.
m <- addCircles(
  addTiles(leaflet(ctftrs)),
  weight = 1,
  radius = ~sqrt(avg_income) * 2,
  fillColor = ~bmi_pal(avg_bmi),
  fillOpacity = 0.8
)

In [40]:
# Display the leaflet map stored in `m`.
m