## R script for OMOP to load Toxics Release Inventory (TRI) data

### Setup

In [78]:
library(DBI)

# make db connection
# for format of database file see db/env/db_conf.txt
# The settings file is read as a space-separated table with a header row.
# Only the first row is used for every setting, so a file with several rows
# cannot produce a comma-joined user name or password.
db <- read.delim( '../../db/env/feta.txt', header=TRUE, sep=' ' )
con <- dbConnect(RPostgres::Postgres(),
                 dbname = toString(db$database[1]),
                 host = toString(db$host[1]),
                 port = 5432,
                 user = toString(db$user[1]),
                 password = toString(db$pass[1]))

# check the connection
dbListTables(con)

### load TRI data

In [None]:
# Read the TRI export into a data frame. The defaults relied on later are
# spelled out: the first line is the header, and column names are converted to
# syntactic R names (later cells refer to columns such as X2..TRIFD).
tridata <- read.csv(file = "tri_2018_us.csv", header = TRUE, check.names = TRUE)

In [None]:
#tridata[1:50,10:30]

In [None]:
# function to populate names, types and units in respective attribute tables
#
# Makes sure every entry of `words` is present in the `name` column of `table`
# and then returns the complete content of that table.
#
# table:   name of the vocabulary table. It is pasted into the SQL text as-is,
#          so it must be a trusted identifier (not user input).
# words:   vector of names to register; factors are converted to character and
#          duplicates are dropped.
# returns: data frame holding all rows and columns of `table`.
#
# Uses the global DBI connection `con`.
loadvocab <- function(table,words) {
    words <- unique(as.character(words))

    # Compare against every stored name with exact matching, so a partially
    # filled table is completed instead of being skipped or duplicated.
    existing <- dbGetQuery(con, paste("select name from ",table))$name
    to_add <- words[!(words %in% existing)]

    # dbExecute() prepares, binds, runs and releases the statement in one call,
    # so no result set is left open when an insert fails.
    sql <- paste("insert into ",table," (name) values($1)")
    for (word in to_add) {
        dbExecute(con, sql, params = list(word))
    }

    dbGetQuery(con, paste("select * from ",table))
}

In [None]:
# get all units, location types, and so on
# Column values are converted to character first, so this works whether
# read.csv() produced factors or plain strings.
# 'Boolean' is appended as an additional unit instead of being written to a
# fixed position, so no unit found in the data can be overwritten.
attunits <- unique(c(unique(as.character(tridata[,39])), 'Boolean'))
attunits <- loadvocab('hz_att_unit',attunits)
#attunits
loctypes <- unique(as.character(tridata[,16]))
loctypes <- loadvocab('hz_type',loctypes)
#loctypes
attnames <- unique(as.character(tridata[,30]))
attnames <- loadvocab('hz_att_name',attnames)
#attnames
attcats <- c('Carcinogen','Metal','Carcinogen|Metal')
attcats <- loadvocab('hz_att_category',attcats)
#attcats

### Put TRI data into database 

In [None]:
# Keep only the TRIFD column of the rows whose state column equals "FL".
# which() drops rows where the comparison is NA, as subset() does.
flIDs <- tridata[which(tridata[["X8..ST"]] == "FL"), "X2..TRIFD", drop = FALSE]
# one entry per distinct facility id
locIDs <- unique(flIDs[[1]])

In [None]:
# For every facility id in locIDs: insert one row into hazard_point, then one
# row into hz_attribute for each TRI record of that facility.
#
# Reads the globals tridata, locIDs, loctypes, attunits, attnames, attcats and
# the DBI connection con. Nothing is wrapped in a transaction, so a failure
# part-way through leaves the rows inserted so far in place.

# Category ids are looked up by name rather than assumed to be 1, 2 and 3.
id_carcinogen <- attcats$id[match('Carcinogen', attcats$name)]
id_metal <- attcats$id[match('Metal', attcats$name)]
id_both <- attcats$id[match('Carcinogen|Metal', attcats$name)]

attr_sql <- "insert into hz_attribute (hz_point_id,hz_category_id,hz_unit_id,hz_name_id,value) 
                    values($1,$2,$3,$4,$5)"

for (locID in locIDs) {

    # get all attributes for one location
    locAttrs <- subset(tridata, X2..TRIFD==locID)
    print(locID)

    # get location and description from first line of TRI attributes where the TRIFD is constrained
    first <- locAttrs[1,]
    hzpname <- paste(first$X4..FACILITY.NAME)
    hzpdesc <- paste(first$X5..STREET.ADDRESS,first$X6..CITY,first$X8..ST,first$X9..ZIP)
    # columns 12 and 13 are used as latitude and longitude
    # (presumably the TRI LATITUDE/LONGITUDE fields - confirm against the file layout)
    hzplat <- first[1,12]
    hzplon <- first[1,13]
    # match() yields exactly one id, or NA (stored as NULL) when the sector is unknown
    hzptype <- loctypes$id[match(first$X16..INDUSTRY.SECTOR, loctypes$name)]
    hzpsource <- 1

    # create a point from the first line of TRI attributes where the TRIFD is constrained
    sql <- 'insert into hazard_point ("name", "desc", "geom", "hz_type_id", "hz_source_id") 
            values ($1,$2,ST_MakePoint($3,$4),$5,$6)
            returning id'
    # dbGetQuery() binds, fetches and always releases the result set
    hazard_point_ID <- dbGetQuery(con, sql,
                                  params = list(hzpname,hzpdesc,hzplon,hzplat,hzptype,hzpsource))[1,1]
    #hazard_point_ID = 1
    #print(paste(hzpname,hzpdesc,hzplon,hzplat,hzpsource))

    # step through attributes from this TRIFD and add to attributes table
    # Rows are visited by index instead of apply(): apply() converts the data
    # frame to a character matrix, which formats numeric columns as padded,
    # rounded strings before they reach the database.
    for (i in seq_len(nrow(locAttrs))) {
        # columns 35 and 37 are the flags that select the 'Metal' and
        # 'Carcinogen' categories; a missing flag counts as not "YES"
        is_metal <- identical(as.character(locAttrs[i,35]), "YES")
        is_carcinogen <- identical(as.character(locAttrs[i,37]), "YES")

        hz_category_id <- NA
        if (is_metal && is_carcinogen) {
            hz_category_id <- id_both
        } else if (is_metal) {
            hz_category_id <- id_metal
        } else if (is_carcinogen) {
            hz_category_id <- id_carcinogen
        }

        hz_unit_id <- attunits$id[match(locAttrs[i,39], attunits$name)]
        hz_name_id <- attnames$id[match(locAttrs[i,30], attnames$name)]

        #NOTE this is a major simplification of the TRI data - only the On-Site Release Total as the value
        value <- locAttrs[i,54]
        if (is.factor(value)) {
            value <- as.character(value)
        }

        dbExecute(con, attr_sql,
                  params = list(hazard_point_ID,hz_category_id,hz_unit_id,hz_name_id,value))
        #print(paste(hazard_point_ID,hz_category_id,hz_unit_id,hz_name_id,value))
    }
}