Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Imports:
dplyr,
rJava,
jsonlite,
SqlRender (>= 1.6.0),
SqlRender (>= 1.18.0),
ParallelLogger (>= 2.0.2),
cli,
pillar,
Expand Down
42 changes: 41 additions & 1 deletion R/DefaultCovariateSettings.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,18 @@
#' measurements are below, within, or above
#' normal range in the short term window.
#' (analysis ID 712)
#' @param useMeasurementValueAsConceptAnyTimePrior One covariate per measurement-value
#' concept combination any time prior to
#' index. (analysis ID 713)
#' @param useMeasurementValueAsConceptLongTerm One covariate per measurement-value
#' concept combination in the long term
#' window. (analysis ID 714)
#' @param useMeasurementValueAsConceptMediumTerm One covariate per measurement-value
#' concept combination in the medium term
#' window. (analysis ID 715)
#' @param useMeasurementValueAsConceptShortTerm One covariate per measurement-value
#' concept combination in the short term
#' window. (analysis ID 716)
#' @param useObservationAnyTimePrior One covariate per observation in the
#' observation table any time prior to
#' index. (analysis ID 801)
Expand All @@ -281,6 +293,18 @@
#' @param useObservationShortTerm One covariate per observation in the
#' observation table in the short term
#' window. (analysis ID 804)
#' @param useObservationValueAsConceptAnyTimePrior One covariate per observation-value
#' concept combination any time prior to
#' index. (analysis ID 805)
#' @param useObservationValueAsConceptLongTerm One covariate per observation-value
#' concept combination in the long term
#' window. (analysis ID 806)
#' @param useObservationValueAsConceptMediumTerm One covariate per observation-value
#' concept combination in the medium term
#' window. (analysis ID 807)
#' @param useObservationValueAsConceptShortTerm One covariate per observation-value
#' concept combination in the short term
#' window. (analysis ID 808)
#' @param useCharlsonIndex The Charlson comorbidity index (Romano
#' adaptation) using all conditions prior
#' to the window end. (analysis ID 901)
Expand Down Expand Up @@ -455,11 +479,19 @@
#' useMeasurementRangeGroupAnyTimePrior = FALSE,
#' useMeasurementRangeGroupLongTerm = TRUE,
#' useMeasurementRangeGroupMediumTerm = FALSE,
#' useMeasurementRangeGroupShortTerm = FALSE,
#' useMeasurementRangeGroupShortTerm = TRUE,
Comment thread
anthonysena marked this conversation as resolved.
#' useMeasurementValueAsConceptAnyTimePrior = FALSE,
#' useMeasurementValueAsConceptLongTerm = TRUE,
#' useMeasurementValueAsConceptMediumTerm = FALSE,
#' useMeasurementValueAsConceptShortTerm = TRUE,
#' useObservationAnyTimePrior = FALSE,
#' useObservationLongTerm = TRUE,
#' useObservationMediumTerm = FALSE,
#' useObservationShortTerm = TRUE,
#' useObservationValueAsConceptAnyTimePrior = FALSE,
#' useObservationValueAsConceptLongTerm = TRUE,
#' useObservationValueAsConceptMediumTerm = FALSE,
#' useObservationValueAsConceptShortTerm = TRUE,
#' useCharlsonIndex = TRUE,
#' useDcsi = TRUE,
#' useChads2 = TRUE,
Expand Down Expand Up @@ -574,10 +606,18 @@ createCovariateSettings <- function(useDemographicsGender = FALSE,
useMeasurementRangeGroupLongTerm = FALSE,
useMeasurementRangeGroupMediumTerm = FALSE,
useMeasurementRangeGroupShortTerm = FALSE,
useMeasurementValueAsConceptAnyTimePrior = FALSE,
useMeasurementValueAsConceptLongTerm = FALSE,
useMeasurementValueAsConceptMediumTerm = FALSE,
useMeasurementValueAsConceptShortTerm = FALSE,
useObservationAnyTimePrior = FALSE,
useObservationLongTerm = FALSE,
useObservationMediumTerm = FALSE,
useObservationShortTerm = FALSE,
useObservationValueAsConceptAnyTimePrior = FALSE,
useObservationValueAsConceptLongTerm = FALSE,
useObservationValueAsConceptMediumTerm = FALSE,
useObservationValueAsConceptShortTerm = FALSE,
useCharlsonIndex = FALSE,
useDcsi = FALSE,
useChads2 = FALSE,
Expand Down
14 changes: 12 additions & 2 deletions R/DefaultTemporalCovariateSettings.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@
#' the drug_era table starting in the time window.
#' (analysis ID 403)
#' @param useDrugEraGroupOverlap One covariate per drug rolled up to ATC groups in
#' the drug_era table overlapping with any part of
#' the time window. (analysis ID 404)
#' the drug_era table overlapping with any part of the
#' time window. (analysis ID 404)
#' @param useProcedureOccurrence One covariate per procedure in the
#' procedure_occurrence table in the time window.
#' (analysis ID 501)
Expand All @@ -85,8 +85,14 @@
#' @param useMeasurementRangeGroup Covariates indicating whether measurements are
#' below, within, or above normal range within the time
#' period. (analysis ID 703)
#' @param useMeasurementValueAsConcept One covariate per measurement-value concept
#' combination within the time period. (analysis ID
#' 704)
#' @param useObservation One covariate per observation in the observation
#' table in the time window. (analysis ID 801)
#' @param useObservationValueAsConcept One covariate per observation-value concept
#' combination within the time period. (analysis ID
#' 802)
#' @param useCharlsonIndex The Charlson comorbidity index (Romano adaptation)
#' using all conditions prior to the window end.
#' (analysis ID 901)
Expand Down Expand Up @@ -168,7 +174,9 @@
#' useMeasurement = TRUE,
#' useMeasurementValue = FALSE,
#' useMeasurementRangeGroup = TRUE,
#' useMeasurementValueAsConcept = TRUE,
#' useObservation = TRUE,
#' useObservationValueAsConcept = TRUE,
#' useCharlsonIndex = TRUE,
#' useDcsi = TRUE,
#' useChads2 = TRUE,
Expand Down Expand Up @@ -219,7 +227,9 @@ createTemporalCovariateSettings <- function(useDemographicsGender = FALSE,
useMeasurement = FALSE,
useMeasurementValue = FALSE,
useMeasurementRangeGroup = FALSE,
useMeasurementValueAsConcept = FALSE,
useObservation = FALSE,
useObservationValueAsConcept = FALSE,
useCharlsonIndex = FALSE,
useDcsi = FALSE,
useChads2 = FALSE,
Expand Down
7 changes: 7 additions & 0 deletions R/GetDefaultCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,13 @@ getDbDefaultCovariateData <- function(connection,
andromedaTableName = "covariateRef",
snakeCaseToCamelCase = TRUE
)
# Warn when covariateRef reports covariate ID collisions (rows whose
# `collisions` column is greater than zero). All affected covariate IDs are
# collapsed into one comma-separated list so a single warning is raised.
collisions <- covariateData$covariateRef %>%
  filter(collisions > 0) %>%
  collect()
if (nrow(collisions) > 0) {
  warning(sprintf(
    "Collisions in covariate IDs detected for post-coordinated concepts with covariate IDs %s",
    # `collapse` (not `paste =`, which is treated as one more vector to paste)
    # joins the IDs into a single string.
    paste(collisions$covariateId, collapse = ", ")
  ))
}

# Analysis reference
sql <- SqlRender::translate(
Expand Down
113 changes: 113 additions & 0 deletions extras/TestHashForPostcoordinatedConcepts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# To compute covariate IDs for postcoordinated concepts (concept_id - value_as_concept_id pairs),
# we use a simple hashing function we implement in SQL. The resulting covariate ID uses 52 bits of
# precision, so will fit in an R numeric type without loss of precision.
#
# Below is some code evaluating how likely we are to have collisions in covariate IDs (the same
# covariate ID for different concept_id - value_as_concept_id pairs). Although collisions are
# unlikely, they may occur. In general we are not concerned, as most covariates are used for
# prediction or confounder adjustment, and this may simply lead to one covariate (out of tens
# of thousands) being less predictive.

# Check in JnJ network ---------------------------------------------------------
# Load the concept_id - value_as_concept_id combinations used for the collision checks.
# The code in this script reads the columns conceptId, valueAsConceptId and table from it.
# NOTE(review): the relative path presumably assumes the working directory is the
# package root -- confirm before running.
uniquePcCombos <- readRDS("extras/uniquePcCombos.rds")
# Fold-and-mask hash: XOR `value` with `value / 2^bits`, then keep the lowest
# `bits` bits of the result.
#
# value: numeric or integer vector of IDs to hash.
# bits:  number of bits to keep in the hash.
# Returns an integer vector of hashes in the range [0, 2^bits - 1].
hash1 <- function(value, bits) {
  modulus <- 2^bits
  # bitwXor() / bitwAnd() coerce numeric arguments to integer, so the quotient
  # is reduced to its integer part before the bitwise operations are applied.
  folded <- bitwXor(value, value / modulus)
  lowBitsMask <- modulus - 1
  bitwAnd(folded, lowBitsMask)
}

# Multiplicative hash: multiply `value` by 2654435769, integer-divide by
# 2^(32 - bits), and keep the lowest `bits` bits of the quotient. For
# non-negative values this selects bits (32 - bits) .. 31 of the product.
# NOTE(review): 2654435769 looks like the usual multiplicative-hashing constant
# (approximately 2^32 / golden ratio) -- confirm.
#
# value: vector of IDs to hash (coerced to integer).
# bits:  number of bits to keep in the hash.
# Returns the `hash` column of the query result, one value per element of `value`.
hash2 <- function(value, bits) {
  # Use Andromeda / SQLite for intermediate steps requiring 64-bit integers:
  a <- Andromeda::andromeda(a = data.frame(value = as.integer(value)))
  # Release the temporary Andromeda object on exit (also when the query fails);
  # otherwise every call leaves one behind.
  on.exit(Andromeda::close(a), add = TRUE)
  shift <- 2^(32 - bits)
  mask <- (2^bits) - 1
  sql <- sprintf("SELECT CAST((2654435769 * value / %s) & %s AS INT) AS hash FROM a;", shift, mask)
  hash <- RSQLite::dbGetQuery(a, sql)
  return(hash$hash)
}


# Scheme 1: hash1 with 18 bits for the concept ID and 21 bits for the value concept ID.
# The two hashes and the table name are pasted into one key per combination; every
# duplicated key is a collision. Counts and collision rates as recorded below.
cid <- paste(hash1(uniquePcCombos$conceptId, 18), hash1(uniquePcCombos$valueAsConceptId, 21), uniquePcCombos$table)
sum(duplicated(cid))
# [1] 750
sum(duplicated(cid)) / nrow(uniquePcCombos)
# [1] 0.004121423

# Scheme 2: hash2 with 20 bits for the concept ID and 22 bits for the value concept ID,
# keyed the same way as scheme 1.
cid <- paste(hash2(uniquePcCombos$conceptId, 20), hash2(uniquePcCombos$valueAsConceptId, 22), uniquePcCombos$table)
sum(duplicated(cid))
# [1] 27
sum(duplicated(cid)) / nrow(uniquePcCombos)
# [1] 0.0001483712

# Scheme 2 packed into a single numeric ID:
#   concept hash * 4194304000 (= 2^22 * 1000) + value hash * 1000 + table flag,
# where the table flag is 1 for "measurement" rows and 0 otherwise.
cid <- hash2(uniquePcCombos$conceptId, 20) * 4194304000 + hash2(uniquePcCombos$valueAsConceptId, 22) * 1000 + as.integer(uniquePcCombos$table == "measurement")
sum(duplicated(cid))

# Find a duplicate for testing:
# Attach the packed IDs, keep every row whose ID occurs more than once, sort by ID
# so colliding rows are adjacent, and show the first two rows.
uniquePcCombos$cid <- cid
dups <- cid[duplicated(cid)]
dups <- uniquePcCombos[cid %in% dups, ]
dups <- dups[order(dups$cid), ]
dups[1:2, ]
# # A tibble: 2 x 4
# conceptId valueAsConceptId table cid
# <int> <int> <fct> <dbl>
# 1 3048564 4069590 measurement 7.41e14
# 2 40483078 4069590 measurement 7.41e14
# # A tibble: 2 x 4
# conceptId valueAsConceptId table cid
# <int> <int> <fct> <dbl>
# 1 3048564 4069590 measurement 7.41e14
# 2 40483078 4069590 measurement 7.41e14

# Demonstration of hash algorithm 1 in RSQLite ---------------------------------
# Open an in-memory SQLite database to run the SQL versions of the hash.
connection <- DatabaseConnector::connect(dbms = "sqlite", server = ":memory:")

# For reference:
# Expected ID computed in R: the 18-bit hash of the first ID is placed above the
# 21-bit hash of the second ID (multiplying by 2^21 shifts it left by 21 bits).
hash1(380844, 18) * 2^21 + hash1(2821462, 21)
# [1] 248934763863

# XOR not available in SQLite, but can implement using (a|b)-(a&b)
# 2^18 = 262144
# 2^21 = 2097152
# The masks 262143 and 2097151 are 2^18 - 1 and 2^21 - 1.
sql <- "
SELECT (((a | a/262144) - (a & a/262144)) & 262143)*2097152 +
(((b | b/2097152) - (b & b/2097152)) & 2097151) AS covariate_id
FROM (
SELECT 380844 AS a,
2821462 AS b
) tmp;
"
DatabaseConnector::renderTranslateQuerySql(connection, sql)
# # COVARIATE_ID
# 1 248934763863

# OR not available in Oracle, but can be implemented using a + b - (a&b).
# Substituting that into the XOR expression (a|b)-(a&b) gives a + b - 2*(a&b),
# which is the form used in the query below.
sql <- "
SELECT (((a + a/262144 - 2*(a & a/262144))) & 262143)*2097152 +
(((b + b/2097152 - 2*(b & b/2097152))) & 2097151) AS covariate_id
FROM (
SELECT 380844 AS a,
2821462 AS b
) tmp;
"
DatabaseConnector::renderTranslateQuerySql(connection, sql)
# # COVARIATE_ID
# 1 248934763863


DatabaseConnector::disconnect(connection)

# Demonstration of hash algorithm 2 in RSQLite ---------------------------------
# Open an in-memory SQLite database to run the SQL version of the hash.
connection <- DatabaseConnector::connect(dbms = "sqlite", server = ":memory:")

# For reference:
# Expected ID computed in R: the 20-bit hash of the first ID is placed above the
# 22-bit hash of the second ID. format(..., scientific = FALSE) prints all digits.
format(hash2(380844, 20) * 2^22 + hash2(2821462, 22), scientific = FALSE)
# [1] 2358966384914

# Same computation in plain SQL, with the constants of hash2 written out:
#   4096 = 2^(32 - 20), 1048575 = 2^20 - 1, 4194304 = 2^22,
#   1024 = 2^(32 - 22), 4194303 = 2^22 - 1
sql <- "
SELECT ((2654435769 * a / 4096) & 1048575)*4194304 +
((2654435769 * b / 1024) & 4194303) AS covariate_id
FROM (
SELECT 380844 AS a,
2821462 AS b
) tmp;
"
format(DatabaseConnector::renderTranslateQuerySql(connection, sql)[1, 1], scientific = FALSE)
# # COVARIATE_ID
# 1 2358966384914

DatabaseConnector::disconnect(connection)
Binary file added extras/uniquePcCombos.rds
Binary file not shown.
Loading