Skip to content

Commit

Permalink
Adding unit tests for nesting cohorts. Fixing some issues with multip…
Browse files Browse the repository at this point in the history
…le nesting entries per person.
  • Loading branch information
Admin_mschuemi authored and Admin_mschuemi committed Apr 11, 2023
1 parent 1591861 commit cb65923
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 13 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Bugfix

2. Not ignoring 'allowRegularization' in calendar time settings.

3. When person has multiple nesting cohort entries, prevent double-counting of cases in attrition table if nesting cohort entries have no outcome.


SelfControlledCaseSeries 4.1.0
==============================
Expand Down
14 changes: 8 additions & 6 deletions R/DataLoadingSaving.R
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ getDbSccsData <- function(connectionDetails,
max_cases_per_outcome = maxCasesPerOutcome
)
DatabaseConnector::executeSql(conn, sql)

}
}

Expand Down Expand Up @@ -316,12 +317,13 @@ getDbSccsData <- function(connectionDetails,

message("Fetching data from server")
sccsData <- Andromeda::andromeda()
sql <- SqlRender::loadRenderTranslateSql("QueryCases.sql",
packageName = "SelfControlledCaseSeries",
dbms = connectionDetails$dbms,
tempEmulationSchema = tempEmulationSchema,
sampled_cases = sampledCases
)
sql <- SqlRender::loadRenderTranslateSql(
"QueryCases.sql",
packageName = "SelfControlledCaseSeries",
dbms = connectionDetails$dbms,
tempEmulationSchema = tempEmulationSchema,
sampled_cases = sampledCases
)
DatabaseConnector::querySqlToAndromeda(
connection = conn,
sql = sql,
Expand Down
24 changes: 18 additions & 6 deletions inst/sql/sql_server/CreateCases.sql
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ FROM (
observation_period_id,
observation_period_start_date,
observation_period_end_date,
{@use_nesting_cohort} ? {
nesting_cohort_start_date,
}
{@study_start_date == '' } ? {
observation_period_start_date AS start_date,
} : {
Expand All @@ -76,31 +79,40 @@ FROM (
ELSE noninformative_end_censor
END AS noninformative_end_censor
}

FROM (
SELECT person_id,
observation_period_id,
observation_period.observation_period_id,
observation_period_start_date,
observation_period_end_date,
{@use_nesting_cohort} ? {
nesting_cohort_start_date,
}
CASE WHEN observation_period_end_date = (SELECT MAX(observation_period_end_date) FROM @cdm_database_schema.observation_period)
THEN CAST(1 AS INT)
ELSE CAST(0 AS INT)
END AS noninformative_end_censor
FROM @cdm_database_schema.observation_period
WHERE observation_period_id IN (
INNER JOIN (
SELECT DISTINCT observation_period_id
{@use_nesting_cohort} ? {
FROM #outcomes_in_nesting
, nesting_cohort_start_date
FROM #outcomes_in_nesting
} : { {@study_start_date != '' & @study_end_date != ''} ? {
FROM #outcomes_in_period
FROM #outcomes_in_period
} : {
FROM #outcomes
FROM #outcomes
}}
)
) outcomes
ON observation_period.observation_period_id = outcomes.observation_period_id
) observation_period
{@use_nesting_cohort} ? {
) temp
INNER JOIN @nesting_cohort_database_schema.@nesting_cohort_table nesting
ON temp.person_id = nesting.subject_id
AND temp.nesting_cohort_start_date = nesting.cohort_start_date
AND temp.observation_period_start_date <= nesting.cohort_start_date
AND temp.observation_period_end_date >= nesting.cohort_start_date
WHERE nesting.cohort_definition_id = @nesting_cohort_id
}
) observation_period
Expand Down
3 changes: 2 additions & 1 deletion inst/sql/sql_server/SelectOutcomes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ SELECT outcome_id,
observation_period_id,
observed_days,
person_id,
outcome_date
outcome_date,
cohort_start_date AS nesting_cohort_start_date
INTO #outcomes_in_nesting
{@study_start_date != '' & @study_end_date != ''} ? {
FROM #outcomes_in_period outcomes
Expand Down
120 changes: 120 additions & 0 deletions tests/testthat/test-nesting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
library(SelfControlledCaseSeries)
library(testthat)
library(dplyr)

connectionDetails <- Eunomia::getEunomiaConnectionDetails()

# Setup data
connection <- DatabaseConnector::connect(connectionDetails)
person <- data.frame(
personId = c(1, 2, 3),
genderConceptId = c(8507, 8507, 8507),
yearOfBirth = c(1975, 1975, 1975),
monthOfBirth = c(8, 8, 8),
dayOfBirth = c(5, 5, 5)
)
observationPeriods <- data.frame(
observationPeriodId = c(1, 2, 3 ,4),
personId = c(1, 2, 3, 3),
observationPeriodStartDate = as.Date(c("2000-01-01", "2000-01-01", "2000-01-01", "2002-01-01")),
observationPeriodEndDate = as.Date(c("2000-12-31", "2000-12-31", "2000-12-31", "2002-12-31"))
)
nestingCohort <- data.frame(
cohortDefinitionId = c(1, 1, 1, 1),
subjectId = c(1, 1, 2, 3),
cohortStartDate = as.Date(c("2000-07-01", "2000-11-01", "2000-07-01", "2002-07-01")),
cohortEndDate = as.Date(c("2000-09-30", "2000-12-31", "2000-11-30", "2002-12-31"))
)
outcomeCohort <- data.frame(
cohortDefinitionId = c(2, 2, 2, 2),
subjectId = c(1, 1, 2, 3),
cohortStartDate = as.Date(c("2000-08-01", "2000-12-01", "2000-08-01", "2002-08-01")),
cohortEndDate = as.Date(c("2000-08-01", "2000-128-01", "2000-08-01", "2002-08-01"))
)
exposureCohort <- data.frame(
cohortDefinitionId = c(3, 3, 3, 3),
subjectId = c(1, 2, 3, 3),
cohortStartDate = as.Date(c("2000-08-01", "2000-06-01", "2000-08-01", "2002-08-01")),
cohortEndDate = as.Date(c("2000-08-15", "2000-08-01", "2000-08-15", "2002-08-15"))
)
cohort <- rbind(nestingCohort, outcomeCohort, exposureCohort)
DatabaseConnector::insertTable(
connection = connection,
databaseSchema = "main",
tableName = "person",
data = person,
dropTableIfExists = TRUE,
createTable = TRUE,
camelCaseToSnakeCase = TRUE
)
DatabaseConnector::insertTable(
connection = connection,
databaseSchema = "main",
tableName = "observation_period",
data = observationPeriods,
dropTableIfExists = TRUE,
createTable = TRUE,
camelCaseToSnakeCase = TRUE
)
DatabaseConnector::insertTable(
connection = connection,
databaseSchema = "main",
tableName = "cohort",
data = cohort,
dropTableIfExists = TRUE,
createTable = TRUE,
camelCaseToSnakeCase = TRUE
)
DatabaseConnector::disconnect(connection)

sccsData <- getDbSccsData(
connectionDetails = connectionDetails,
cdmDatabaseSchema = "main",
outcomeDatabaseSchema = "main",
outcomeTable = "cohort",
outcomeIds = 2,
exposureDatabaseSchema = "main",
exposureTable = "cohort",
exposureIds = 3,
nestingCohortDatabaseSchema = "main",
nestingCohortTable = "cohort",
nestingCohortId = 1,
useNestingCohort = TRUE
)

test_that("getDbSccsData correctly handles nesting", {
cases <- sccsData$cases %>%
collect()
expect_equal(cases$observationPeriodId, c("1.0", "1.0", "2.0", "4.0"))
expect_equal(cases$personId, c("1.0", "1.0", "2.0", "3.0"))
expect_equal(cases$caseId, c(1, 2, 3, 4))
expect_equal(cases$startYear, c(2000, 2000, 2000, 2002))
expect_equal(cases$startMonth , c(7, 11, 7, 7))
expect_equal(cases$startDay , c(1, 1, 1, 1))

hois <- sccsData$eras %>%
filter(eraType == "hoi") %>%
arrange(caseId, startDay) %>%
collect()
expect_equal(hois$caseId, c(1, 1, 2, 2, 3, 4))
expect_equal(hois$startDay , c(31, 153, -92, 30, 31, 31))
})

studyPop <- createStudyPopulation(
sccsData = sccsData,
outcomeId = 2,
firstOutcomeOnly = FALSE
)
sccsIntervalData <- createSccsIntervalData(
studyPopulation = studyPop,
sccsData = sccsData,
eraCovariateSettings = createEraCovariateSettings(includeEraIds = 3)
)
test_that("sccsIntervalData correctly handles nesting", {
outcomes <- sccsIntervalData$outcomes %>%
collect()
# StratumId = caseId
expect_equal(outcomes$stratumId, c(1, 1, 3, 3, 4, 4))
expect_equal(outcomes$time, c(77,15, 121, 32, 169, 15))
expect_equal(outcomes$y, c(0, 1, 0, 1, 0 ,1))
})

0 comments on commit cb65923

Please sign in to comment.