From 734803fba26d502149edb70e477007c68e03004d Mon Sep 17 00:00:00 2001 From: Mikhail-iontsev Date: Fri, 20 Oct 2023 23:56:24 +0400 Subject: [PATCH 1/7] feat: added support for duckdb in exportToAres --- R/exportToAres.R | 2713 ++++++++++++++++++++++++---------------------- 1 file changed, 1446 insertions(+), 1267 deletions(-) diff --git a/R/exportToAres.R b/R/exportToAres.R index 061085b7..9348cd66 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -1,14 +1,91 @@ normalizeEmptyValue <- function(x) { - if (is.null(x) || is.na(x) || "NA" == x || "NULL" == x) { + if (is.null(x) || + is.na(x) || + "NA" == x || + "NULL" == x) { character() } else { x } } +createConceptMedatataTable <- function(report, concept_id, domain) { + df <- data.frame(CONCEPT_ID = concept_id, + CONCEPT_NAME = report$CONCEPT_NAME, + DOMAIN = domain, + NUM_PERSONS = report$NUM_PERSONS, + PERCENT_PERSONS = report$PERCENT_PERSONS, + RECORDS_PER_PERSON = report$RECORDS_PER_PERSON + ) + return(df) +} + +createConceptDataTable <- function(table, concept_id, domain) { + df <- data.frame(table) + df['CONCEPT_ID'] = concept_id + df['DOMAIN'] = domain + return(df) +} + +writeReportToTable <- function(duckdbCon, report, tableName, schema) { + + if (nrow(report) > 0) { + dbWriteTable(duckdbCon, DBI::Id(schema = schema, table = tableName), report, append = TRUE) + } +} + +exportDataToDuckDB <- function(data, duckdbCon = NULL, tableNames = NULL, concept_id = NULL, domain = NULL, schema = NULL) { + if (!is.null(duckdbCon) && + !is.null(tableNames) && + !is.null(concept_id)) { + if (length(data) != length(tableNames)) { + cat("Number of reports and tableNames should match.\n") + return() + } + for (i in seq_along(data)) { + if (nrow(data[[i]]) > 0) { + writeReportToTable(duckdbCon, createConceptDataTable(data[[i]], concept_id, domain), tableNames[[i]], schema) + } + } + } else { + cat("Missing required parameters for DuckDB export.\n") + } +} + +processAndExportConceptData <- function(concept_id, duckdbCon, reports, outputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, domain, schema) { + report <- reports[reports$CONCEPT_ID == concept_id,] + report <- as.list(report) + + tableNames <- lapply(columnsToConvertToDataFrame, tolower) + + #Normalize the specified columns + for (col in columnsToNormalize) { + report[[col]] <- normalizeEmptyValue(report[[col]]) + } + + # Convert specified columns to data frames + for (col in columnsToConvertToDataFrame) { + report[[col]] <- as.data.frame(report[[col]]) + } + + + if (outputFormat == "json") { + dir.create(paste0(outputPath, dir), recursive = T, showWarnings = F) + filename <- paste(outputPath, dir, "/concept_", report$CONCEPT_ID, ".json", sep = '') + write(jsonlite::toJSON(report), filename) + + } + else if (outputFormat == "duckdb") { + metadata <- createConceptMedatataTable(report, concept_id, domain) + dbWriteTable(duckdbCon, DBI::Id(schema = schema, table = "concept_metadata"), metadata, append = TRUE) + tableList <- lapply(columnsToConvertToDataFrame, function(col) report[[col]]) + exportDataToDuckDB(tableList, duckdbCon, tableNames, concept_id, domain, schema) + } +} + + generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating procedure reports") queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/procedure/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -16,7 +93,7 @@ generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/procedure/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -24,15 +101,15 @@ generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryProcedureFrequencyDistribution <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/procedure/sqlFrequencyDistribution.sql", + sqlFilename = "export/procedure/sqlFrequencyDistribution.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryProceduresByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/procedure/sqlProceduresByType.sql", packageName = "Achilles", @@ -40,7 +117,7 @@ generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/procedure/sqlAgeAtFirstOccurrence.sql", packageName = "Achilles", @@ -48,14 +125,13 @@ generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataProceduresByType <- DatabaseConnector::querySql(conn,queryProceduresByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - dataProcedureFrequencyDistribution <- DatabaseConnector::querySql(conn,queryProcedureFrequencyDistribution) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataProceduresByType <- DatabaseConnector::querySql(conn, queryProceduresByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) + dataProcedureFrequencyDistribution <- DatabaseConnector::querySql(conn, queryProcedureFrequencyDistribution) if (nrow(proceduresData) == 0) { return() @@ -66,85 +142,61 @@ generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDat ) reports <- uniqueConcepts %>% - dplyr::left_join( - proceduresData, - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "CDM_TABLE_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataProcedureFrequencyDistribution %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PROCEDURE_FREQUENCY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataProceduresByType %>% - dplyr::select(c(1,4,5)) %>% - tidyr::nest(PROCEDURES_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "PROCEDURE_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstOccurrence %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/procedure_occurrence"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$PROCEDURE_FREQUENCY_DISTRIBUTION <- as.data.frame(report$PROCEDURE_FREQUENCY_DISTRIBUTION) - report$PROCEDURES_BY_TYPE <- as.data.frame(report$PROCEDURES_BY_TYPE) - report$AGE_AT_FIRST_OCCURRENCE <- as.data.frame(report$AGE_AT_FIRST_OCCURRENCE) - - filename <- paste(outputPath, "/concepts/procedure_occurrence/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + proceduresData, + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "CDM_TABLE_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataProcedureFrequencyDistribution %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PROCEDURE_FREQUENCY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataProceduresByType %>% + dplyr::select(c(1, 4, 5)) %>% + tidyr::nest(PROCEDURES_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "PROCEDURE_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstOccurrence %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } generateAOPersonReport <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating person report") - output = {} + output = { } conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) + on.exit(DatabaseConnector::disconnect(connection = conn)) renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/person/population.sql", packageName = "Achilles", @@ -154,10 +206,10 @@ generateAOPersonReport <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - - personSummaryData <- DatabaseConnector::querySql(conn,renderedSql) + + personSummaryData <- DatabaseConnector::querySql(conn, renderedSql) output$SUMMARY = personSummaryData - + renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/person/population_age_gender.sql", packageName = "Achilles", @@ -167,7 +219,7 @@ generateAOPersonReport <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - ageGenderData <- DatabaseConnector::querySql(conn,renderedSql) + ageGenderData <- DatabaseConnector::querySql(conn, renderedSql) output$AGE_GENDER_DATA = ageGenderData renderedSql <- SqlRender::loadRenderTranslateSql( @@ -179,7 +231,7 @@ generateAOPersonReport <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - genderData <- DatabaseConnector::querySql(conn,renderedSql) + genderData <- DatabaseConnector::querySql(conn, renderedSql) output$GENDER_DATA = genderData renderedSql <- SqlRender::loadRenderTranslateSql( @@ -191,7 +243,7 @@ generateAOPersonReport <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - raceData <- DatabaseConnector::querySql(conn,renderedSql) + raceData <- DatabaseConnector::querySql(conn, renderedSql) output$RACE_DATA = raceData renderedSql <- SqlRender::loadRenderTranslateSql( @@ -203,29 +255,26 @@ generateAOPersonReport <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - ethnicityData <- DatabaseConnector::querySql(conn,renderedSql) + ethnicityData <- DatabaseConnector::querySql(conn, renderedSql) output$ETHNICITY_DATA = ethnicityData - + renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/person/yearofbirth.sql", packageName = "Achilles", dbms = connectionDetails$dbms, - warnOnMissingParameters = FALSE, - cdm_database_schema = cdmDatabaseSchema, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema ) - birthYearData <- DatabaseConnector::querySql(conn,renderedSql) + birthYearData <- DatabaseConnector::querySql(conn, renderedSql) output$BIRTH_YEAR_DATA <- birthYearData - - jsonOutput = jsonlite::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/person.json", sep="")) + return(output) } -generateAOAchillesPerformanceReport <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOAchillesPerformanceReport <- function(connection, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating achilles performance report") queryAchillesPerformance <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/performance/sqlAchillesPerformance.sql", packageName = "Achilles", @@ -234,19 +283,16 @@ generateAOAchillesPerformanceReport <- function(connectionDetails, cdmDatabaseSc cdm_database_schema = cdmDatabaseSchema, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema - ) - - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPerformance <- DatabaseConnector::querySql(conn,queryAchillesPerformance) - names(dataPerformance) <- c("analysis_id", "analysis_name","category", "elapsed_seconds") - dataPerformance$elapsed_seconds <- format(round(as.numeric(dataPerformance$elapsed_seconds),digits = 2),nsmall = 2) - data.table::fwrite(dataPerformance, file.path(outputPath, "achilles-performance.csv")) + ) + + dataPerformance <- DatabaseConnector::querySql(connection, queryAchillesPerformance) + names(dataPerformance) <- c("analysis_id", "analysis_name", "category", "elapsed_seconds") + dataPerformance$elapsed_seconds <- format(round(as.numeric(dataPerformance$elapsed_seconds), digits = 2), nsmall = 2) + return(dataPerformance) } -generateAODeathReport <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODeathReport <- function(connection, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating death report") queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlPrevalenceByGenderAgeYear.sql", @@ -254,14 +300,14 @@ generateAODeathReport <- function(connectionDetails, cdmDatabaseSchema, resultsD dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema - ) - + ) + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlPrevalenceByMonth.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema - ) + ) queryDeathByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlDeathByType.sql", @@ -270,47 +316,39 @@ generateAODeathReport <- function(connectionDetails, cdmDatabaseSchema, resultsD results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtDeath <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlAgeAtDeath.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema - ) - - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - deathByTypeData <- DatabaseConnector::querySql(conn,queryDeathByType) - prevalenceByGenderAgeYearData <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - prevalenceByMonthData <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - ageAtDeathData <- DatabaseConnector::querySql(conn,queryAgeAtDeath) - - output = {} - output$PREVALENCE_BY_GENDER_AGE_YEAR = prevalenceByGenderAgeYearData + ) + deathByTypeData <- DatabaseConnector::querySql(connection, queryDeathByType) + prevalenceByGenderAgeYearData <- DatabaseConnector::querySql(connection, queryPrevalenceByGenderAgeYear) + prevalenceByMonthData <- DatabaseConnector::querySql(connection, queryPrevalenceByMonth) + ageAtDeathData <- DatabaseConnector::querySql(connection, queryAgeAtDeath) + + output = { } + output$PREVALENCE_BY_GENDER_AGE_YEAR = prevalenceByGenderAgeYearData output$PREVALENCE_BY_MONTH = prevalenceByMonthData - output$DEATH_BY_TYPE = deathByTypeData + output$DEATH_BY_TYPE = deathByTypeData output$AGE_AT_DEATH = ageAtDeathData - - filename <- file.path(outputPath, "death.json") - write(jsonlite::toJSON(output),filename) + return(output) } -generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating observation period reports") - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - output = {} + output = { } renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/ageatfirst.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - ageAtFirstObservationData <- DatabaseConnector::querySql(conn,renderedSql) + ageAtFirstObservationData <- DatabaseConnector::querySql(connection, renderedSql) output$AGE_AT_FIRST_OBSERVATION <- ageAtFirstObservationData - + renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/agebygender.sql", packageName = "Achilles", @@ -318,29 +356,29 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - ageByGenderData <- DatabaseConnector::querySql(conn,renderedSql) + ageByGenderData <- DatabaseConnector::querySql(connection, renderedSql) output$AGE_BY_GENDER = ageByGenderData - observationLengthHist <- {} + observationLengthHist <- { } renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observationlength_stats.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - observationLengthStats <- DatabaseConnector::querySql(conn,renderedSql) + observationLengthStats <- DatabaseConnector::querySql(connection, renderedSql) observationLengthHist$MIN = observationLengthStats$MIN_VALUE observationLengthHist$MAX = observationLengthStats$MAX_VALUE observationLengthHist$INTERVAL_SIZE = observationLengthStats$INTERVAL_SIZE observationLengthHist$INTERVALS = (observationLengthStats$MAX_VALUE - observationLengthStats$MIN_VALUE) / observationLengthStats$INTERVAL_SIZE - + renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observationlength_data.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - observationLengthData <- DatabaseConnector::querySql(conn,renderedSql) + observationLengthData <- DatabaseConnector::querySql(connection, renderedSql) output$OBSERVATION_LENGTH_HISTOGRAM = observationLengthHist renderedSql <- SqlRender::loadRenderTranslateSql( @@ -349,10 +387,10 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - cumulativeDurationData <- DatabaseConnector::querySql(conn,renderedSql) + cumulativeDurationData <- DatabaseConnector::querySql(connection, renderedSql) cumulativeDurationData$X_LENGTH_OF_OBSERVATION <- cumulativeDurationData$X_LENGTH_OF_OBSERVATION / 365.25 cumulativeDurationData$SERIES_NAME <- NULL - names(cumulativeDurationData) <- c("YEARS","PERCENT_PEOPLE") + names(cumulativeDurationData) <- c("YEARS", "PERCENT_PEOPLE") output$CUMULATIVE_DURATION = cumulativeDurationData renderedSql <- SqlRender::loadRenderTranslateSql( @@ -362,7 +400,7 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - opLengthByGenderData <- DatabaseConnector::querySql(conn,renderedSql) + opLengthByGenderData <- DatabaseConnector::querySql(connection, renderedSql) opLengthByGenderData$MIN_VALUE <- opLengthByGenderData$MIN_VALUE / 365.25 opLengthByGenderData$P10_VALUE <- opLengthByGenderData$P10_VALUE / 365.25 opLengthByGenderData$P25_VALUE <- opLengthByGenderData$P25_VALUE / 365.25 @@ -370,7 +408,7 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche opLengthByGenderData$P75_VALUE <- opLengthByGenderData$P75_VALUE / 365.25 opLengthByGenderData$P90_VALUE <- opLengthByGenderData$P90_VALUE / 365.25 opLengthByGenderData$MAX_VALUE <- opLengthByGenderData$MAX_VALUE / 365.25 - + output$OBSERVATION_PERIOD_LENGTH_BY_GENDER = opLengthByGenderData renderedSql <- SqlRender::loadRenderTranslateSql( @@ -379,7 +417,7 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - opLengthByAgeData <- DatabaseConnector::querySql(conn,renderedSql) + opLengthByAgeData <- DatabaseConnector::querySql(connection, renderedSql) opLengthByAgeData$MIN_VALUE <- opLengthByAgeData$MIN_VALUE / 365.25 opLengthByAgeData$P10_VALUE <- opLengthByAgeData$P10_VALUE / 365.25 opLengthByAgeData$P25_VALUE <- opLengthByAgeData$P25_VALUE / 365.25 @@ -389,37 +427,37 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche opLengthByAgeData$MAX_VALUE <- opLengthByAgeData$MAX_VALUE / 365.25 output$OBSERVATION_PERIOD_LENGTH_BY_AGE = opLengthByAgeData - observedByYearHist <- {} + observedByYearHist <- { } renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observedbyyear_stats.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - observedByYearStats <- DatabaseConnector::querySql(conn,renderedSql) + observedByYearStats <- DatabaseConnector::querySql(connection, renderedSql) observedByYearHist$MIN = observedByYearStats$MIN_VALUE observedByYearHist$MAX = observedByYearStats$MAX_VALUE observedByYearHist$INTERVAL_SIZE = observedByYearStats$INTERVAL_SIZE observedByYearHist$INTERVALS = (observedByYearStats$MAX_VALUE - observedByYearStats$MIN_VALUE) / observedByYearStats$INTERVAL_SIZE - + renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observedbyyear_data.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - observedByYearData <- DatabaseConnector::querySql(conn,renderedSql) + observedByYearData <- DatabaseConnector::querySql(connection, renderedSql) observedByYearHist$DATA <- observedByYearData output$OBSERVED_BY_YEAR_HISTOGRAM = observedByYearHist - - observedByMonth <- {} + + observedByMonth <- { } renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observedbymonth.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - observedByMonth <- DatabaseConnector::querySql(conn,renderedSql) + observedByMonth <- DatabaseConnector::querySql(connection, renderedSql) output$OBSERVED_BY_MONTH = observedByMonth renderedSql <- SqlRender::loadRenderTranslateSql( @@ -428,17 +466,14 @@ generateAOObservationPeriodReport <- function(connectionDetails, cdmDatabaseSche dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema ) - personPeriodsData <- DatabaseConnector::querySql(conn,renderedSql) + personPeriodsData <- DatabaseConnector::querySql(connection, renderedSql) output$PERSON_PERIODS_DATA = personPeriodsData - - filename <- file.path(outputPath, "observationperiod.json") - write(jsonlite::toJSON(output),filename) + return(output) } generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating visit reports") - + queryVisits <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlVisitTreemap.sql", packageName = "Achilles", @@ -446,7 +481,7 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -454,7 +489,7 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -462,7 +497,7 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryVisitDurationByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlVisitDurationByType.sql", packageName = "Achilles", @@ -470,7 +505,7 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlAgeAtFirstOccurrence.sql", packageName = "Achilles", @@ -478,14 +513,14 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + conn <- DatabaseConnector::connect(connectionDetails) - dataVisits <- DatabaseConnector::querySql(conn,queryVisits) + dataVisits <- DatabaseConnector::querySql(conn, queryVisits) names(dataVisits)[names(dataVisits) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataVisitDurationByType <- DatabaseConnector::querySql(conn,queryVisitDurationByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataVisitDurationByType <- DatabaseConnector::querySql(conn, queryVisitDurationByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) if (nrow(dataVisits) == 0) { return() @@ -496,76 +531,51 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataVisits %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataVisitDurationByType %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(VISIT_DURATION_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstOccurrence %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/visit_occurrence"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$VISIT_DURATION_BY_TYPE <- as.data.frame(report$VISIT_DURATION_BY_TYPE) - report$AGE_AT_FIRST_OCCURRENCE <- as.data.frame(report$AGE_AT_FIRST_OCCURRENCE) - - filename <- paste(outputPath, "/concepts/visit_occurrence/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataVisits %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataVisitDurationByType %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(VISIT_DURATION_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstOccurrence %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating visit_detail reports") - queryVisitDetails <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlVisitDetailTreemap.sql", packageName = "Achilles", @@ -573,7 +583,7 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -583,7 +593,7 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -593,7 +603,7 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryVisitDetailDurationByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlVisitDetailDurationByType.sql", packageName = "Achilles", @@ -603,7 +613,7 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlAgeAtFirstOccurrence.sql", packageName = "Achilles", @@ -613,15 +623,15 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataVisitDetails <- DatabaseConnector::querySql(conn,queryVisitDetails) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataVisitDetails <- DatabaseConnector::querySql(conn, queryVisitDetails) names(dataVisitDetails)[names(dataVisitDetails) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataVisitDetailDurationByType <- DatabaseConnector::querySql(conn,queryVisitDetailDurationByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataVisitDetailDurationByType <- DatabaseConnector::querySql(conn, queryVisitDetailDurationByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) if (nrow(dataVisitDetails) == 0) { return() @@ -632,94 +642,66 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataVisitDetails %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataVisitDetailDurationByType %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(VISIT_DETAIL_DURATION_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstOccurrence %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/visit_detail"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$VISIT_DETAIL_DURATION_BY_TYPE <- as.data.frame(report$VISIT_DETAIL_DURATION_BY_TYPE) - report$AGE_AT_FIRST_OCCURRENCE <- as.data.frame(report$AGE_AT_FIRST_OCCURRENCE) - - filename <- paste(outputPath, "/concepts/visit_detail/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataVisitDetails %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataVisitDetailDurationByType %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(VISIT_DETAIL_DURATION_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstOccurrence %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAOMetadataReport <- function(connectionDetails, cdmDatabaseSchema, outputPath) +generateAOMetadataReport <- function(connection, cdmDatabaseSchema, outputPath) { - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - if (DatabaseConnector::existsTable(connection = conn, databaseSchema = cdmDatabaseSchema, tableName = "METADATA")) + if (DatabaseConnector::existsTable(connection = connection, databaseSchema = cdmDatabaseSchema, tableName = "METADATA")) { - writeLines("Generating metadata report") queryMetadata <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/metadata/sqlMetadata.sql", packageName = "Achilles", dbms = connectionDetails$dbms, cdm_database_schema = cdmDatabaseSchema - ) - dataMetadata <- DatabaseConnector::querySql(conn, queryMetadata) - data.table::fwrite(dataMetadata, file=paste0(outputPath, "/metadata.csv")) + ) + dataMetadata <- DatabaseConnector::querySql(connection, queryMetadata) + return(dataMetadata) } } generateAOObservationReports <- function(connectionDetails, observationsData, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating Observation reports") - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observation/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -727,7 +709,7 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observation/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -735,15 +717,15 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryObsFrequencyDistribution <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/observation/sqlFrequencyDistribution.sql", + sqlFilename = "export/observation/sqlFrequencyDistribution.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryObservationsByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observation/sqlObservationsByType.sql", packageName = "Achilles", @@ -751,7 +733,7 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observation/sqlAgeAtFirstOccurrence.sql", packageName = "Achilles", @@ -759,14 +741,14 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataObservationsByType <- DatabaseConnector::querySql(conn,queryObservationsByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - dataObsFrequencyDistribution <- DatabaseConnector::querySql(conn,queryObsFrequencyDistribution) + + conn <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataObservationsByType <- DatabaseConnector::querySql(conn, queryObservationsByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) + dataObsFrequencyDistribution <- DatabaseConnector::querySql(conn, queryObsFrequencyDistribution) if (nrow(observationsData) == 0) { return() @@ -777,112 +759,85 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd ) reports <- uniqueConcepts %>% - dplyr::left_join( - observationsData, - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "CDM_TABLE_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataObsFrequencyDistribution %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(OBS_FREQUENCY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataObservationsByType %>% - dplyr::select(c(1,4,5)) %>% - tidyr::nest(OBSERVATIONS_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "OBSERVATION_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstOccurrence %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/observation"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$OBS_FREQUENCY_DISTRIBUTION <- as.data.frame(report$OBS_FREQUENCY_DISTRIBUTION) - report$OBSERVATIONS_BY_TYPE <- as.data.frame(report$OBSERVATIONS_BY_TYPE) - report$AGE_AT_FIRST_OCCURRENCE <- as.data.frame(report$AGE_AT_FIRST_OCCURRENCE) - - filename <- paste(outputPath, "/concepts/observation/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + observationsData, + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "CDM_TABLE_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataObsFrequencyDistribution %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(OBS_FREQUENCY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataObservationsByType %>% + dplyr::select(c(1, 4, 5)) %>% + tidyr::nest(OBSERVATIONS_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "OBSERVATION_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstOccurrence %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAOCdmSourceReport <- function(connectionDetails, cdmDatabaseSchema, outputPath) +generateAOCdmSourceReport <- function(connection, cdmDatabaseSchema, outputPath) { - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - if (DatabaseConnector::existsTable(connection = conn, databaseSchema = cdmDatabaseSchema, tableName = "CDM_SOURCE")) + if (DatabaseConnector::existsTable(connection = connection, databaseSchema = cdmDatabaseSchema, tableName = "CDM_SOURCE")) { - writeLines("Generating cdm source report") queryCdmSource <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/metadata/sqlCdmSource.sql", packageName = "Achilles", dbms = connectionDetails$dbms, cdm_database_schema = cdmDatabaseSchema - ) - - dataCdmSource <- DatabaseConnector::querySql(conn, queryCdmSource) - data.table::fwrite(dataCdmSource, file=paste0(outputPath, "/cdmsource.csv")) + ) + + dataCdmSource <- DatabaseConnector::querySql(connection, queryCdmSource) + return(dataCdmSource) } } generateAODashboardReport <- function(outputPath) { - output <- {} - personReport <- jsonlite::fromJSON(file = paste(outputPath, "/person.json", sep="")) + output <- { } + personReport <- jsonlite::fromJSON(file = paste(outputPath, "/person.json", sep = "")) output$SUMMARY <- personReport$SUMMARY output$GENDER_DATA <- personReport$GENDER_DATA - opReport <- jsonlite::fromJSON(file = paste(outputPath, "/observationperiod.json", sep="")) - + opReport <- jsonlite::fromJSON(file = paste(outputPath, "/observationperiod.json", sep = "")) + output$AGE_AT_FIRST_OBSERVATION_HISTOGRAM <- opReport$AGE_AT_FIRST_OBSERVATION_HISTOGRAM output$CUMULATIVE_DURATION <- opReport$CUMULATIVE_DURATION output$OBSERVED_BY_MONTH <- opReport$OBSERVED_BY_MONTH jsonOutput <- jsonlite::toJSON(output) - write(jsonOutput, file=paste(outputPath, "/dashboard.json", sep="")) + write(jsonOutput, file = paste(outputPath, "/dashboard.json", sep = "")) } generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) @@ -895,7 +850,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -903,15 +858,15 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryFrequencyDistribution <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/measurement/sqlFrequencyDistribution.sql", + sqlFilename = "export/measurement/sqlFrequencyDistribution.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryMeasurementsByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlMeasurementsByType.sql", packageName = "Achilles", @@ -919,7 +874,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstOccurrence <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlAgeAtFirstOccurrence.sql", packageName = "Achilles", @@ -927,7 +882,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryRecordsByUnit <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlRecordsByUnit.sql", packageName = "Achilles", @@ -935,7 +890,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryMeasurementValueDistribution <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlMeasurementValueDistribution.sql", packageName = "Achilles", @@ -943,7 +898,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryLowerLimitDistribution <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlLowerLimitDistribution.sql", packageName = "Achilles", @@ -951,7 +906,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryUpperLimitDistribution <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlUpperLimitDistribution.sql", packageName = "Achilles", @@ -959,7 +914,7 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryValuesRelativeToNorm <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlValuesRelativeToNorm.sql", packageName = "Achilles", @@ -967,19 +922,19 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataMeasurementsByType <- DatabaseConnector::querySql(conn,queryMeasurementsByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn,queryAgeAtFirstOccurrence) - dataRecordsByUnit <- DatabaseConnector::querySql(conn,queryRecordsByUnit) - dataMeasurementValueDistribution <- DatabaseConnector::querySql(conn,queryMeasurementValueDistribution) - dataLowerLimitDistribution <- DatabaseConnector::querySql(conn,queryLowerLimitDistribution) - dataUpperLimitDistribution <- DatabaseConnector::querySql(conn,queryUpperLimitDistribution) - dataValuesRelativeToNorm <- DatabaseConnector::querySql(conn,queryValuesRelativeToNorm) - dataFrequencyDistribution <- DatabaseConnector::querySql(conn,queryFrequencyDistribution) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataMeasurementsByType <- DatabaseConnector::querySql(conn, queryMeasurementsByType) + dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) + dataRecordsByUnit <- DatabaseConnector::querySql(conn, queryRecordsByUnit) + dataMeasurementValueDistribution <- DatabaseConnector::querySql(conn, queryMeasurementValueDistribution) + dataLowerLimitDistribution <- DatabaseConnector::querySql(conn, queryLowerLimitDistribution) + dataUpperLimitDistribution <- DatabaseConnector::querySql(conn, queryUpperLimitDistribution) + dataValuesRelativeToNorm <- DatabaseConnector::querySql(conn, queryValuesRelativeToNorm) + dataFrequencyDistribution <- DatabaseConnector::querySql(conn, queryFrequencyDistribution) if (nrow(dataPrevalenceByMonth) == 0) { return() @@ -990,129 +945,99 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataMeasurements %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataFrequencyDistribution %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(FREQUENCY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataMeasurementsByType %>% - dplyr::select(c(1,4,5)) %>% - tidyr::nest(MEASUREMENTS_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstOccurrence %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataRecordsByUnit %>% - dplyr::select(c(1,4,5)) %>% - tidyr::nest(RECORDS_BY_UNIT = c(-1)) - ), - by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataMeasurementValueDistribution %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(MEASUREMENT_VALUE_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataLowerLimitDistribution %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(LOWER_LIMIT_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataUpperLimitDistribution %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(UPPER_LIMIT_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataValuesRelativeToNorm %>% - dplyr::select(c(1,4,5)) %>% - tidyr::nest(VALUES_RELATIVE_TO_NORM = c(-1)) - ), - by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/measurement"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$FREQUENCY_DISTRIBUTION <- as.data.frame(report$FREQUENCY_DISTRIBUTION) - report$MEASUREMENTS_BY_TYPE <- as.data.frame(report$MEASUREMENTS_BY_TYPE) - report$AGE_AT_FIRST_OCCURRENCE <- as.data.frame(report$AGE_AT_FIRST_OCCURRENCE) - report$RECORDS_BY_UNIT <- as.data.frame(report$RECORDS_BY_UNIT) - report$MEASUREMENT_VALUE_DISTRIBUTION <- as.data.frame(report$MEASUREMENT_VALUE_DISTRIBUTION) - report$LOWER_LIMIT_DISTRIBUTION <- as.data.frame(report$LOWER_LIMIT_DISTRIBUTION) - report$UPPER_LIMIT_DISTRIBUTION <- as.data.frame(report$UPPER_LIMIT_DISTRIBUTION) - report$VALUES_RELATIVE_TO_NORM <- as.data.frame(report$VALUES_RELATIVE_TO_NORM) - - filename <- paste(outputPath, "/concepts/measurement/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataMeasurements %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataFrequencyDistribution %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(FREQUENCY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataMeasurementsByType %>% + dplyr::select(c(1, 4, 5)) %>% + tidyr::nest(MEASUREMENTS_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstOccurrence %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataRecordsByUnit %>% + dplyr::select(c(1, 4, 5)) %>% + tidyr::nest(RECORDS_BY_UNIT = c(-1)) + ), + by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataMeasurementValueDistribution %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(MEASUREMENT_VALUE_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataLowerLimitDistribution %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(LOWER_LIMIT_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataUpperLimitDistribution %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(UPPER_LIMIT_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataValuesRelativeToNorm %>% + dplyr::select(c(1, 4, 5)) %>% + tidyr::nest(VALUES_RELATIVE_TO_NORM = c(-1)) + ), + by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating drug era reports") queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drugera/sqlAgeAtFirstExposure.sql", @@ -1121,7 +1046,7 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drugera/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -1129,7 +1054,7 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drugera/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -1137,7 +1062,7 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryLengthOfEra <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drugera/sqlLengthOfEra.sql", packageName = "Achilles", @@ -1146,12 +1071,12 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase vocab_database_schema = vocabDatabaseSchema ) - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataLengthOfEra <- DatabaseConnector::querySql(conn,queryLengthOfEra) + conn <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataLengthOfEra <- DatabaseConnector::querySql(conn, queryLengthOfEra) if (nrow(dataDrugEra) == 0) { return() @@ -1162,76 +1087,52 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataDrugEra %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstExposure %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,2,3,4,5)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,2,3)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataLengthOfEra %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(LENGTH_OF_ERA = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/drug_era"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$AGE_AT_FIRST_EXPOSURE <- as.data.frame(report$AGE_AT_FIRST_EXPOSURE) - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$LENGTH_OF_ERA <- as.data.frame(report$LENGTH_OF_ERA) - - filename <- paste(outputPath, "/concepts/drug_era/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataDrugEra %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstExposure %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 2, 3, 4, 5)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 2, 3)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataLengthOfEra %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(LENGTH_OF_ERA = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating drug reports") - + queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlAgeAtFirstExposure.sql", packageName = "Achilles", @@ -1239,7 +1140,7 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryDaysSupplyDistribution <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlDaysSupplyDistribution.sql", packageName = "Achilles", @@ -1247,7 +1148,7 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryDrugsByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlDrugsByType.sql", packageName = "Achilles", @@ -1255,7 +1156,7 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -1263,7 +1164,7 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -1271,15 +1172,15 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryDrugFrequencyDistribution <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/drug/sqlFrequencyDistribution.sql", + sqlFilename = "export/drug/sqlFrequencyDistribution.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryQuantityDistribution <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlQuantityDistribution.sql", packageName = "Achilles", @@ -1287,7 +1188,7 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryRefillsDistribution <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlRefillsDistribution.sql", packageName = "Achilles", @@ -1295,17 +1196,17 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) - dataDaysSupplyDistribution <- DatabaseConnector::querySql(conn,queryDaysSupplyDistribution) - dataDrugsByType <- DatabaseConnector::querySql(conn,queryDrugsByType) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataQuantityDistribution <- DatabaseConnector::querySql(conn,queryQuantityDistribution) - dataRefillsDistribution <- DatabaseConnector::querySql(conn,queryRefillsDistribution) - dataDrugFrequencyDistribution <- DatabaseConnector::querySql(conn,queryDrugFrequencyDistribution) + + conn <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) + dataDaysSupplyDistribution <- DatabaseConnector::querySql(conn, queryDaysSupplyDistribution) + dataDrugsByType <- DatabaseConnector::querySql(conn, queryDrugsByType) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataQuantityDistribution <- DatabaseConnector::querySql(conn, queryQuantityDistribution) + dataRefillsDistribution <- DatabaseConnector::querySql(conn, queryRefillsDistribution) + dataDrugFrequencyDistribution <- DatabaseConnector::querySql(conn, queryDrugFrequencyDistribution) if (nrow(dataPrevalenceByMonth) == 0) { return() @@ -1316,112 +1217,83 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataDrugs %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstExposure %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) - ), - by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataDaysSupplyDistribution %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(DAYS_SUPPLY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataDrugsByType %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(DRUGS_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataDrugFrequencyDistribution %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(DRUG_FREQUENCY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataQuantityDistribution %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(QUANTITY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataRefillsDistribution %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(REFILLS_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/drug_exposure"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$AGE_AT_FIRST_EXPOSURE <- as.data.frame(report$AGE_AT_FIRST_EXPOSURE) - report$DAYS_SUPPLY_DISTRIBUTION <- as.data.frame(report$DAYS_SUPPLY_DISTRIBUTION) - report$DRUGS_BY_TYPE <- as.data.frame(report$DRUGS_BY_TYPE) - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$DRUG_FREQUENCY_DISTRIBUTION <- as.data.frame(report$DRUG_FREQUENCY_DISTRIBUTION) - report$QUANTITY_DISTRIBUTION <- as.data.frame(report$QUANTITY_DISTRIBUTION) - report$REFILLS_DISTRIBUTION <- as.data.frame(report$REFILLS_DISTRIBUTION) - - filename <- paste(outputPath, "/concepts/drug_exposure/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataDrugs %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstExposure %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) + ), + by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataDaysSupplyDistribution %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(DAYS_SUPPLY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataDrugsByType %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(DRUGS_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataDrugFrequencyDistribution %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(DRUG_FREQUENCY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataQuantityDistribution %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(QUANTITY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataRefillsDistribution %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(REFILLS_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating device exposure reports") - queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/device/sqlAgeAtFirstExposure.sql", packageName = "Achilles", @@ -1437,7 +1309,7 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/device/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -1445,7 +1317,7 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/device/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -1453,22 +1325,22 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryDeviceFrequencyDistribution <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/device/sqlFrequencyDistribution.sql", + sqlFilename = "export/device/sqlFrequencyDistribution.sql", packageName = "Achilles", dbms = connectionDetails$dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn,queryAgeAtFirstExposure) - dataDevicesByType <- DatabaseConnector::querySql(conn,queryDevicesByType) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataDeviceFrequencyDistribution <- DatabaseConnector::querySql(conn,queryDeviceFrequencyDistribution) + conn <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) + dataDevicesByType <- DatabaseConnector::querySql(conn, queryDevicesByType) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataDeviceFrequencyDistribution <- DatabaseConnector::querySql(conn, queryDeviceFrequencyDistribution) if (nrow(dataDevices) == 0) { return() @@ -1479,84 +1351,59 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataDevices %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstExposure %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataDevicesByType %>% - dplyr::select(c(1,4,5)) %>% - tidyr::nest(DEVICES_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "DEVICE_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataDeviceFrequencyDistribution %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(DEVICE_FREQUENCY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/device_exposure"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$AGE_AT_FIRST_EXPOSURE <- as.data.frame(report$AGE_AT_FIRST_EXPOSURE) - report$DEVICES_BY_TYPE <- as.data.frame(report$DEVICES_BY_TYPE) - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$DEVICE_FREQUENCY_DISTRIBUTION <- as.data.frame(report$DEVICE_FREQUENCY_DISTRIBUTION) - - filename <- paste(outputPath, "/concepts/device_exposure/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataDevices %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstExposure %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataDevicesByType %>% + dplyr::select(c(1, 4, 5)) %>% + tidyr::nest(DEVICES_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "DEVICE_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataDeviceFrequencyDistribution %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(DEVICE_FREQUENCY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOConditionReports <- function(connectionDetails, duckdbCon, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating condition reports") queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/condition/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -1566,7 +1413,7 @@ generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/condition/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -1576,7 +1423,7 @@ generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryConditionsByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/condition/sqlConditionsByType.sql", packageName = "Achilles", @@ -1586,7 +1433,7 @@ generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstDiagnosis <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/condition/sqlAgeAtFirstDiagnosis.sql", packageName = "Achilles", @@ -1596,13 +1443,13 @@ generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDat results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn,queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn,queryPrevalenceByMonth) - dataConditionsByType <- DatabaseConnector::querySql(conn,queryConditionsByType) - dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn,queryAgeAtFirstDiagnosis) + on.exit(DatabaseConnector::disconnect(connection = conn)) + dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) + dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) + dataConditionsByType <- DatabaseConnector::querySql(conn, queryConditionsByType) + dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn, queryAgeAtFirstDiagnosis) if (nrow(dataPrevalenceByMonth) == 0) { return() @@ -1613,76 +1460,51 @@ generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDat ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataConditions %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,3,4,5,6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,3,4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataConditionsByType %>% - dplyr::select(c(1,2,3)) %>% - tidyr::nest(CONDITIONS_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONDITION_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstDiagnosis %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_DIAGNOSIS = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/condition_occurrence"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$CONDITIONS_BY_TYPE <- as.data.frame(report$CONDITIONS_BY_TYPE) - report$AGE_AT_FIRST_DIAGNOSIS <- as.data.frame(report$AGE_AT_FIRST_DIAGNOSIS) - - filename <- paste(outputPath, "/concepts/condition_occurrence/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports - ) + dplyr::left_join( + ( + dataConditions %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 3, 4, 5, 6)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 3, 4)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataConditionsByType %>% + dplyr::select(c(1, 2, 3)) %>% + tidyr::nest(CONDITIONS_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONDITION_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstDiagnosis %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_DIAGNOSIS = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) { - writeLines("Generating condition era reports") - queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/conditionera/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -1692,7 +1514,7 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/conditionera/sqlPrevalenceByMonth.sql", packageName = "Achilles", @@ -1702,7 +1524,7 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryAgeAtFirstDiagnosis <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/conditionera/sqlAgeAtFirstDiagnosis.sql", packageName = "Achilles", @@ -1712,7 +1534,7 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - + queryLengthOfEra <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/conditionera/sqlLengthOfEra.sql", packageName = "Achilles", @@ -1722,9 +1544,9 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) - - conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) + + conn <- DatabaseConnector::connect(connectionDetails) + on.exit(DatabaseConnector::disconnect(connection = conn)) dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) dataLengthOfEra <- DatabaseConnector::querySql(conn, queryLengthOfEra) @@ -1739,70 +1561,340 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c ) reports <- uniqueConcepts %>% - dplyr::left_join( - ( - dataConditionEra %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstDiagnosis %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1,2,3,4,5)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1,2,3)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataLengthOfEra %>% - dplyr::select(c(1,2,3,4,5,6,7,8,9)) %>% - tidyr::nest(LENGTH_OF_ERA = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() - - dir.create(paste0(outputPath,"/concepts/condition_era"),recursive=T,showWarnings = F) - x <- lapply( - uniqueConcepts$CONCEPT_ID, - function(concept_id, outputPath, reports) { - report <- reports[reports$CONCEPT_ID == concept_id, ] - report <- as.list(report) - - report$CONCEPT_NAME <- normalizeEmptyValue(report$CONCEPT_NAME) - report$NUM_PERSONS <- normalizeEmptyValue(report$NUM_PERSONS) - report$PERCENT_PERSONS <- normalizeEmptyValue(report$PERCENT_PERSONS) - report$RECORDS_PER_PERSON <- normalizeEmptyValue(report$RECORDS_PER_PERSON) - - report$AGE_AT_FIRST_EXPOSURE <- as.data.frame(report$AGE_AT_FIRST_EXPOSURE) - report$PREVALENCE_BY_GENDER_AGE_YEAR <- as.data.frame(report$PREVALENCE_BY_GENDER_AGE_YEAR) - report$PREVALENCE_BY_MONTH <- as.data.frame(report$PREVALENCE_BY_MONTH) - report$LENGTH_OF_ERA <- as.data.frame(report$LENGTH_OF_ERA) - - filename <- paste(outputPath, "/concepts/condition_era/concept_" , report$CONCEPT_ID , ".json", sep='') - write(jsonlite::toJSON(report), filename) - }, - outputPath, - reports + dplyr::left_join( + ( + dataConditionEra %>% + dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstDiagnosis %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + dplyr::select(c(1, 2, 3, 4, 5)) %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + dplyr::select(c(1, 2, 3)) %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataLengthOfEra %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% + tidyr::nest(LENGTH_OF_ERA = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) +} + + +generateDataDensityTotal <- function(connection, resultsDatabaseSchema) { + renderedSql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/datadensity/totalrecords.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema + ) + + totalRecordsData <- DatabaseConnector::querySql(connection, renderedSql) + colnames(totalRecordsData) <- c("domain", "date", "records") + totalRecordsData$date <- lubridate::parse_date_time(totalRecordsData$date, "ym") + + domainAggregates <- aggregate(totalRecordsData$records, by = list(domain = totalRecordsData$domain), FUN = sum) + names(domainAggregates) <- c("domain", "count_records") + data <- list("totalRecordsData" = totalRecordsData, "domainAggregates" = domainAggregates) + return(data) +} + +generateDataDensityRecordsPerPerson <- function(connection, resultsDatabaseSchema) { + renderedSql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/datadensity/recordsperperson.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema + ) + + recordsPerPerson <- DatabaseConnector::querySql(connection, renderedSql) + colnames(recordsPerPerson) <- c("domain", "date", "records") + recordsPerPerson$date <- lubridate::parse_date_time(recordsPerPerson$date, "ym") + recordsPerPerson$records <- round(recordsPerPerson$records, 2) + return(recordsPerPerson) +} + +generateDataDensityConceptsPerPerson <- function(connection, resultsDatabaseSchema) { + renderedSql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/datadensity/conceptsperperson.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema + ) + conceptsPerPerson <- DatabaseConnector::querySql(connection, renderedSql) + return(conceptsPerPerson) + #data.table::fwrite(conceptsPerPerson, file=paste0(sourceOutputPath, "/datadensity-concepts-per-person.csv")) + #dbWriteTable(duckdbCon, "concepts_per_person", conceptsPerPerson) +} + +generateDataDensityDomainsPerPerson <- function(connection, resultsDatabaseSchema) { + renderedSql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/datadensity/domainsperperson.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema + ) + domainsPerPerson <- DatabaseConnector::querySql(connection, renderedSql) + domainsPerPerson$PERCENT_VALUE <- round(as.numeric(domainsPerPerson$PERCENT_VALUE), 2) + return(domainsPerPerson) + #data.table::fwrite(domainsPerPerson, file=paste0(sourceOutputPath, "/datadensity-domains-per-person.csv")) + #dbWriteTable(duckdbCon, "domains_per_person", domainsPerPerson) +} + +generateDomainSummaryConditions <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryConditions <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/condition/sqlConditionTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataConditions <- DatabaseConnector::querySql(connection, queryConditions) + dataConditions$PERCENT_PERSONS <- format(round(dataConditions$PERCENT_PERSONS, 4), nsmall = 4) + dataConditions$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataConditions$PERCENT_PERSONS), 10) + dataConditions$RECORDS_PER_PERSON <- format(round(dataConditions$RECORDS_PER_PERSON, 1), nsmall = 1) + dataConditions$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataConditions$RECORDS_PER_PERSON), 10) + return(dataConditions) + #data.table::fwrite(dataConditions, file=paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) + #dbWriteTable(duckdbCon, "domain_summary", dataConditions, append = TRUE) +} + +generateDomainSummaryConditionEras <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryConditionEra <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/conditionera/sqlConditionEraTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataConditionEra <- DatabaseConnector::querySql(connection, queryConditionEra) + dataConditionEra$PERCENT_PERSONS <- format(round(dataConditionEra$PERCENT_PERSONS, 4), nsmall = 4) + dataConditionEra$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataConditionEra$PERCENT_PERSONS), 10) + dataConditionEra$RECORDS_PER_PERSON <- format(round(dataConditionEra$RECORDS_PER_PERSON, 1), nsmall = 1) + dataConditionEra$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataConditionEra$RECORDS_PER_PERSON), 10) + return(dataConditionEra) + #data.table::fwrite(dataConditionEra, file=paste0(sourceOutputPath, "/domain-summary-condition_era.csv")) +} + +generateDomainSummaryDrugs <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryDrugs <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/drug/sqlDrugTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema ) + dataDrugs <- DatabaseConnector::querySql(connection, queryDrugs) + dataDrugs$PERCENT_PERSONS <- format(round(dataDrugs$PERCENT_PERSONS, 4), nsmall = 4) + dataDrugs$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataDrugs$PERCENT_PERSONS), 10) + dataDrugs$RECORDS_PER_PERSON <- format(round(dataDrugs$RECORDS_PER_PERSON, 1), nsmall = 1) + dataDrugs$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataDrugs$RECORDS_PER_PERSON), 10) + return(dataDrugs) + #data.table::fwrite(dataDrugs, file=paste0(sourceOutputPath, "/domain-summary-drug_exposure.csv")) +} + +generateDomainDrugStratification <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryDrugType <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/drug/sqlDomainDrugStratification.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataDrugType <- DatabaseConnector::querySql(connection, queryDrugType) + return(dataDrugType) + #data.table::fwrite(dataDrugType, file=paste0(sourceOutputPath, "/domain-drug-stratification.csv")) +} + +generateDomainSummaryDrugEra <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryDrugEra <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/drugera/sqlDrugEraTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataDrugEra <- DatabaseConnector::querySql(connection, queryDrugEra) + dataDrugEra$PERCENT_PERSONS <- format(round(dataDrugEra$PERCENT_PERSONS, 4), nsmall = 4) + dataDrugEra$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataDrugEra$PERCENT_PERSONS), 10) + dataDrugEra$RECORDS_PER_PERSON <- format(round(dataDrugEra$RECORDS_PER_PERSON, 1), nsmall = 1) + dataDrugEra$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataDrugEra$RECORDS_PER_PERSON), 10) + return(dataDrugEra) + #data.table::fwrite(dataDrugEra, file=paste0(sourceOutputPath, "/domain-summary-drug_era.csv")) +} + +generateDomainSummaryMeasurements <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryMeasurements <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/measurement/sqlMeasurementTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataMeasurements <- DatabaseConnector::querySql(connection, queryMeasurements) + dataMeasurements$PERCENT_PERSONS <- format(round(dataMeasurements$PERCENT_PERSONS, 4), nsmall = 4) + dataMeasurements$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataMeasurements$PERCENT_PERSONS), 10) + dataMeasurements$RECORDS_PER_PERSON <- format(round(dataMeasurements$RECORDS_PER_PERSON, 1), nsmall = 1) + dataMeasurements$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataMeasurements$RECORDS_PER_PERSON), 10) + return(dataMeasurements) + #data.table::fwrite(dataMeasurements, file=paste0(sourceOutputPath, "/domain-summary-measurement.csv")) +} + +generateDomainSummaryObservations <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryObservations <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/observation/sqlObservationTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataObservations <- DatabaseConnector::querySql(connection, queryObservations) + dataObservations$PERCENT_PERSONS <- format(round(dataObservations$PERCENT_PERSONS, 4), nsmall = 4) + dataObservations$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataObservations$PERCENT_PERSONS), 10) + dataObservations$RECORDS_PER_PERSON <- format(round(dataObservations$RECORDS_PER_PERSON, 1), nsmall = 1) + dataObservations$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataObservations$RECORDS_PER_PERSON), 10) + return(dataObservations) + #data.table::fwrite(dataObservations, file=paste0(sourceOutputPath, "/domain-summary-observation.csv")) +} + +generateDomainSummaryVisitDetails <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryVisitDetails <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/visitdetail/sqlVisitDetailTreemap.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataVisitDetails <- DatabaseConnector::querySql(connection, queryVisitDetails) + dataVisitDetails$PERCENT_PERSONS <- format(round(dataVisitDetails$PERCENT_PERSONS, 4), nsmall = 4) + dataVisitDetails$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataVisitDetails$PERCENT_PERSONS), 10) + dataVisitDetails$RECORDS_PER_PERSON <- format(round(dataVisitDetails$RECORDS_PER_PERSON, 1), nsmall = 1) + dataVisitDetails$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataVisitDetails$RECORDS_PER_PERSON), 10) + names(dataVisitDetails)[names(dataVisitDetails) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' + return(dataVisitDetails) + #data.table::fwrite(dataVisitDetails, file=paste0(sourceOutputPath, "/domain-summary-visit_detail.csv")) +} + +generateDomainSummaryVisits <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryVisits <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/visit/sqlVisitTreemap.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataVisits <- DatabaseConnector::querySql(connection, queryVisits) + dataVisits$PERCENT_PERSONS <- format(round(dataVisits$PERCENT_PERSONS, 4), nsmall = 4) + dataVisits$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataVisits$PERCENT_PERSONS), 10) + dataVisits$RECORDS_PER_PERSON <- format(round(dataVisits$RECORDS_PER_PERSON, 1), nsmall = 1) + dataVisits$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataVisits$RECORDS_PER_PERSON), 10) + names(dataVisits)[names(dataVisits) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' + return(dataVisits) + #data.table::fwrite(dataVisits, file=paste0(sourceOutputPath, "/domain-summary-visit_occurrence.csv")) +} + +generateDomainVisitStratification <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryVisits <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/visit/sqlDomainVisitStratification.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataVisits <- DatabaseConnector::querySql(connection, queryVisits) + return(dataVisits) + #data.table::fwrite(dataVisits, file=paste0(sourceOutputPath, "/domain-visit-stratification.csv")) +} + +generateDomainSummaryProcedures <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryProcedures <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/procedure/sqlProcedureTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataProcedures <- DatabaseConnector::querySql(connection, queryProcedures) + dataProcedures$PERCENT_PERSONS <- format(round(dataProcedures$PERCENT_PERSONS, 4), nsmall = 4) + dataProcedures$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataProcedures$PERCENT_PERSONS), 10) + dataProcedures$RECORDS_PER_PERSON <- format(round(dataProcedures$RECORDS_PER_PERSON, 1), nsmall = 1) + dataProcedures$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataProcedures$RECORDS_PER_PERSON), 10) + return(dataProcedures) + #data.table::fwrite(dataProcedures, file=paste0(sourceOutputPath, "/domain-summary-procedure_occurrence.csv")) +} + +generateDomainSummaryDevices <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryDevices <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/device/sqlDeviceTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + dataDevices <- DatabaseConnector::querySql(connection, queryDevices) + dataDevices$PERCENT_PERSONS <- format(round(dataDevices$PERCENT_PERSONS, 4), nsmall = 4) + dataDevices$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataDevices$PERCENT_PERSONS), 10) + dataDevices$RECORDS_PER_PERSON <- format(round(dataDevices$RECORDS_PER_PERSON, 1), nsmall = 1) + dataDevices$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataDevices$RECORDS_PER_PERSON), 10) + return(dataDevices) + #data.table::fwrite(dataDevices, file=paste0(sourceOutputPath, "/domain-summary-device_exposure.csv")) +} + +generateDomainSummaryProvider <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { + queryProviders <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/provider/sqlProviderSpecialty.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema, + vocab_database_schema = vocabDatabaseSchema + ) + writeLines("Generating provider reports") + dataProviders <- DatabaseConnector::querySql(connection, queryProviders) + dataProviders$PERCENT_PERSONS <- format(round(dataProviders$PERCENT_PERSONS, 4), nsmall = 4) + return(dataProviders) + #data.table::fwrite(dataProviders, file=paste0(sourceOutputPath, "/domain-summary-provider.csv")) + #dbWriteTable(duckdbCon, "domain_summary", dataProviders, append = TRUE) +} + +generateQualityCompleteness <- function(connection, resultsDatabaseSchema) { + queryCompleteness <- SqlRender::loadRenderTranslateSql( + sqlFilename = "export/quality/sqlCompletenessTable.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + results_database_schema = resultsDatabaseSchema + ) + dataCompleteness <- DatabaseConnector::querySql(connection, queryCompleteness) + dataCompleteness <- dataCompleteness[order(-dataCompleteness$RECORD_COUNT),] + # prevent downstream crashes with large files + if (nrow(dataCompleteness) > 100000) { + dataCompleteness <- dataCompleteness[1:100000,] + } + #data.table::fwrite(dataCompleteness, file=paste0(sourceOutputPath, "/quality-completeness.csv")) + return(dataCompleteness) } #' @title exportToAres @@ -1829,320 +1921,407 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c #'@importFrom dplyr ntile desc #'@export #' +library("DBI") + exportToAres <- function( - connectionDetails, - cdmDatabaseSchema, - resultsDatabaseSchema, - vocabDatabaseSchema, + connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, outputPath, + outputFormat, reports = c()) { conn <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection = conn)) - + on.exit(DatabaseConnector::disconnect(connection = conn)) + # generate a folder name for this release of the cdm characterization - sql <- SqlRender::render(sql = "select * from @cdmDatabaseSchema.cdm_source;",cdmDatabaseSchema = cdmDatabaseSchema) + sql <- SqlRender::render(sql = "select * from @cdmDatabaseSchema.cdm_source;", cdmDatabaseSchema = cdmDatabaseSchema) sql <- SqlRender::translate(sql = sql, targetDialect = connectionDetails$dbms) metadata <- DatabaseConnector::querySql(conn, sql) - sourceKey <- gsub(" ","_",metadata$CDM_SOURCE_ABBREVIATION) + sourceKey <- gsub(" ", "_", metadata$CDM_SOURCE_ABBREVIATION) releaseDateKey <- format(lubridate::ymd(metadata$CDM_RELEASE_DATE), "%Y%m%d") sourceOutputPath <- file.path(outputPath, sourceKey, releaseDateKey) - dir.create(sourceOutputPath,showWarnings = F,recursive=T) + dir.create(sourceOutputPath, showWarnings = F, recursive = T) + duckdbCon <- NULL + conceptsSchema <- "concepts" + domainSchema <- "domain" + densitySchema <- "data_density" + metadataSchema <- "metadata" + personSchema <- "person" + observationPeriodSchema <- "observation_period" + deathSchema <- "death" + if (outputFormat == "duckdb") { + duckdbCon <- dbConnect(duckdb::duckdb(), dbdir = file.path(outputPath, sourceKey, releaseDateKey, 'ares.duckdb'), read_only = FALSE) + dbExecute(duckdbCon, paste("CREATE SCHEMA", conceptsSchema)) + dbExecute(duckdbCon, paste("CREATE SCHEMA", domainSchema)) + dbExecute(duckdbCon, paste("CREATE SCHEMA", densitySchema)) + dbExecute(duckdbCon, paste("CREATE SCHEMA", metadataSchema)) + dbExecute(duckdbCon, paste("CREATE SCHEMA", observationPeriodSchema)) + dbExecute(duckdbCon, paste("CREATE SCHEMA", personSchema)) + dbExecute(duckdbCon, paste("CREATE SCHEMA", deathSchema)) + + + on.exit(dbDisconnect(duckdbCon, shutdown = TRUE)) + } print(paste0("processing AO export to ", sourceOutputPath)) - - if (length(reports) == 0 || (length(reports) > 0 && "density" %in% reports)) { + + if (length(reports) == 0 || (length(reports) > 0 && "density" %in% reports)) { + writeLines("Generating data density reports") + currentTable <- { } # data density - totals - renderedSql <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/datadensity/totalrecords.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema - ) - - totalRecordsData <- DatabaseConnector::querySql(conn,renderedSql) - colnames(totalRecordsData) <- c("domain", "date", "records") - totalRecordsData$date <- lubridate::parse_date_time(totalRecordsData$date, "ym") - data.table::fwrite(totalRecordsData, file=paste0(sourceOutputPath, "/datadensity-total.csv")) - - domainAggregates <- aggregate(totalRecordsData$records, by=list(domain=totalRecordsData$domain), FUN=sum) - names(domainAggregates) <- c("domain","count_records") - data.table::fwrite(domainAggregates, file=paste0(sourceOutputPath, "/records-by-domain.csv")) + currentTable <- generateDataDensityTotal(conn, resultsDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(currentTable$totalRecordsData, file = paste0(sourceOutputPath, "/datadensity-total.csv")) + data.table::fwrite(currentTable$domainAggregates, file = paste0(sourceOutputPath, "/records-by-domain.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable$totalRecordsData, "datadensity_total", densitySchema) + writeReportToTable(duckdbCon, currentTable$totalRecordsData, "records_by_domain", densitySchema) + } # data density - records per person - renderedSql <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/datadensity/recordsperperson.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema - ) - - recordsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) - colnames(recordsPerPerson) <- c("domain", "date", "records") - recordsPerPerson$date <- lubridate::parse_date_time(recordsPerPerson$date, "ym") - recordsPerPerson$records <- round(recordsPerPerson$records,2) - data.table::fwrite(recordsPerPerson, file=paste0(sourceOutputPath, "/datadensity-records-per-person.csv")) - + currentTable <- generateDataDensityRecordsPerPerson(conn, resultsDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-records-per-person.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "datadensity_records_per_person", densitySchema) + } + # data density - concepts per person - renderedSql <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/datadensity/conceptsperperson.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema - ) - conceptsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) - data.table::fwrite(conceptsPerPerson, file=paste0(sourceOutputPath, "/datadensity-concepts-per-person.csv")) - + currentTable <- generateDataDensityConceptsPerPerson(conn, resultsDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-concepts-per-person.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "datadensity_concepts_per_person", densitySchema) + } + + # data density - domains per person - renderedSql <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/datadensity/domainsperperson.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema - ) - domainsPerPerson <- DatabaseConnector::querySql(conn,renderedSql) - domainsPerPerson$PERCENT_VALUE <- round(as.numeric(domainsPerPerson$PERCENT_VALUE),2) - data.table::fwrite(domainsPerPerson, file=paste0(sourceOutputPath, "/datadensity-domains-per-person.csv")) + currentTable <- generateDataDensityDomainsPerPerson(conn, resultsDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-domains-per-person.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "datadensity_domains_per_person", densitySchema) + } } - - if (length(reports) == 0 || (length(reports) > 0 && ("domain" %in% reports || "concept" %in% reports))) { - # metadata - generateAOMetadataReport(connectionDetails, cdmDatabaseSchema, sourceOutputPath) - + + if (length(reports) == 0 || (length(reports) > 0 && ("domain" %in% reports || "concept" %in% reports))) { + # metadata + writeLines("Generating metadata report") + currentTable <- generateAOMetadataReport(conn, cdmDatabaseSchema, sourceOutputPath) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/metadata.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "metadata", metadataSchema) + } + + # cdm source - generateAOCdmSourceReport(connectionDetails, cdmDatabaseSchema, sourceOutputPath) - + writeLines("Generating cdm source report") + currentTable <- generateAOCdmSourceReport(conn, cdmDatabaseSchema, sourceOutputPath) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/cdmsource.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "cdm_source", metadataSchema) + } + # domain summary - observation period - generateAOObservationPeriodReport(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - + writeLines("Generating observation period reports") + currentTable <- generateAOObservationPeriodReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + if (outputFormat == "json") { + filename <- file.path(sourceOutputPath, "observationperiod.json") + write(jsonlite::toJSON(currentTable), filename) + } + + if (outputFormat == "duckdb") { + for (key in names(currentTable)) { + print(is.data.frame(currentTable[key])) + writeReportToTable(duckdbCon, as.data.frame(currentTable[[key]]), key, observationPeriodSchema) + } + } + # death report - generateAODeathReport(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - + writeLines("Generating death report") + currentTable <- generateAODeathReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + if (outputFormat == "json") { + filename <- file.path(sourceOutputPath, "death.json") + write(jsonlite::toJSON(currentTable), filename) + } + + if (outputFormat == "duckdb") { + for (key in names(currentTable)) { + print(is.data.frame(currentTable[key])) + writeReportToTable(duckdbCon, as.data.frame(currentTable[[key]]), key, deathSchema) + } + } + + + writeLines("Generating domain summary reports") + # domain summary - conditions - queryConditions <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/condition/sqlConditionTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataConditions <- DatabaseConnector::querySql(conn,queryConditions) - dataConditions$PERCENT_PERSONS <- format(round(dataConditions$PERCENT_PERSONS,4), nsmall=4) - dataConditions$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataConditions$PERCENT_PERSONS),10) - dataConditions$RECORDS_PER_PERSON <- format(round(dataConditions$RECORDS_PER_PERSON,1),nsmall=1) - dataConditions$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataConditions$RECORDS_PER_PERSON),10) - data.table::fwrite(dataConditions, file=paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) - + dataConditions <- generateDomainSummaryConditions(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataConditions, file = paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataConditions, "condition_occurrence", domainSchema) + } + + # domain summary - condition eras - queryConditionEra <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/conditionera/sqlConditionEraTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataConditionEra <- DatabaseConnector::querySql(conn,queryConditionEra) - dataConditionEra$PERCENT_PERSONS <- format(round(dataConditionEra$PERCENT_PERSONS,4), nsmall=4) - dataConditionEra$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataConditionEra$PERCENT_PERSONS),10) - dataConditionEra$RECORDS_PER_PERSON <- format(round(dataConditionEra$RECORDS_PER_PERSON,1),nsmall=1) - dataConditionEra$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataConditionEra$RECORDS_PER_PERSON),10) - data.table::fwrite(dataConditionEra, file=paste0(sourceOutputPath, "/domain-summary-condition_era.csv")) - + dataConditionEra <- generateDomainSummaryConditionEras(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataConditionEra, file = paste0(sourceOutputPath, "/domain-summary-condition_era.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataConditionEra, "condition_era", domainSchema) + } + + # domain summary - drugs - queryDrugs <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/drug/sqlDrugTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataDrugs <- DatabaseConnector::querySql(conn,queryDrugs) - dataDrugs$PERCENT_PERSONS <- format(round(dataDrugs$PERCENT_PERSONS,4), nsmall=4) - dataDrugs$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataDrugs$PERCENT_PERSONS),10) - dataDrugs$RECORDS_PER_PERSON <- format(round(dataDrugs$RECORDS_PER_PERSON,1),nsmall=1) - dataDrugs$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataDrugs$RECORDS_PER_PERSON),10) - data.table::fwrite(dataDrugs, file=paste0(sourceOutputPath, "/domain-summary-drug_exposure.csv")) - + dataDrugs <- generateDomainSummaryDrugs(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataDrugs, file = paste0(sourceOutputPath, "/domain-summary-drug_exposure.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataDrugs, "drug_exposure", domainSchema) + } + + # domain stratification by drug type concept - queryDrugType <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/drug/sqlDomainDrugStratification.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataDrugType <- DatabaseConnector::querySql(conn,queryDrugType) - data.table::fwrite(dataDrugType, file=paste0(sourceOutputPath, "/domain-drug-stratification.csv")) + dataDrugType <- generateDomainDrugStratification(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataDrugType, file = paste0(sourceOutputPath, "/domain-drug-stratification.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataDrugType, "drug_stratification", domainSchema) + } + # domain summary - drug era - queryDrugEra <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/drugera/sqlDrugEraTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataDrugEra <- DatabaseConnector::querySql(conn,queryDrugEra) - dataDrugEra$PERCENT_PERSONS <- format(round(dataDrugEra$PERCENT_PERSONS,4), nsmall=4) - dataDrugEra$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataDrugEra$PERCENT_PERSONS),10) - dataDrugEra$RECORDS_PER_PERSON <- format(round(dataDrugEra$RECORDS_PER_PERSON,1),nsmall=1) - dataDrugEra$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataDrugEra$RECORDS_PER_PERSON), 10) - data.table::fwrite(dataDrugEra, file=paste0(sourceOutputPath, "/domain-summary-drug_era.csv")) - + dataDrugEra <- generateDomainSummaryDrugEra(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataDrugEra, file = paste0(sourceOutputPath, "/domain-summary-drug_era.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataDrugType, "drug_stratification", domainSchema) + } + + # domain summary - measurements - queryMeasurements <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/measurement/sqlMeasurementTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataMeasurements <- DatabaseConnector::querySql(conn,queryMeasurements) - dataMeasurements$PERCENT_PERSONS <- format(round(dataMeasurements$PERCENT_PERSONS,4), nsmall=4) - dataMeasurements$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataMeasurements$PERCENT_PERSONS), 10) - dataMeasurements$RECORDS_PER_PERSON <- format(round(dataMeasurements$RECORDS_PER_PERSON,1),nsmall=1) - dataMeasurements$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataMeasurements$RECORDS_PER_PERSON), 10) - data.table::fwrite(dataMeasurements, file=paste0(sourceOutputPath, "/domain-summary-measurement.csv")) - + dataMeasurements <- generateDomainSummaryMeasurements(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataMeasurements, file = paste0(sourceOutputPath, "/domain-summary-measurement.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataMeasurements, "measurement", domainSchema) + } + + # domain summary - observations - queryObservations <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/observation/sqlObservationTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataObservations <- DatabaseConnector::querySql(conn,queryObservations) - dataObservations$PERCENT_PERSONS <- format(round(dataObservations$PERCENT_PERSONS,4), nsmall=4) - dataObservations$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataObservations$PERCENT_PERSONS), 10) - dataObservations$RECORDS_PER_PERSON <- format(round(dataObservations$RECORDS_PER_PERSON,1),nsmall=1) - dataObservations$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataObservations$RECORDS_PER_PERSON), 10) - data.table::fwrite(dataObservations, file=paste0(sourceOutputPath, "/domain-summary-observation.csv")) - + dataObservations <- generateDomainSummaryObservations(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataObservations, file = paste0(sourceOutputPath, "/domain-summary-observation.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataObservations, "observation", domainSchema) + } + + # domain summary - visit details - queryVisitDetails <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/visitdetail/sqlVisitDetailTreemap.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataVisitDetails <- DatabaseConnector::querySql(conn,queryVisitDetails) - dataVisitDetails$PERCENT_PERSONS <- format(round(dataVisitDetails$PERCENT_PERSONS,4), nsmall=4) - dataVisitDetails$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataVisitDetails$PERCENT_PERSONS),10) - dataVisitDetails$RECORDS_PER_PERSON <- format(round(dataVisitDetails$RECORDS_PER_PERSON,1),nsmall=1) - dataVisitDetails$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataVisitDetails$RECORDS_PER_PERSON),10) - names(dataVisitDetails)[names(dataVisitDetails) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' - data.table::fwrite(dataVisitDetails, file=paste0(sourceOutputPath, "/domain-summary-visit_detail.csv")) - + dataVisitDetails <- generateDomainSummaryVisitDetails(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataVisitDetails, file = paste0(sourceOutputPath, "/domain-summary-visit_detail.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataVisitDetails, "visit_detail", domainSchema) + } + + # domain summary - visits - queryVisits <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/visit/sqlVisitTreemap.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataVisits <- DatabaseConnector::querySql(conn,queryVisits) - dataVisits$PERCENT_PERSONS <- format(round(dataVisits$PERCENT_PERSONS,4), nsmall=4) - dataVisits$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataVisits$PERCENT_PERSONS),10) - dataVisits$RECORDS_PER_PERSON <- format(round(dataVisits$RECORDS_PER_PERSON,1),nsmall=1) - dataVisits$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataVisits$RECORDS_PER_PERSON),10) - names(dataVisits)[names(dataVisits) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' - data.table::fwrite(dataVisits, file=paste0(sourceOutputPath, "/domain-summary-visit_occurrence.csv")) - + dataVisits <- generateDomainSummaryVisits(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataVisits, file = paste0(sourceOutputPath, "/domain-summary-visit_occurrence.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataVisits, "visit_occurrence", domainSchema) + } + # domain stratification by visit concept - queryVisits <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/visit/sqlDomainVisitStratification.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataVisits <- DatabaseConnector::querySql(conn,queryVisits) - data.table::fwrite(dataVisits, file=paste0(sourceOutputPath, "/domain-visit-stratification.csv")) + currentTable <- generateDomainVisitStratification(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/domain-visit-stratification.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "visit_stratification", domainSchema) + } + # domain summary - procedures - queryProcedures <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/procedure/sqlProcedureTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataProcedures <- DatabaseConnector::querySql(conn,queryProcedures) - dataProcedures$PERCENT_PERSONS <- format(round(dataProcedures$PERCENT_PERSONS,4), nsmall=4) - dataProcedures$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataProcedures$PERCENT_PERSONS),10) - dataProcedures$RECORDS_PER_PERSON <- format(round(dataProcedures$RECORDS_PER_PERSON,1),nsmall=1) - dataProcedures$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataProcedures$RECORDS_PER_PERSON),10) - data.table::fwrite(dataProcedures, file=paste0(sourceOutputPath, "/domain-summary-procedure_occurrence.csv")) - + dataProcedures <- generateDomainSummaryProcedures(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataProcedures, file = paste0(sourceOutputPath, "/domain-summary-procedure_occurrence.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataProcedures, "procedure_occurrence", domainSchema) + } + + # domain summary - devices - queryDevices <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/device/sqlDeviceTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - dataDevices <- DatabaseConnector::querySql(conn,queryDevices) - dataDevices$PERCENT_PERSONS <- format(round(dataDevices$PERCENT_PERSONS,4), nsmall=4) - dataDevices$PERCENT_PERSONS_NTILE <- dplyr::ntile(dplyr::desc(dataDevices$PERCENT_PERSONS),10) - dataDevices$RECORDS_PER_PERSON <- format(round(dataDevices$RECORDS_PER_PERSON,1),nsmall=1) - dataDevices$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataDevices$RECORDS_PER_PERSON),10) - data.table::fwrite(dataDevices, file=paste0(sourceOutputPath, "/domain-summary-device_exposure.csv")) + dataDevices <- generateDomainSummaryDevices(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataDevices, file = paste0(sourceOutputPath, "/domain-summary-device_exposure.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataDevices, "device_exposure", domainSchema) + } } - - # domain summary - provider - queryProviders <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/provider/sqlProviderSpecialty.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema, - vocab_database_schema = vocabDatabaseSchema - ) - writeLines("Generating provider reports") - dataProviders <- DatabaseConnector::querySql(conn,queryProviders) - dataProviders$PERCENT_PERSONS <- format(round(dataProviders$PERCENT_PERSONS,4), nsmall=4) - data.table::fwrite(dataProviders, file=paste0(sourceOutputPath, "/domain-summary-provider.csv")) - - if (length(reports) == 0 || (length(reports) > 0 && "quality" %in% reports)) { + + # domain summary - provider + dataProviders <- generateDomainSummaryProvider(conn, resultsDatabaseSchema, vocabDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(dataProviders, file = paste0(sourceOutputPath, "/domain-summary-provider.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, dataProviders, "provider", domainSchema) + } + + + if (length(reports) == 0 || (length(reports) > 0 && "quality" %in% reports)) { + writeLines("Generating quality completeness report") + # quality - completeness - queryCompleteness <- SqlRender::loadRenderTranslateSql( - sqlFilename = "export/quality/sqlCompletenessTable.sql", - packageName = "Achilles", - dbms = connectionDetails$dbms, - results_database_schema = resultsDatabaseSchema - ) - dataCompleteness <- DatabaseConnector::querySql(conn,queryCompleteness) - dataCompleteness <- dataCompleteness[order(-dataCompleteness$RECORD_COUNT),] - # prevent downstream crashes with large files - if (nrow(dataCompleteness) > 100000) { - dataCompleteness <- dataCompleteness[1:100000,] + currentTable <- generateQualityCompleteness(conn, resultsDatabaseSchema) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/quality-completeness.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "quality_completeness", metadataSchema) } - data.table::fwrite(dataCompleteness, file=paste0(sourceOutputPath, "/quality-completeness.csv")) } - if (length(reports) == 0 || (length(reports) > 0 && "performance" %in% reports)) { - generateAOAchillesPerformanceReport(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + if (length(reports) == 0 || (length(reports) > 0 && "performance" %in% reports)) { + writeLines("Generating achilles performance report") + currentTable <- generateAOAchillesPerformanceReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + if (outputFormat == "json") { + data.table::fwrite(currentTable, file.path(sourceOutputPath, "achilles-performance.csv")) + } + if (outputFormat == "duckdb") { + writeReportToTable(duckdbCon, currentTable, "achilles_performance", metadataSchema) + } + } - - if (length(reports) == 0 || (length(reports) > 0 && "concept" %in% reports)) { + + if (length(reports) == 0 || (length(reports) > 0 && "concept" %in% reports)) { # concept level reporting - conceptsFolder <- file.path(sourceOutputPath,"concepts") - dir.create(conceptsFolder,showWarnings = F) - generateAOVisitReports(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAOVisitDetailReports(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAOMeasurementReports(connectionDetails, dataMeasurements, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAOConditionReports(connectionDetails, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAOConditionEraReports(connectionDetails, dataConditionEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAODrugReports(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAODeviceReports(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAODrugEraReports(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAOProcedureReports(connectionDetails, dataProcedures, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - generateAOObservationReports(connectionDetails, dataObservations, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + conceptsFolder <- file.path(sourceOutputPath, "concepts") + if (outputFormat == "json") { + dir.create(conceptsFolder, showWarnings = F) + + } + columnsToNormalize <- c("CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + + writeLines("Generating visit reports") + currentTable <- generateAOVisitReports(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "VISIT_DURATION_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE") + dir <- "/concepts/visit_occurrence" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "visit_occurrence", conceptsSchema) + + writeLines("Generating visit_detail reports") + currentTable <- generateAOVisitDetailReports(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "VISIT_DETAIL_DURATION_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE") + dir <- "/concepts/visit_detail" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "visit_detail", conceptsSchema) + + writeLines("Generating Measurement reports") + currentTable <- generateAOMeasurementReports(connectionDetails, dataMeasurements, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "FREQUENCY_DISTRIBUTION", "MEASUREMENTS_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE", "RECORDS_BY_UNIT", "MEASUREMENT_VALUE_DISTRIBUTION", "LOWER_LIMIT_DISTRIBUTION", "UPPER_LIMIT_DISTRIBUTION", "VALUES_RELATIVE_TO_NORM") + dir <- "/concepts/measurement" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "measurement", conceptsSchema) + + writeLines("Generating condition reports") + currentTable <- generateAOConditionReports(connectionDetails, duckdbCon, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "CONDITIONS_BY_TYPE", "AGE_AT_FIRST_DIAGNOSIS") + dir <- "/concepts/condition_occurrence" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "condition_occurrence", conceptsSchema) + + writeLines("Generating condition era reports") + currentTable <- generateAOConditionEraReports(connectionDetails, dataConditionEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "LENGTH_OF_ERA") + dir <- "/concepts/condition_era" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "condition_era", conceptsSchema) + + writeLines("Generating drug reports") + currentTable <- generateAODrugReports(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "DAYS_SUPPLY_DISTRIBUTION", "DRUGS_BY_TYPE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "DRUG_FREQUENCY_DISTRIBUTION", "QUANTITY_DISTRIBUTION", "REFILLS_DISTRIBUTION") + dir <- "/concepts/drug_exposure" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "drug_exposure", conceptsSchema) + + writeLines("Generating device exposure reports") + currentTable <- generateAODeviceReports(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "DEVICES_BY_TYPE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "DEVICE_FREQUENCY_DISTRIBUTION") + dir <- "/concepts/device_exposure" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "device_exposure", conceptsSchema) + + writeLines("Generating drug era reports") + currentTable <- generateAODrugEraReports(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "LENGTH_OF_ERA") + dir <- "/concepts/procedure_occurrence" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "drug_era", conceptsSchema) + + writeLines("Generating procedure reports") + currentTable <- generateAOProcedureReports(connectionDetails, dataProcedures, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c('PREVALENCE_BY_GENDER_AGE_YEAR', 'PREVALENCE_BY_MONTH', 'PROCEDURE_FREQUENCY_DISTRIBUTION', 'PROCEDURES_BY_TYPE', 'AGE_AT_FIRST_OCCURRENCE') + dir <- "/concepts/procedure_occurrence" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "procedure_occurrence", conceptsSchema) + + writeLines("Generating Observation reports") + currentTable <- generateAOObservationReports(connectionDetails, dataObservations, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "OBS_FREQUENCY_DISTRIBUTION", "OBSERVATIONS_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE") + dir <- "/concepts/observation" + lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { + processAndExportConceptData(concept_id, ...) + }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "observation", conceptsSchema) } - - if (length(reports) == 0 || (length(reports) > 0 && "person" %in% reports)) { - generateAOPersonReport(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + + if (length(reports) == 0 || (length(reports) > 0 && "person" %in% reports)) { + writeLines("Generating person report") + currentTable <- generateAOPersonReport(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) + if (outputFormat == "json") { + jsonOutput = jsonlite::toJSON(currentTable) + write(jsonOutput, file = paste0(sourceOutputPath, "/person.json")) + } + + if (outputFormat == "duckdb") { + for (key in names(currentTable)) { + print(is.data.frame(currentTable[key])) + writeReportToTable(duckdbCon, as.data.frame(currentTable[[key]]), key, personSchema) + } + } } } + From 98307959573057bff74d9f464d39eb7c9c14cb14 Mon Sep 17 00:00:00 2001 From: Mikhail-iontsev Date: Thu, 9 Nov 2023 21:57:16 +0400 Subject: [PATCH 2/7] feat: added support for duckdb in exportToAres --- R/exportToAres.R | 221 +++++++++-------------------------------------- 1 file changed, 42 insertions(+), 179 deletions(-) diff --git a/R/exportToAres.R b/R/exportToAres.R index 9348cd66..edae1f92 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -1945,23 +1945,15 @@ exportToAres <- function( dir.create(sourceOutputPath, showWarnings = F, recursive = T) duckdbCon <- NULL conceptsSchema <- "concepts" - domainSchema <- "domain" - densitySchema <- "data_density" - metadataSchema <- "metadata" - personSchema <- "person" - observationPeriodSchema <- "observation_period" - deathSchema <- "death" + conceptsFolder <- file.path(sourceOutputPath, "concepts") + dir.create(conceptsFolder, showWarnings = F) if (outputFormat == "duckdb") { - duckdbCon <- dbConnect(duckdb::duckdb(), dbdir = file.path(outputPath, sourceKey, releaseDateKey, 'ares.duckdb'), read_only = FALSE) + conceptsDatabasePath <- file.path(conceptsFolder, 'data.duckdb') + if (file.exists(conceptsDatabasePath)) { + unlink(conceptsDatabasePath) + } + duckdbCon <- dbConnect(duckdb::duckdb(), dbdir = conceptsDatabasePath, read_only = FALSE) dbExecute(duckdbCon, paste("CREATE SCHEMA", conceptsSchema)) - dbExecute(duckdbCon, paste("CREATE SCHEMA", domainSchema)) - dbExecute(duckdbCon, paste("CREATE SCHEMA", densitySchema)) - dbExecute(duckdbCon, paste("CREATE SCHEMA", metadataSchema)) - dbExecute(duckdbCon, paste("CREATE SCHEMA", observationPeriodSchema)) - dbExecute(duckdbCon, paste("CREATE SCHEMA", personSchema)) - dbExecute(duckdbCon, paste("CREATE SCHEMA", deathSchema)) - - on.exit(dbDisconnect(duckdbCon, shutdown = TRUE)) } print(paste0("processing AO export to ", sourceOutputPath)) @@ -1971,226 +1963,118 @@ exportToAres <- function( currentTable <- { } # data density - totals currentTable <- generateDataDensityTotal(conn, resultsDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(currentTable$totalRecordsData, file = paste0(sourceOutputPath, "/datadensity-total.csv")) - data.table::fwrite(currentTable$domainAggregates, file = paste0(sourceOutputPath, "/records-by-domain.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable$totalRecordsData, "datadensity_total", densitySchema) - writeReportToTable(duckdbCon, currentTable$totalRecordsData, "records_by_domain", densitySchema) - } + data.table::fwrite(currentTable$totalRecordsData, file = paste0(sourceOutputPath, "/datadensity-total.csv")) + data.table::fwrite(currentTable$domainAggregates, file = paste0(sourceOutputPath, "/records-by-domain.csv")) + # data density - records per person currentTable <- generateDataDensityRecordsPerPerson(conn, resultsDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-records-per-person.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "datadensity_records_per_person", densitySchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-records-per-person.csv")) + # data density - concepts per person currentTable <- generateDataDensityConceptsPerPerson(conn, resultsDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-concepts-per-person.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "datadensity_concepts_per_person", densitySchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-concepts-per-person.csv")) # data density - domains per person currentTable <- generateDataDensityDomainsPerPerson(conn, resultsDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-domains-per-person.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "datadensity_domains_per_person", densitySchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-domains-per-person.csv")) + } if (length(reports) == 0 || (length(reports) > 0 && ("domain" %in% reports || "concept" %in% reports))) { # metadata writeLines("Generating metadata report") currentTable <- generateAOMetadataReport(conn, cdmDatabaseSchema, sourceOutputPath) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/metadata.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "metadata", metadataSchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/metadata.csv")) # cdm source writeLines("Generating cdm source report") currentTable <- generateAOCdmSourceReport(conn, cdmDatabaseSchema, sourceOutputPath) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/cdmsource.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "cdm_source", metadataSchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/cdmsource.csv")) + # domain summary - observation period writeLines("Generating observation period reports") currentTable <- generateAOObservationPeriodReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - if (outputFormat == "json") { - filename <- file.path(sourceOutputPath, "observationperiod.json") - write(jsonlite::toJSON(currentTable), filename) - } + filename <- file.path(sourceOutputPath, "observationperiod.json") + write(jsonlite::toJSON(currentTable), filename) - if (outputFormat == "duckdb") { - for (key in names(currentTable)) { - print(is.data.frame(currentTable[key])) - writeReportToTable(duckdbCon, as.data.frame(currentTable[[key]]), key, observationPeriodSchema) - } - } # death report writeLines("Generating death report") currentTable <- generateAODeathReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - if (outputFormat == "json") { - filename <- file.path(sourceOutputPath, "death.json") - write(jsonlite::toJSON(currentTable), filename) - } - - if (outputFormat == "duckdb") { - for (key in names(currentTable)) { - print(is.data.frame(currentTable[key])) - writeReportToTable(duckdbCon, as.data.frame(currentTable[[key]]), key, deathSchema) - } - } + filename <- file.path(sourceOutputPath, "death.json") + write(jsonlite::toJSON(currentTable), filename) writeLines("Generating domain summary reports") # domain summary - conditions dataConditions <- generateDomainSummaryConditions(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataConditions, file = paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataConditions, "condition_occurrence", domainSchema) - } + data.table::fwrite(dataConditions, file = paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) # domain summary - condition eras dataConditionEra <- generateDomainSummaryConditionEras(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataConditionEra, file = paste0(sourceOutputPath, "/domain-summary-condition_era.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataConditionEra, "condition_era", domainSchema) - } + data.table::fwrite(dataConditionEra, file = paste0(sourceOutputPath, "/domain-summary-condition_era.csv")) # domain summary - drugs dataDrugs <- generateDomainSummaryDrugs(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataDrugs, file = paste0(sourceOutputPath, "/domain-summary-drug_exposure.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataDrugs, "drug_exposure", domainSchema) - } + data.table::fwrite(dataDrugs, file = paste0(sourceOutputPath, "/domain-summary-drug_exposure.csv")) # domain stratification by drug type concept dataDrugType <- generateDomainDrugStratification(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataDrugType, file = paste0(sourceOutputPath, "/domain-drug-stratification.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataDrugType, "drug_stratification", domainSchema) - } + data.table::fwrite(dataDrugType, file = paste0(sourceOutputPath, "/domain-drug-stratification.csv")) # domain summary - drug era dataDrugEra <- generateDomainSummaryDrugEra(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataDrugEra, file = paste0(sourceOutputPath, "/domain-summary-drug_era.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataDrugType, "drug_stratification", domainSchema) - } + data.table::fwrite(dataDrugEra, file = paste0(sourceOutputPath, "/domain-summary-drug_era.csv")) # domain summary - measurements dataMeasurements <- generateDomainSummaryMeasurements(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataMeasurements, file = paste0(sourceOutputPath, "/domain-summary-measurement.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataMeasurements, "measurement", domainSchema) - } + data.table::fwrite(dataMeasurements, file = paste0(sourceOutputPath, "/domain-summary-measurement.csv")) # domain summary - observations dataObservations <- generateDomainSummaryObservations(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataObservations, file = paste0(sourceOutputPath, "/domain-summary-observation.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataObservations, "observation", domainSchema) - } + data.table::fwrite(dataObservations, file = paste0(sourceOutputPath, "/domain-summary-observation.csv")) # domain summary - visit details dataVisitDetails <- generateDomainSummaryVisitDetails(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataVisitDetails, file = paste0(sourceOutputPath, "/domain-summary-visit_detail.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataVisitDetails, "visit_detail", domainSchema) - } + data.table::fwrite(dataVisitDetails, file = paste0(sourceOutputPath, "/domain-summary-visit_detail.csv")) # domain summary - visits dataVisits <- generateDomainSummaryVisits(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataVisits, file = paste0(sourceOutputPath, "/domain-summary-visit_occurrence.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataVisits, "visit_occurrence", domainSchema) - } + data.table::fwrite(dataVisits, file = paste0(sourceOutputPath, "/domain-summary-visit_occurrence.csv")) + # domain stratification by visit concept currentTable <- generateDomainVisitStratification(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/domain-visit-stratification.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "visit_stratification", domainSchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/domain-visit-stratification.csv")) # domain summary - procedures dataProcedures <- generateDomainSummaryProcedures(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataProcedures, file = paste0(sourceOutputPath, "/domain-summary-procedure_occurrence.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataProcedures, "procedure_occurrence", domainSchema) - } + data.table::fwrite(dataProcedures, file = paste0(sourceOutputPath, "/domain-summary-procedure_occurrence.csv")) # domain summary - devices dataDevices <- generateDomainSummaryDevices(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataDevices, file = paste0(sourceOutputPath, "/domain-summary-device_exposure.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataDevices, "device_exposure", domainSchema) - } + data.table::fwrite(dataDevices, file = paste0(sourceOutputPath, "/domain-summary-device_exposure.csv")) } # domain summary - provider dataProviders <- generateDomainSummaryProvider(conn, resultsDatabaseSchema, vocabDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(dataProviders, file = paste0(sourceOutputPath, "/domain-summary-provider.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, dataProviders, "provider", domainSchema) - } + data.table::fwrite(dataProviders, file = paste0(sourceOutputPath, "/domain-summary-provider.csv")) if (length(reports) == 0 || (length(reports) > 0 && "quality" %in% reports)) { @@ -2198,33 +2082,21 @@ exportToAres <- function( # quality - completeness currentTable <- generateQualityCompleteness(conn, resultsDatabaseSchema) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/quality-completeness.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "quality_completeness", metadataSchema) - } + data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/quality-completeness.csv")) + } if (length(reports) == 0 || (length(reports) > 0 && "performance" %in% reports)) { writeLines("Generating achilles performance report") currentTable <- generateAOAchillesPerformanceReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - if (outputFormat == "json") { - data.table::fwrite(currentTable, file.path(sourceOutputPath, "achilles-performance.csv")) - } - if (outputFormat == "duckdb") { - writeReportToTable(duckdbCon, currentTable, "achilles_performance", metadataSchema) - } + data.table::fwrite(currentTable, file.path(sourceOutputPath, "achilles-performance.csv")) + } if (length(reports) == 0 || (length(reports) > 0 && "concept" %in% reports)) { # concept level reporting - conceptsFolder <- file.path(sourceOutputPath, "concepts") - if (outputFormat == "json") { - dir.create(conceptsFolder, showWarnings = F) - } columnsToNormalize <- c("CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") writeLines("Generating visit reports") @@ -2311,17 +2183,8 @@ exportToAres <- function( if (length(reports) == 0 || (length(reports) > 0 && "person" %in% reports)) { writeLines("Generating person report") currentTable <- generateAOPersonReport(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - if (outputFormat == "json") { - jsonOutput = jsonlite::toJSON(currentTable) - write(jsonOutput, file = paste0(sourceOutputPath, "/person.json")) - } - - if (outputFormat == "duckdb") { - for (key in names(currentTable)) { - print(is.data.frame(currentTable[key])) - writeReportToTable(duckdbCon, as.data.frame(currentTable[[key]]), key, personSchema) - } - } + jsonOutput = jsonlite::toJSON(currentTable) + write(jsonOutput, file = paste0(sourceOutputPath, "/person.json")) } } From d58587661c9329f7ed9a5ed0707f2d73362092e9 Mon Sep 17 00:00:00 2001 From: Frank DeFalco Date: Wed, 20 Dec 2023 15:21:13 -0500 Subject: [PATCH 3/7] fix to documentation and function detaults --- NAMESPACE | 1 + R/exportToAres.R | 31 ++++--------------------------- man/achilles.Rd | 20 ++++++++++---------- man/createIndices.Rd | 2 +- man/exportToAres.Rd | 3 +++ 5 files changed, 19 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bbe5d360..821244e4 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(runMissingAnalyses) export(showReportTypes) export(sumAcrossYears) export(tsCompleteYears) +import(DBI) import(DatabaseConnector) import(ParallelLogger) import(SqlRender) diff --git a/R/exportToAres.R b/R/exportToAres.R index edae1f92..9bd30dcc 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -1898,38 +1898,36 @@ generateQualityCompleteness <- function(connection, resultsDatabaseSchema) { } #' @title exportToAres -#' +#' #' @description #' \code{exportToAres} Exports Achilles statistics for ARES #' #' @details #' Creates export files #' -#' #' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) #' @param cdmDatabaseSchema Name of the database schema that contains the OMOP CDM. #' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema #' @param outputPath A folder location to save the JSON files. Default is current working folder #' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. +#' @param outputFormat default or alternatively "duckdb" to use parquet and duckdb formats. #' @param reports vector of reports to run, c() defaults to all reports #' #' See \code{showReportTypes} for a list of all report types #' #' @return none #' +#'@import DBI #'@importFrom data.table fwrite #'@importFrom dplyr ntile desc #'@export -#' -library("DBI") - exportToAres <- function( connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath, - outputFormat, + outputFormat = "default", reports = c()) { conn <- DatabaseConnector::connect(connectionDetails) @@ -1966,21 +1964,17 @@ exportToAres <- function( data.table::fwrite(currentTable$totalRecordsData, file = paste0(sourceOutputPath, "/datadensity-total.csv")) data.table::fwrite(currentTable$domainAggregates, file = paste0(sourceOutputPath, "/records-by-domain.csv")) - # data density - records per person currentTable <- generateDataDensityRecordsPerPerson(conn, resultsDatabaseSchema) data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-records-per-person.csv")) - # data density - concepts per person currentTable <- generateDataDensityConceptsPerPerson(conn, resultsDatabaseSchema) data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-concepts-per-person.csv")) - # data density - domains per person currentTable <- generateDataDensityDomainsPerPerson(conn, resultsDatabaseSchema) data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/datadensity-domains-per-person.csv")) - } if (length(reports) == 0 || (length(reports) > 0 && ("domain" %in% reports || "concept" %in% reports))) { @@ -1989,84 +1983,69 @@ exportToAres <- function( currentTable <- generateAOMetadataReport(conn, cdmDatabaseSchema, sourceOutputPath) data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/metadata.csv")) - # cdm source writeLines("Generating cdm source report") currentTable <- generateAOCdmSourceReport(conn, cdmDatabaseSchema, sourceOutputPath) data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/cdmsource.csv")) - # domain summary - observation period writeLines("Generating observation period reports") currentTable <- generateAOObservationPeriodReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) filename <- file.path(sourceOutputPath, "observationperiod.json") write(jsonlite::toJSON(currentTable), filename) - # death report writeLines("Generating death report") currentTable <- generateAODeathReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) filename <- file.path(sourceOutputPath, "death.json") write(jsonlite::toJSON(currentTable), filename) - writeLines("Generating domain summary reports") # domain summary - conditions dataConditions <- generateDomainSummaryConditions(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataConditions, file = paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) - # domain summary - condition eras dataConditionEra <- generateDomainSummaryConditionEras(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataConditionEra, file = paste0(sourceOutputPath, "/domain-summary-condition_era.csv")) - # domain summary - drugs dataDrugs <- generateDomainSummaryDrugs(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataDrugs, file = paste0(sourceOutputPath, "/domain-summary-drug_exposure.csv")) - # domain stratification by drug type concept dataDrugType <- generateDomainDrugStratification(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataDrugType, file = paste0(sourceOutputPath, "/domain-drug-stratification.csv")) - # domain summary - drug era dataDrugEra <- generateDomainSummaryDrugEra(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataDrugEra, file = paste0(sourceOutputPath, "/domain-summary-drug_era.csv")) - # domain summary - measurements dataMeasurements <- generateDomainSummaryMeasurements(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataMeasurements, file = paste0(sourceOutputPath, "/domain-summary-measurement.csv")) - # domain summary - observations dataObservations <- generateDomainSummaryObservations(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataObservations, file = paste0(sourceOutputPath, "/domain-summary-observation.csv")) - # domain summary - visit details dataVisitDetails <- generateDomainSummaryVisitDetails(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataVisitDetails, file = paste0(sourceOutputPath, "/domain-summary-visit_detail.csv")) - # domain summary - visits dataVisits <- generateDomainSummaryVisits(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataVisits, file = paste0(sourceOutputPath, "/domain-summary-visit_occurrence.csv")) - # domain stratification by visit concept currentTable <- generateDomainVisitStratification(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(currentTable, file = paste0(sourceOutputPath, "/domain-visit-stratification.csv")) - # domain summary - procedures dataProcedures <- generateDomainSummaryProcedures(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataProcedures, file = paste0(sourceOutputPath, "/domain-summary-procedure_occurrence.csv")) - # domain summary - devices dataDevices <- generateDomainSummaryDevices(conn, resultsDatabaseSchema, vocabDatabaseSchema) data.table::fwrite(dataDevices, file = paste0(sourceOutputPath, "/domain-summary-device_exposure.csv")) @@ -2090,8 +2069,6 @@ exportToAres <- function( writeLines("Generating achilles performance report") currentTable <- generateAOAchillesPerformanceReport(conn, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) data.table::fwrite(currentTable, file.path(sourceOutputPath, "achilles-performance.csv")) - - } if (length(reports) == 0 || (length(reports) > 0 && "concept" %in% reports)) { diff --git a/man/achilles.Rd b/man/achilles.Rd index 8b826dd9..cdfe0055 100644 --- a/man/achilles.Rd +++ b/man/achilles.Rd @@ -117,12 +117,12 @@ of Achilles when supplied \code{analysisIds}.} \item{excludeAnalysisIds}{(OPTIONAL) A vector containing the set of Achilles analyses to exclude.} -\item{sqlDialect}{(OPTIONAL) String to be used when specifying sqlOnly = TRUE and -NOT supplying the \code{connectionDetails} parameter. -if the \code{connectionDetails} parameter is supplied, \code{sqlDialect} -is ignored. If the \code{connectionDetails} parameter is not supplied, -\code{sqlDialect} must be supplied to enable \code{SqlRender} -to translate properly. \code{sqlDialect} takes the value normally +\item{sqlDialect}{(OPTIONAL) String to be used when specifying sqlOnly = TRUE and +NOT supplying the \code{connectionDetails} parameter. +if the \code{connectionDetails} parameter is supplied, \code{sqlDialect} +is ignored. If the \code{connectionDetails} parameter is not supplied, +\code{sqlDialect} must be supplied to enable \code{SqlRender} +to translate properly. \code{sqlDialect} takes the value normally supplied to connectionDetails$dbms. Default = NULL.} } \value{ @@ -141,10 +141,10 @@ connectionDetails <- createConnectionDetails(dbms = "sql server", server = "some achillesResults <- achilles(connectionDetails = connectionDetails, cdmDatabaseSchema = "cdm", resultsDatabaseSchema = "results", - scratchDatabaseSchema = "scratch", - sourceName = "Some Source", - cdmVersion = "5.3", - numThreads = 10, + scratchDatabaseSchema = "scratch", + sourceName = "Some Source", + cdmVersion = "5.3", + numThreads = 10, outputFolder = "output") } diff --git a/man/createIndices.Rd b/man/createIndices.Rd index ae954741..d81a922f 100755 --- a/man/createIndices.Rd +++ b/man/createIndices.Rd @@ -35,7 +35,7 @@ Default = TRUE} achilles_results and achilles_results_dist.} } \value{ -A collection of queries that were executed to drop any existing indices and create new indicies as +A collection of queries that were executed to drop any existing indices and create new indicies as specified. } \description{ diff --git a/man/exportToAres.Rd b/man/exportToAres.Rd index 103321f2..78df96e2 100644 --- a/man/exportToAres.Rd +++ b/man/exportToAres.Rd @@ -10,6 +10,7 @@ exportToAres( resultsDatabaseSchema, vocabDatabaseSchema, outputPath, + outputFormat = NULL, reports = c() ) } @@ -24,6 +25,8 @@ exportToAres( \item{outputPath}{A folder location to save the JSON files. Default is current working folder} +\item{outputFormat}{Unassigned for default, alternatively "duckdb" to use parquet and duckdb formats.} + \item{reports}{vector of reports to run, c() defaults to all reports See \code{showReportTypes} for a list of all report types} From 6496ac7d5ff9500c5efa1ef454de4c24267ded31 Mon Sep 17 00:00:00 2001 From: Mikhail-iontsev Date: Mon, 4 Mar 2024 15:03:56 +0300 Subject: [PATCH 4/7] fix: error creating the metadata table in case provided with 0 length vectors --- R/exportToAres.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/exportToAres.R b/R/exportToAres.R index 9bd30dcc..92085534 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -10,12 +10,13 @@ normalizeEmptyValue <- function(x) { } createConceptMedatataTable <- function(report, concept_id, domain) { - df <- data.frame(CONCEPT_ID = concept_id, - CONCEPT_NAME = report$CONCEPT_NAME, - DOMAIN = domain, - NUM_PERSONS = report$NUM_PERSONS, - PERCENT_PERSONS = report$PERCENT_PERSONS, - RECORDS_PER_PERSON = report$RECORDS_PER_PERSON + df <- data.frame( + CONCEPT_ID = concept_id, + CONCEPT_NAME = ifelse(length(report$CONCEPT_NAME) == 0, NA, report$CONCEPT_NAME), + DOMAIN = domain, + NUM_PERSONS = ifelse(length(report$NUM_PERSONS) == 0, NA, report$NUM_PERSONS), + PERCENT_PERSONS = ifelse(length(report$PERCENT_PERSONS) == 0, NA, report$PERCENT_PERSONS), + RECORDS_PER_PERSON = ifelse(length(report$RECORDS_PER_PERSON) == 0, NA, report$RECORDS_PER_PERSON) ) return(df) } From 2355cc2fa19d9798ea6f01bfdbd25e2419ce6d38 Mon Sep 17 00:00:00 2001 From: Gennadiy Anisimov Date: Fri, 15 Mar 2024 13:50:00 +0300 Subject: [PATCH 5/7] Trim trailing whitespaces in exportToAres.R --- R/exportToAres.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/exportToAres.R b/R/exportToAres.R index 92085534..9f69694e 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -1680,7 +1680,7 @@ generateDomainSummaryConditions <- function(connection, resultsDatabaseSchema, v dataConditions$RECORDS_PER_PERSON <- format(round(dataConditions$RECORDS_PER_PERSON, 1), nsmall = 1) dataConditions$RECORDS_PER_PERSON_NTILE <- dplyr::ntile(dplyr::desc(dataConditions$RECORDS_PER_PERSON), 10) return(dataConditions) - #data.table::fwrite(dataConditions, file=paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) + #data.table::fwrite(dataConditions, file=paste0(sourceOutputPath, "/domain-summary-condition_occurrence.csv")) #dbWriteTable(duckdbCon, "domain_summary", dataConditions, append = TRUE) } @@ -1728,7 +1728,7 @@ generateDomainDrugStratification <- function(connection, resultsDatabaseSchema, ) dataDrugType <- DatabaseConnector::querySql(connection, queryDrugType) return(dataDrugType) - #data.table::fwrite(dataDrugType, file=paste0(sourceOutputPath, "/domain-drug-stratification.csv")) + #data.table::fwrite(dataDrugType, file=paste0(sourceOutputPath, "/domain-drug-stratification.csv")) } generateDomainSummaryDrugEra <- function(connection, resultsDatabaseSchema, vocabDatabaseSchema) { @@ -1899,13 +1899,13 @@ generateQualityCompleteness <- function(connection, resultsDatabaseSchema) { } #' @title exportToAres -#' +#' #' @description #' \code{exportToAres} Exports Achilles statistics for ARES #' #' @details -#' Creates export files -#' +#' Creates export files +#' #' @param connectionDetails An R object of type ConnectionDetail (details for the function that contains server info, database type, optionally username/password, port) #' @param cdmDatabaseSchema Name of the database schema that contains the OMOP CDM. #' @param resultsDatabaseSchema Name of the database schema that contains the Achilles analysis files. Default is cdmDatabaseSchema @@ -1913,11 +1913,11 @@ generateQualityCompleteness <- function(connection, resultsDatabaseSchema) { #' @param vocabDatabaseSchema string name of database schema that contains OMOP Vocabulary. Default is cdmDatabaseSchema. On SQL Server, this should specifiy both the database and the schema, so for example 'results.dbo'. #' @param outputFormat default or alternatively "duckdb" to use parquet and duckdb formats. #' @param reports vector of reports to run, c() defaults to all reports -#' +#' #' See \code{showReportTypes} for a list of all report types -#' -#' @return none -#' +#' +#' @return none +#' #'@import DBI #'@importFrom data.table fwrite #'@importFrom dplyr ntile desc From 10349d4fd6a8b164f647957ed335311ddf948fd4 Mon Sep 17 00:00:00 2001 From: Gennadiy Anisimov Date: Mon, 18 Mar 2024 14:34:06 +0300 Subject: [PATCH 6/7] Improve performance of exportToAres in DuckDB format --- .lintr | 3 + DESCRIPTION | 19 +- R/exportToAres.R | 1145 ++++++++++++++++++++++++++++++------------- man/exportToAres.Rd | 4 +- 4 files changed, 823 insertions(+), 348 deletions(-) create mode 100644 .lintr diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..7c99ddcd --- /dev/null +++ b/.lintr @@ -0,0 +1,3 @@ +linters: linters_with_defaults( + object_name_linter = object_name_linter(styles = c("camelCase", "snake_case", "symbols"))) +encoding: "UTF-8" diff --git a/DESCRIPTION b/DESCRIPTION index 6f7244bd..1c7359c4 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,8 +4,8 @@ Title: Achilles Data Source Characterization Version: 1.7.2 Date: 2023-05-11 Authors@R: c( - person("Frank", "DeFalco", email = "fdefalco@ohdsi.org", role = c("aut","cre")), - person("Patrick", "Ryan", email = "ryan@ohdsi.org", role = c("aut")), + person("Frank", "DeFalco", email = "fdefalco@ohdsi.org", role = c("aut","cre")), + person("Patrick", "Ryan", email = "ryan@ohdsi.org", role = c("aut")), person("Martijn", "Schuemie", email = "schuemie@ohdsi.org", role = c("aut")), person("Vojtech", "Huser", role = c("aut")), person("Chris", "Knoll", role = c("aut")), @@ -16,17 +16,18 @@ Authors@R: c( ) Maintainer: Frank DeFalco LazyData: true -Description: Automated Characterization of Health Information at Large-Scale - Longitudinal Evidence Systems. Creates a descriptive statistics summary for - an Observational Medical Outcomes Partnership Common Data Model standardized - data source. This package includes functions for executing summary queries on - the specified data source and exporting reporting content for use across a - variety of Observational Health Data Sciences and Informatics community - applications. +Description: Automated Characterization of Health Information at Large-Scale + Longitudinal Evidence Systems. Creates a descriptive statistics summary for + an Observational Medical Outcomes Partnership Common Data Model standardized + data source. This package includes functions for executing summary queries on + the specified data source and exporting reporting content for use across a + variety of Observational Health Data Sciences and Informatics community + applications. Depends: DatabaseConnector (>= 2.0.0), R (>= 4.0.0) Imports: + DBI, SqlRender (>= 1.6.0), dplyr, jsonlite, diff --git a/R/exportToAres.R b/R/exportToAres.R index 9f69694e..3c3bf4a7 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -9,56 +9,16 @@ normalizeEmptyValue <- function(x) { } } -createConceptMedatataTable <- function(report, concept_id, domain) { - df <- data.frame( - CONCEPT_ID = concept_id, - CONCEPT_NAME = ifelse(length(report$CONCEPT_NAME) == 0, NA, report$CONCEPT_NAME), - DOMAIN = domain, - NUM_PERSONS = ifelse(length(report$NUM_PERSONS) == 0, NA, report$NUM_PERSONS), - PERCENT_PERSONS = ifelse(length(report$PERCENT_PERSONS) == 0, NA, report$PERCENT_PERSONS), - RECORDS_PER_PERSON = ifelse(length(report$RECORDS_PER_PERSON) == 0, NA, report$RECORDS_PER_PERSON) - ) - return(df) -} - -createConceptDataTable <- function(table, concept_id, domain) { - df <- data.frame(table) - df['CONCEPT_ID'] = concept_id - df['DOMAIN'] = domain - return(df) -} - -writeReportToTable <- function(duckdbCon, report, tableName, schema) { - - if (nrow(report) > 0) { - dbWriteTable(duckdbCon, DBI::Id(schema = schema, table = tableName), report, append = TRUE) - } -} - -exportDataToDuckDB <- function(data, duckdbCon = NULL, tableNames = NULL, concept_id = NULL, domain = NULL, schema = NULL) { - if (!is.null(duckdbCon) && - !is.null(tableNames) && - !is.null(concept_id)) { - if (length(data) != length(tableNames)) { - cat("Number of reports and tableNames should match.\n") - return() - } - for (i in seq_along(data)) { - if (nrow(data[[i]]) > 0) { - writeReportToTable(duckdbCon, createConceptDataTable(data[[i]], concept_id, domain), tableNames[[i]], schema) - } - } - } else { - cat("Missing required parameters for DuckDB export.\n") - } -} - -processAndExportConceptData <- function(concept_id, duckdbCon, reports, outputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, domain, schema) { +saveConceptsAsJson <- function( + concept_id, + reports, + columnsToNormalize, + columnsToConvertToDataFrame, + dir +) { report <- reports[reports$CONCEPT_ID == concept_id,] report <- as.list(report) - tableNames <- lapply(columnsToConvertToDataFrame, tolower) - #Normalize the specified columns for (col in columnsToNormalize) { report[[col]] <- normalizeEmptyValue(report[[col]]) @@ -69,24 +29,95 @@ processAndExportConceptData <- function(concept_id, duckdbCon, reports, outputPa report[[col]] <- as.data.frame(report[[col]]) } + filename <- paste( + dir, "/concept_", report$CONCEPT_ID, ".json", + sep = "" + ) + write(jsonlite::toJSON(report), filename) +} + +saveConceptsAsDuckDb <- function( + duckdbCon, + conceptData, + domain, + schema +) { + for (tableName in names(conceptData$reports)) { + ## rename specific concept_id columns + ## e.g. DEVICE_CONCEPT_ID to just CONCEPT_ID + tableData <- + conceptData$reports[[tableName]] %>% + dplyr::rename_with(~ gsub("[^_]+_CONCEPT_ID", "CONCEPT_ID", .x)) + + ## remove orphan records + if (tableName != "concept_metadata") { + tableData <- tableData %>% dplyr::filter( + .data$CONCEPT_ID %in% conceptData$reports$concept_metadata$CONCEPT_ID + ) + } + + tableData <- tableData %>% dplyr::mutate(DOMAIN = domain) - if (outputFormat == "json") { - dir.create(paste0(outputPath, dir), recursive = T, showWarnings = F) - filename <- paste(outputPath, dir, "/concept_", report$CONCEPT_ID, ".json", sep = '') - write(jsonlite::toJSON(report), filename) + dbWriteTable( + duckdbCon, + DBI::Id( + schema = schema, + table = tableName + ), + tableData, + append = TRUE + ) + } +} +processAndExportConceptData <- function( + duckdbCon, + conceptData, + outputPath, + outputFormat, + columnsToNormalize, + columnsToConvertToDataFrame, + domain, + schema +) { + if (is.null(conceptData)) { + return() } - else if (outputFormat == "duckdb") { - metadata <- createConceptMedatataTable(report, concept_id, domain) - dbWriteTable(duckdbCon, DBI::Id(schema = schema, table = "concept_metadata"), metadata, append = TRUE) - tableList <- lapply(columnsToConvertToDataFrame, function(col) report[[col]]) - exportDataToDuckDB(tableList, duckdbCon, tableNames, concept_id, domain, schema) + if (outputFormat == "duckdb") { + saveConceptsAsDuckDb( + duckdbCon, + conceptData, + domain, + schema + ) + } else { + dir <- file.path(outputPath, "concepts", domain) + dir.create( + dir, + recursive = TRUE, + showWarnings = FALSE + ) + + lapply( + conceptData$uniqueConcepts$CONCEPT_ID, + function(concept_id, ...) { + saveConceptsAsJson(concept_id, ...) + }, + conceptData$reports, + columnsToNormalize, + columnsToConvertToDataFrame, + dir + ) } } -generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { + if (nrow(proceduresData) == 0) { + return(NULL) + } + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/procedure/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -128,67 +159,90 @@ generateAOProcedureReports <- function(connectionDetails, proceduresData, cdmDat ) conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataProceduresByType <- DatabaseConnector::querySql(conn, queryProceduresByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) - dataProcedureFrequencyDistribution <- DatabaseConnector::querySql(conn, queryProcedureFrequencyDistribution) + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) + dataProceduresByType <- + DatabaseConnector::querySql(conn, queryProceduresByType) %>% + dplyr::select(c(1, 4, 5)) + dataAgeAtFirstOccurrence <- + DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataProcedureFrequencyDistribution <- + DatabaseConnector::querySql(conn, queryProcedureFrequencyDistribution) %>% + dplyr::select(c(1, 3, 4)) - if (nrow(proceduresData) == 0) { - return() - } uniqueConcepts <- data.frame( CONCEPT_ID = unique(proceduresData$CONCEPT_ID), CDM_TABLE_NAME = "PROCEDURE_OCCURRENCE" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( proceduresData, by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "CDM_TABLE_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") %>% - dplyr::left_join( - ( - dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% - tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% - tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataProcedureFrequencyDistribution %>% - dplyr::select(c(1, 3, 4)) %>% - tidyr::nest(PROCEDURE_FREQUENCY_DISTRIBUTION = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataProceduresByType %>% - dplyr::select(c(1, 4, 5)) %>% - tidyr::nest(PROCEDURES_BY_TYPE = c(-1)) - ), - by = c("CONCEPT_ID" = "PROCEDURE_CONCEPT_ID") - ) %>% - dplyr::left_join( - ( - dataAgeAtFirstOccurrence %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% - tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% - dplyr::collect() + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "CDM_TABLE_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + procedure_frequency_distribution = dataProcedureFrequencyDistribution, + procedures_by_type = dataProceduresByType, + age_at_first_occurrence = dataAgeAtFirstOccurrence + ) + } else { + reports <- + conceptMetadata %>% + dplyr::left_join( + ( + dataPrevalenceByGenderAgeYear %>% + tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataPrevalenceByMonth %>% + tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataProcedureFrequencyDistribution %>% + tidyr::nest(PROCEDURE_FREQUENCY_DISTRIBUTION = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataProceduresByType %>% + tidyr::nest(PROCEDURES_BY_TYPE = c(-1)) + ), + by = c("CONCEPT_ID" = "PROCEDURE_CONCEPT_ID") + ) %>% + dplyr::left_join( + ( + dataAgeAtFirstOccurrence %>% + tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) %>% + dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } @@ -472,7 +526,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res return(output) } -generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { queryVisits <- SqlRender::loadRenderTranslateSql( @@ -516,33 +570,57 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results ) conn <- DatabaseConnector::connect(connectionDetails) - dataVisits <- DatabaseConnector::querySql(conn, queryVisits) - names(dataVisits)[names(dataVisits) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataVisitDurationByType <- DatabaseConnector::querySql(conn, queryVisitDurationByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) - + dataVisits <- + DatabaseConnector::querySql(conn, queryVisits) %>% + dplyr::rename(dplyr::all_of(c("CONCEPT_NAME" = "CONCEPT_PATH"))) %>% + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) if (nrow(dataVisits) == 0) { - return() + return(NULL) } + + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) + dataVisitDurationByType <- + DatabaseConnector::querySql(conn, queryVisitDurationByType) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataAgeAtFirstOccurrence <- + DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataVisits$CONCEPT_ID), CDM_TABLE_NAME = "VISIT_OCCURRENCE" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( - ( - dataVisits %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), + dataVisits, by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + visit_duration_by_type = dataVisitDurationByType, + age_at_first_occurrence = dataAgeAtFirstOccurrence + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -550,7 +628,6 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -558,7 +635,6 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results dplyr::left_join( ( dataVisitDurationByType %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(VISIT_DURATION_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -566,16 +642,16 @@ generateAOVisitReports <- function(connectionDetails, cdmDatabaseSchema, results dplyr::left_join( ( dataAgeAtFirstOccurrence %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { queryVisitDetails <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlVisitDetailTreemap.sql", @@ -627,33 +703,59 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataVisitDetails <- DatabaseConnector::querySql(conn, queryVisitDetails) - names(dataVisitDetails)[names(dataVisitDetails) == 'CONCEPT_PATH'] <- 'CONCEPT_NAME' - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataVisitDetailDurationByType <- DatabaseConnector::querySql(conn, queryVisitDetailDurationByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) + dataVisitDetails <- + DatabaseConnector::querySql(conn, queryVisitDetails) %>% + dplyr::rename(dplyr::all_of(c("CONCEPT_NAME" = "CONCEPT_PATH"))) %>% + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) if (nrow(dataVisitDetails) == 0) { - return() + return(NULL) } + + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) + dataVisitDetailDurationByType <- + DatabaseConnector::querySql(conn, queryVisitDetailDurationByType) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataAgeAtFirstOccurrence <- + DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataVisitDetails$CONCEPT_ID), CDM_TABLE_NAME = "VISIT_DETAIL" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( - ( - dataVisitDetails %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), + dataVisitDetails, by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + visit_detail_duration_by_type = dataVisitDetailDurationByType, + age_at_first_occurrence = dataAgeAtFirstOccurrence + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -661,7 +763,6 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -669,7 +770,6 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r dplyr::left_join( ( dataVisitDetailDurationByType %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(VISIT_DETAIL_DURATION_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -677,12 +777,12 @@ generateAOVisitDetailReports <- function(connectionDetails, cdmDatabaseSchema, r dplyr::left_join( ( dataAgeAtFirstOccurrence %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } @@ -701,8 +801,12 @@ generateAOMetadataReport <- function(connection, cdmDatabaseSchema, outputPath) } } -generateAOObservationReports <- function(connectionDetails, observationsData, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOObservationReports <- function(connectionDetails, observationsData, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { + if (nrow(observationsData) == 0) { + return(NULL) + } + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observation/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -745,30 +849,56 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataObservationsByType <- DatabaseConnector::querySql(conn, queryObservationsByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) - dataObsFrequencyDistribution <- DatabaseConnector::querySql(conn, queryObsFrequencyDistribution) + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) + dataObservationsByType <- + DatabaseConnector::querySql(conn, queryObservationsByType) %>% + dplyr::select(c(1, 4, 5)) + dataAgeAtFirstOccurrence <- + DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataObsFrequencyDistribution <- + DatabaseConnector::querySql(conn, queryObsFrequencyDistribution) %>% + dplyr::select(c(1, 3, 4)) - if (nrow(observationsData) == 0) { - return() - } uniqueConcepts <- data.frame( CONCEPT_ID = unique(observationsData$CONCEPT_ID), CDM_TABLE_NAME = "OBSERVATION" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( observationsData, by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "CDM_TABLE_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") %>% + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "CDM_TABLE_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + obs_frequency_distribution = dataObsFrequencyDistribution, + observations_by_type = dataObservationsByType, + age_at_first_occurrence = dataAgeAtFirstOccurrence + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -776,7 +906,6 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -784,7 +913,6 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd dplyr::left_join( ( dataObsFrequencyDistribution %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(OBS_FREQUENCY_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -792,7 +920,6 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd dplyr::left_join( ( dataObservationsByType %>% - dplyr::select(c(1, 4, 5)) %>% tidyr::nest(OBSERVATIONS_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "OBSERVATION_CONCEPT_ID") @@ -800,12 +927,12 @@ generateAOObservationReports <- function(connectionDetails, observationsData, cd dplyr::left_join( ( dataAgeAtFirstOccurrence %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } @@ -841,9 +968,8 @@ generateAODashboardReport <- function(outputPath) write(jsonOutput, file = paste(outputPath, "/dashboard.json", sep = "")) } -generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { - writeLines("Generating Measurement reports") queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -926,37 +1052,81 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataMeasurementsByType <- DatabaseConnector::querySql(conn, queryMeasurementsByType) - dataAgeAtFirstOccurrence <- DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) - dataRecordsByUnit <- DatabaseConnector::querySql(conn, queryRecordsByUnit) - dataMeasurementValueDistribution <- DatabaseConnector::querySql(conn, queryMeasurementValueDistribution) - dataLowerLimitDistribution <- DatabaseConnector::querySql(conn, queryLowerLimitDistribution) - dataUpperLimitDistribution <- DatabaseConnector::querySql(conn, queryUpperLimitDistribution) - dataValuesRelativeToNorm <- DatabaseConnector::querySql(conn, queryValuesRelativeToNorm) - dataFrequencyDistribution <- DatabaseConnector::querySql(conn, queryFrequencyDistribution) - + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) if (nrow(dataPrevalenceByMonth) == 0) { - return() + return(NULL) } + + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataMeasurementsByType <- + DatabaseConnector::querySql(conn, queryMeasurementsByType) %>% + dplyr::select(c(1, 4, 5)) + dataAgeAtFirstOccurrence <- + DatabaseConnector::querySql(conn, queryAgeAtFirstOccurrence) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataRecordsByUnit <- + DatabaseConnector::querySql(conn, queryRecordsByUnit) %>% + dplyr::select(c(1, 4, 5)) + dataMeasurementValueDistribution <- + DatabaseConnector::querySql(conn, queryMeasurementValueDistribution) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataLowerLimitDistribution <- + DatabaseConnector::querySql(conn, queryLowerLimitDistribution) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataUpperLimitDistribution <- + DatabaseConnector::querySql(conn, queryUpperLimitDistribution) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataValuesRelativeToNorm <- + DatabaseConnector::querySql(conn, queryValuesRelativeToNorm) %>% + dplyr::select(c(1, 4, 5)) + dataFrequencyDistribution <- + DatabaseConnector::querySql(conn, queryFrequencyDistribution) %>% + dplyr::select(c(1, 3, 4)) + uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataPrevalenceByMonth$CONCEPT_ID), CDM_TABLE_NAME = "MEASUREMENT" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( ( dataMeasurements %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) ), by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + frequency_distribution = dataFrequencyDistribution, + measurements_by_type = dataMeasurementsByType, + age_at_first_occurrence = dataAgeAtFirstOccurrence, + records_by_unit = dataRecordsByUnit, + measurement_value_distribution = dataMeasurementValueDistribution, + lower_limit_distribution = dataLowerLimitDistribution, + upper_limit_distribution = dataUpperLimitDistribution, + values_relative_to_norm = dataValuesRelativeToNorm + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -964,7 +1134,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -972,7 +1141,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataFrequencyDistribution %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(FREQUENCY_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -980,7 +1148,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataMeasurementsByType %>% - dplyr::select(c(1, 4, 5)) %>% tidyr::nest(MEASUREMENTS_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") @@ -988,7 +1155,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataAgeAtFirstOccurrence %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_OCCURRENCE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -996,7 +1162,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataRecordsByUnit %>% - dplyr::select(c(1, 4, 5)) %>% tidyr::nest(RECORDS_BY_UNIT = c(-1)) ), by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") @@ -1004,7 +1169,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataMeasurementValueDistribution %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(MEASUREMENT_VALUE_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1012,7 +1176,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataLowerLimitDistribution %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(LOWER_LIMIT_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1020,7 +1183,6 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataUpperLimitDistribution %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(UPPER_LIMIT_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1028,17 +1190,20 @@ generateAOMeasurementReports <- function(connectionDetails, dataMeasurements, cd dplyr::left_join( ( dataValuesRelativeToNorm %>% - dplyr::select(c(1, 4, 5)) %>% tidyr::nest(VALUES_RELATIVE_TO_NORM = c(-1)) ), by = c("CONCEPT_ID" = "MEASUREMENT_CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { + if (nrow(dataDrugEra) == 0) { + return(NULL) + } queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drugera/sqlAgeAtFirstExposure.sql", @@ -1074,31 +1239,53 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataLengthOfEra <- DatabaseConnector::querySql(conn, queryLengthOfEra) + dataAgeAtFirstExposure <- + DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 2, 3, 4, 5)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 2, 3)) + dataLengthOfEra <- + DatabaseConnector::querySql(conn, queryLengthOfEra) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) - if (nrow(dataDrugEra) == 0) { - return() - } uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataDrugEra$CONCEPT_ID), CDM_TABLE_NAME = "DRUG_ERA" ) - reports <- + conceptMetadata <- uniqueConcepts %>% - dplyr::left_join( - ( - dataDrugEra %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") - ), - by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + dplyr::left_join( + ( + dataDrugEra %>% + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) + ), + by = c("CONCEPT_ID" = "CONCEPT_ID") + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + age_at_first_exposure = dataAgeAtFirstExposure, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + length_of_era = dataLengthOfEra + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataAgeAtFirstExposure %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1106,7 +1293,6 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 2, 3, 4, 5)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1114,7 +1300,6 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 2, 3)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1122,16 +1307,16 @@ generateAODrugEraReports <- function(connectionDetails, dataDrugEra, cdmDatabase dplyr::left_join( ( dataLengthOfEra %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(LENGTH_OF_ERA = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql( @@ -1200,35 +1385,73 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) - dataDaysSupplyDistribution <- DatabaseConnector::querySql(conn, queryDaysSupplyDistribution) - dataDrugsByType <- DatabaseConnector::querySql(conn, queryDrugsByType) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataQuantityDistribution <- DatabaseConnector::querySql(conn, queryQuantityDistribution) - dataRefillsDistribution <- DatabaseConnector::querySql(conn, queryRefillsDistribution) - dataDrugFrequencyDistribution <- DatabaseConnector::querySql(conn, queryDrugFrequencyDistribution) - + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) if (nrow(dataPrevalenceByMonth) == 0) { - return() + return(NULL) } + + dataAgeAtFirstExposure <- + DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataDaysSupplyDistribution <- + DatabaseConnector::querySql(conn, queryDaysSupplyDistribution) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataDrugsByType <- + DatabaseConnector::querySql(conn, queryDrugsByType) %>% + dplyr::select(c(1, 3, 4)) + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataQuantityDistribution <- + DatabaseConnector::querySql(conn, queryQuantityDistribution) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataRefillsDistribution <- + DatabaseConnector::querySql(conn, queryRefillsDistribution) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataDrugFrequencyDistribution <- + DatabaseConnector::querySql(conn, queryDrugFrequencyDistribution) %>% + dplyr::select(c(1, 3, 4)) + uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataPrevalenceByMonth$CONCEPT_ID), CDM_TABLE_NAME = "DRUG_EXPOSURE" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( ( dataDrugs %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) ), by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + age_at_first_exposure = dataAgeAtFirstExposure, + days_supply_distribution = dataDaysSupplyDistribution, + drugs_by_type = dataDrugsByType, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + drug_frequency_distribution = dataDrugFrequencyDistribution, + quantity_distribution = dataQuantityDistribution, + refills_distribution = dataRefillsDistribution + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataAgeAtFirstExposure %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) ), by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") @@ -1236,7 +1459,6 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataDaysSupplyDistribution %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(DAYS_SUPPLY_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") @@ -1244,7 +1466,6 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataDrugsByType %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(DRUGS_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") @@ -1252,7 +1473,6 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1260,7 +1480,6 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1268,7 +1487,6 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataDrugFrequencyDistribution %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(DRUG_FREQUENCY_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1276,7 +1494,6 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataQuantityDistribution %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(QUANTITY_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") @@ -1284,17 +1501,21 @@ generateAODrugReports <- function(connectionDetails, dataDrugs, cdmDatabaseSchem dplyr::left_join( ( dataRefillsDistribution %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(REFILLS_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "DRUG_CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { + if (nrow(dataDevices) == 0) { + return(NULL) + } + queryAgeAtFirstExposure <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/device/sqlAgeAtFirstExposure.sql", packageName = "Achilles", @@ -1337,32 +1558,57 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataAgeAtFirstExposure <- DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) - dataDevicesByType <- DatabaseConnector::querySql(conn, queryDevicesByType) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataDeviceFrequencyDistribution <- DatabaseConnector::querySql(conn, queryDeviceFrequencyDistribution) + dataAgeAtFirstExposure <- + DatabaseConnector::querySql(conn, queryAgeAtFirstExposure) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataDevicesByType <- + DatabaseConnector::querySql(conn, queryDevicesByType) %>% + dplyr::select(c(1, 4, 5)) + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) + dataDeviceFrequencyDistribution <- + DatabaseConnector::querySql(conn, queryDeviceFrequencyDistribution) %>% + dplyr::select(c(1, 3, 4)) - if (nrow(dataDevices) == 0) { - return() - } uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataDevices$CONCEPT_ID), CDM_TABLE_NAME = "DEVICE_EXPOSURE" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( ( dataDevices %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) ), by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + age_at_first_exposure = dataAgeAtFirstExposure, + devices_by_type = dataDevicesByType, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + device_frequency_distribution = dataDeviceFrequencyDistribution + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataAgeAtFirstExposure %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1370,7 +1616,6 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS dplyr::left_join( ( dataDevicesByType %>% - dplyr::select(c(1, 4, 5)) %>% tidyr::nest(DEVICES_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "DEVICE_CONCEPT_ID") @@ -1378,7 +1623,6 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1386,7 +1630,6 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1394,16 +1637,16 @@ generateAODeviceReports <- function(connectionDetails, dataDevices, cdmDatabaseS dplyr::left_join( ( dataDeviceFrequencyDistribution %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(DEVICE_FREQUENCY_DISTRIBUTION = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAOConditionReports <- function(connectionDetails, duckdbCon, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOConditionReports <- function(connectionDetails, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/condition/sqlPrevalenceByGenderAgeYear.sql", @@ -1447,31 +1690,57 @@ generateAOConditionReports <- function(connectionDetails, duckdbCon, dataConditi conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataConditionsByType <- DatabaseConnector::querySql(conn, queryConditionsByType) - dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn, queryAgeAtFirstDiagnosis) - + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 3, 4)) if (nrow(dataPrevalenceByMonth) == 0) { - return() + return(NULL) } + + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 3, 4, 5, 6)) + dataConditionsByType <- + DatabaseConnector::querySql(conn, queryConditionsByType) %>% + dplyr::select(c(1, 2, 3)) + dataAgeAtFirstDiagnosis <- + DatabaseConnector::querySql(conn, queryAgeAtFirstDiagnosis) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataPrevalenceByMonth$CONCEPT_ID), CDM_TABLE_NAME = "CONDITION_OCCURRENCE" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( ( dataConditions %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) ), by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + conditions_by_type = dataConditionsByType, + age_at_first_diagnosis = dataAgeAtFirstDiagnosis + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 3, 4, 5, 6)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1479,7 +1748,6 @@ generateAOConditionReports <- function(connectionDetails, duckdbCon, dataConditi dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 3, 4)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1487,7 +1755,6 @@ generateAOConditionReports <- function(connectionDetails, duckdbCon, dataConditi dplyr::left_join( ( dataConditionsByType %>% - dplyr::select(c(1, 2, 3)) %>% tidyr::nest(CONDITIONS_BY_TYPE = c(-1)) ), by = c("CONCEPT_ID" = "CONDITION_CONCEPT_ID") @@ -1495,17 +1762,21 @@ generateAOConditionReports <- function(connectionDetails, duckdbCon, dataConditi dplyr::left_join( ( dataAgeAtFirstDiagnosis %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_DIAGNOSIS = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } -generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputPath) +generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, outputFormat) { + if (nrow(dataConditionEra) == 0) { + return(NULL) + } + queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/conditionera/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", @@ -1548,31 +1819,53 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c conn <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection = conn)) - dataPrevalenceByGenderAgeYear <- DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) - dataPrevalenceByMonth <- DatabaseConnector::querySql(conn, queryPrevalenceByMonth) - dataLengthOfEra <- DatabaseConnector::querySql(conn, queryLengthOfEra) - dataAgeAtFirstDiagnosis <- DatabaseConnector::querySql(conn, queryAgeAtFirstDiagnosis) + dataPrevalenceByGenderAgeYear <- + DatabaseConnector::querySql(conn, queryPrevalenceByGenderAgeYear) %>% + dplyr::select(c(1, 2, 3, 4, 5)) + dataPrevalenceByMonth <- + DatabaseConnector::querySql(conn, queryPrevalenceByMonth) %>% + dplyr::select(c(1, 2, 3)) + dataLengthOfEra <- + DatabaseConnector::querySql(conn, queryLengthOfEra) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) + dataAgeAtFirstDiagnosis <- + DatabaseConnector::querySql(conn, queryAgeAtFirstDiagnosis) %>% + dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) - if (nrow(dataConditionEra) == 0) { - return() - } uniqueConcepts <- data.frame( CONCEPT_ID = unique(dataConditionEra$CONCEPT_ID), CDM_TABLE_NAME = "CONDITION_ERA" ) - reports <- + conceptMetadata <- uniqueConcepts %>% dplyr::left_join( ( dataConditionEra %>% - dplyr::select("CONCEPT_ID", "CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") + dplyr::select( + "CONCEPT_ID", + "CONCEPT_NAME", + "NUM_PERSONS", + "PERCENT_PERSONS", + "RECORDS_PER_PERSON" + ) ), by = c("CONCEPT_ID" = "CONCEPT_ID") - ) %>% + ) + + if (outputFormat == "duckdb") { + reports <- list( + concept_metadata = conceptMetadata, + age_at_first_exposure = dataAgeAtFirstDiagnosis, + prevalence_by_gender_age_year = dataPrevalenceByGenderAgeYear, + prevalence_by_month = dataPrevalenceByMonth, + length_of_era = dataLengthOfEra + ) + } else { + reports <- + conceptMetadata %>% dplyr::left_join( ( dataAgeAtFirstDiagnosis %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(AGE_AT_FIRST_EXPOSURE = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1580,7 +1873,6 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c dplyr::left_join( ( dataPrevalenceByGenderAgeYear %>% - dplyr::select(c(1, 2, 3, 4, 5)) %>% tidyr::nest(PREVALENCE_BY_GENDER_AGE_YEAR = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1588,7 +1880,6 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c dplyr::left_join( ( dataPrevalenceByMonth %>% - dplyr::select(c(1, 2, 3)) %>% tidyr::nest(PREVALENCE_BY_MONTH = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") @@ -1596,12 +1887,12 @@ generateAOConditionEraReports <- function(connectionDetails, dataConditionEra, c dplyr::left_join( ( dataLengthOfEra %>% - dplyr::select(c(1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% tidyr::nest(LENGTH_OF_ERA = c(-1)) ), by = c("CONCEPT_ID" = "CONCEPT_ID") ) %>% dplyr::collect() + } return(list("reports" = reports, "uniqueConcepts" = uniqueConcepts)) } @@ -2078,84 +2369,265 @@ exportToAres <- function( columnsToNormalize <- c("CONCEPT_NAME", "NUM_PERSONS", "PERCENT_PERSONS", "RECORDS_PER_PERSON") writeLines("Generating visit reports") - currentTable <- generateAOVisitReports(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "VISIT_DURATION_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE") - dir <- "/concepts/visit_occurrence" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "visit_occurrence", conceptsSchema) + conceptData <- generateAOVisitReports( + connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "VISIT_DURATION_BY_TYPE", + "AGE_AT_FIRST_OCCURRENCE" + ), + domain = "visit_occurrence", + schema = conceptsSchema + ) writeLines("Generating visit_detail reports") - currentTable <- generateAOVisitDetailReports(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "VISIT_DETAIL_DURATION_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE") - dir <- "/concepts/visit_detail" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "visit_detail", conceptsSchema) + conceptData <- generateAOVisitDetailReports( + connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "VISIT_DETAIL_DURATION_BY_TYPE", + "AGE_AT_FIRST_OCCURRENCE" + ), + domain = "visit_detail", + schema = conceptsSchema + ) writeLines("Generating Measurement reports") - currentTable <- generateAOMeasurementReports(connectionDetails, dataMeasurements, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "FREQUENCY_DISTRIBUTION", "MEASUREMENTS_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE", "RECORDS_BY_UNIT", "MEASUREMENT_VALUE_DISTRIBUTION", "LOWER_LIMIT_DISTRIBUTION", "UPPER_LIMIT_DISTRIBUTION", "VALUES_RELATIVE_TO_NORM") - dir <- "/concepts/measurement" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "measurement", conceptsSchema) + conceptData <- generateAOMeasurementReports( + connectionDetails, + dataMeasurements, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "FREQUENCY_DISTRIBUTION", + "MEASUREMENTS_BY_TYPE", + "AGE_AT_FIRST_OCCURRENCE", + "RECORDS_BY_UNIT", + "MEASUREMENT_VALUE_DISTRIBUTION", + "LOWER_LIMIT_DISTRIBUTION", + "UPPER_LIMIT_DISTRIBUTION", + "VALUES_RELATIVE_TO_NORM" + ), + domain = "measurement", + schema = conceptsSchema + ) writeLines("Generating condition reports") - currentTable <- generateAOConditionReports(connectionDetails, duckdbCon, dataConditions, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "CONDITIONS_BY_TYPE", "AGE_AT_FIRST_DIAGNOSIS") - dir <- "/concepts/condition_occurrence" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "condition_occurrence", conceptsSchema) + conceptData <- generateAOConditionReports( + connectionDetails, + dataConditions, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "CONDITIONS_BY_TYPE", + "AGE_AT_FIRST_DIAGNOSIS" + ), + domain = "condition_occurrence", + schema = conceptsSchema + ) writeLines("Generating condition era reports") - currentTable <- generateAOConditionEraReports(connectionDetails, dataConditionEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "LENGTH_OF_ERA") - dir <- "/concepts/condition_era" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "condition_era", conceptsSchema) + conceptData <- generateAOConditionEraReports( + connectionDetails, + dataConditionEra, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "AGE_AT_FIRST_EXPOSURE", + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "LENGTH_OF_ERA" + ), + domain = "condition_era", + schema = conceptsSchema + ) writeLines("Generating drug reports") - currentTable <- generateAODrugReports(connectionDetails, dataDrugs, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "DAYS_SUPPLY_DISTRIBUTION", "DRUGS_BY_TYPE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "DRUG_FREQUENCY_DISTRIBUTION", "QUANTITY_DISTRIBUTION", "REFILLS_DISTRIBUTION") - dir <- "/concepts/drug_exposure" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "drug_exposure", conceptsSchema) + conceptData <- generateAODrugReports( + connectionDetails, + dataDrugs, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "AGE_AT_FIRST_EXPOSURE", + "DAYS_SUPPLY_DISTRIBUTION", + "DRUGS_BY_TYPE", + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "DRUG_FREQUENCY_DISTRIBUTION", + "QUANTITY_DISTRIBUTION", + "REFILLS_DISTRIBUTION" + ), + domain = "drug_exposure", + schema = conceptsSchema + ) writeLines("Generating device exposure reports") - currentTable <- generateAODeviceReports(connectionDetails, dataDevices, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "DEVICES_BY_TYPE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "DEVICE_FREQUENCY_DISTRIBUTION") - dir <- "/concepts/device_exposure" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "device_exposure", conceptsSchema) + conceptData <- generateAODeviceReports( + connectionDetails, + dataDevices, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "AGE_AT_FIRST_EXPOSURE", + "DEVICES_BY_TYPE", + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "DEVICE_FREQUENCY_DISTRIBUTION" + ), + domain = "device_exposure", + schema = conceptsSchema + ) writeLines("Generating drug era reports") - currentTable <- generateAODrugEraReports(connectionDetails, dataDrugEra, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("AGE_AT_FIRST_EXPOSURE", "PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "LENGTH_OF_ERA") - dir <- "/concepts/procedure_occurrence" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "drug_era", conceptsSchema) + conceptData <- generateAODrugEraReports( + connectionDetails, + dataDrugEra, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "AGE_AT_FIRST_EXPOSURE", + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "LENGTH_OF_ERA" + ), + domain = "drug_era", + schema = conceptsSchema + ) writeLines("Generating procedure reports") - currentTable <- generateAOProcedureReports(connectionDetails, dataProcedures, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c('PREVALENCE_BY_GENDER_AGE_YEAR', 'PREVALENCE_BY_MONTH', 'PROCEDURE_FREQUENCY_DISTRIBUTION', 'PROCEDURES_BY_TYPE', 'AGE_AT_FIRST_OCCURRENCE') - dir <- "/concepts/procedure_occurrence" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "procedure_occurrence", conceptsSchema) + conceptData <- generateAOProcedureReports( + connectionDetails, + dataProcedures, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "PROCEDURE_FREQUENCY_DISTRIBUTION", + "PROCEDURES_BY_TYPE", + "AGE_AT_FIRST_OCCURRENCE" + ), + domain = "procedure_occurrence", + schema = conceptsSchema + ) writeLines("Generating Observation reports") - currentTable <- generateAOObservationReports(connectionDetails, dataObservations, cdmDatabaseSchema, resultsDatabaseSchema, vocabDatabaseSchema, sourceOutputPath) - columnsToConvertToDataFrame <- c("PREVALENCE_BY_GENDER_AGE_YEAR", "PREVALENCE_BY_MONTH", "OBS_FREQUENCY_DISTRIBUTION", "OBSERVATIONS_BY_TYPE", "AGE_AT_FIRST_OCCURRENCE") - dir <- "/concepts/observation" - lapply(currentTable$uniqueConcepts$CONCEPT_ID, function(concept_id, ...) { - processAndExportConceptData(concept_id, ...) - }, duckdbCon, currentTable$reports, sourceOutputPath, outputFormat, columnsToNormalize, columnsToConvertToDataFrame, dir, "observation", conceptsSchema) + conceptData <- generateAOObservationReports( + connectionDetails, + dataObservations, + cdmDatabaseSchema, + resultsDatabaseSchema, + vocabDatabaseSchema, + outputFormat + ) + processAndExportConceptData( + duckdbCon = duckdbCon, + conceptData = conceptData, + outputPath = sourceOutputPath, + outputFormat = outputFormat, + columnsToNormalize = columnsToNormalize, + columnsToConvertToDataFrame = c( + "PREVALENCE_BY_GENDER_AGE_YEAR", + "PREVALENCE_BY_MONTH", + "OBS_FREQUENCY_DISTRIBUTION", + "OBSERVATIONS_BY_TYPE", + "AGE_AT_FIRST_OCCURRENCE" + ), + domain = "observation", + schema = conceptsSchema + ) } if (length(reports) == 0 || (length(reports) > 0 && "person" %in% reports)) { @@ -2165,4 +2637,3 @@ exportToAres <- function( write(jsonOutput, file = paste0(sourceOutputPath, "/person.json")) } } - diff --git a/man/exportToAres.Rd b/man/exportToAres.Rd index 78df96e2..f07dbd57 100644 --- a/man/exportToAres.Rd +++ b/man/exportToAres.Rd @@ -10,7 +10,7 @@ exportToAres( resultsDatabaseSchema, vocabDatabaseSchema, outputPath, - outputFormat = NULL, + outputFormat = "default", reports = c() ) } @@ -25,7 +25,7 @@ exportToAres( \item{outputPath}{A folder location to save the JSON files. Default is current working folder} -\item{outputFormat}{Unassigned for default, alternatively "duckdb" to use parquet and duckdb formats.} +\item{outputFormat}{default or alternatively "duckdb" to use parquet and duckdb formats.} \item{reports}{vector of reports to run, c() defaults to all reports From bf47864d079e8f1795d353bdd4837e37397b774a Mon Sep 17 00:00:00 2001 From: Gennadiy Anisimov Date: Thu, 21 Mar 2024 15:25:09 +0300 Subject: [PATCH 7/7] Fix R check warnings/notes --- .Rbuildignore | 1 + DESCRIPTION | 1 + R/exportToAres.R | 72 ++++++++++++++++++++++++------------------------ 3 files changed, 38 insertions(+), 36 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 368098ed..d083d472 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,4 @@ deploy.sh _pkgdown.yml ^CRAN-SUBMISSION$ ^cran-comments\.md$ +^.lintr \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 1c7359c4..59c5a5ba 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Depends: R (>= 4.0.0) Imports: DBI, + duckdb, SqlRender (>= 1.6.0), dplyr, jsonlite, diff --git a/R/exportToAres.R b/R/exportToAres.R index 3c3bf4a7..eaa618c6 100644 --- a/R/exportToAres.R +++ b/R/exportToAres.R @@ -333,7 +333,7 @@ generateAOAchillesPerformanceReport <- function(connection, cdmDatabaseSchema, r queryAchillesPerformance <- SqlRender::loadRenderTranslateSql(sqlFilename = "export/performance/sqlAchillesPerformance.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, warnOnMissingParameters = FALSE, cdm_database_schema = cdmDatabaseSchema, results_database_schema = resultsDatabaseSchema, @@ -352,7 +352,7 @@ generateAODeathReport <- function(connection, cdmDatabaseSchema, resultsDatabase queryPrevalenceByGenderAgeYear <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlPrevalenceByGenderAgeYear.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -360,14 +360,14 @@ generateAODeathReport <- function(connection, cdmDatabaseSchema, resultsDatabase queryPrevalenceByMonth <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlPrevalenceByMonth.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) queryDeathByType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlDeathByType.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -375,7 +375,7 @@ generateAODeathReport <- function(connection, cdmDatabaseSchema, resultsDatabase queryAgeAtDeath <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/death/sqlAgeAtDeath.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -398,7 +398,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/ageatfirst.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) ageAtFirstObservationData <- DatabaseConnector::querySql(connection, renderedSql) @@ -407,7 +407,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/agebygender.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -418,7 +418,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observationlength_stats.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) observationLengthStats <- DatabaseConnector::querySql(connection, renderedSql) @@ -430,7 +430,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observationlength_data.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) observationLengthData <- DatabaseConnector::querySql(connection, renderedSql) @@ -439,7 +439,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/cumulativeduration.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) cumulativeDurationData <- DatabaseConnector::querySql(connection, renderedSql) @@ -451,7 +451,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observationlengthbygender.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -469,7 +469,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observationlengthbyage.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) opLengthByAgeData <- DatabaseConnector::querySql(connection, renderedSql) @@ -486,7 +486,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observedbyyear_stats.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) observedByYearStats <- DatabaseConnector::querySql(connection, renderedSql) @@ -498,7 +498,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observedbyyear_data.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) observedByYearData <- DatabaseConnector::querySql(connection, renderedSql) @@ -509,7 +509,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/observedbymonth.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) observedByMonth <- DatabaseConnector::querySql(connection, renderedSql) @@ -518,7 +518,7 @@ generateAOObservationPeriodReport <- function(connection, cdmDatabaseSchema, res renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observationperiod/periodsperperson.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) personPeriodsData <- DatabaseConnector::querySql(connection, renderedSql) @@ -793,7 +793,7 @@ generateAOMetadataReport <- function(connection, cdmDatabaseSchema, outputPath) queryMetadata <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/metadata/sqlMetadata.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, cdm_database_schema = cdmDatabaseSchema ) dataMetadata <- DatabaseConnector::querySql(connection, queryMetadata) @@ -943,7 +943,7 @@ generateAOCdmSourceReport <- function(connection, cdmDatabaseSchema, outputPath) queryCdmSource <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/metadata/sqlCdmSource.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, cdm_database_schema = cdmDatabaseSchema ) @@ -1901,7 +1901,7 @@ generateDataDensityTotal <- function(connection, resultsDatabaseSchema) { renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/datadensity/totalrecords.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) @@ -1919,7 +1919,7 @@ generateDataDensityRecordsPerPerson <- function(connection, resultsDatabaseSchem renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/datadensity/recordsperperson.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) @@ -1934,7 +1934,7 @@ generateDataDensityConceptsPerPerson <- function(connection, resultsDatabaseSche renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/datadensity/conceptsperperson.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) conceptsPerPerson <- DatabaseConnector::querySql(connection, renderedSql) @@ -1947,7 +1947,7 @@ generateDataDensityDomainsPerPerson <- function(connection, resultsDatabaseSchem renderedSql <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/datadensity/domainsperperson.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) domainsPerPerson <- DatabaseConnector::querySql(connection, renderedSql) @@ -1961,7 +1961,7 @@ generateDomainSummaryConditions <- function(connection, resultsDatabaseSchema, v queryConditions <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/condition/sqlConditionTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -1979,7 +1979,7 @@ generateDomainSummaryConditionEras <- function(connection, resultsDatabaseSchema queryConditionEra <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/conditionera/sqlConditionEraTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -1996,7 +1996,7 @@ generateDomainSummaryDrugs <- function(connection, resultsDatabaseSchema, vocabD queryDrugs <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlDrugTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2013,7 +2013,7 @@ generateDomainDrugStratification <- function(connection, resultsDatabaseSchema, queryDrugType <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drug/sqlDomainDrugStratification.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2026,7 +2026,7 @@ generateDomainSummaryDrugEra <- function(connection, resultsDatabaseSchema, voca queryDrugEra <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/drugera/sqlDrugEraTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2043,7 +2043,7 @@ generateDomainSummaryMeasurements <- function(connection, resultsDatabaseSchema, queryMeasurements <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/measurement/sqlMeasurementTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2060,7 +2060,7 @@ generateDomainSummaryObservations <- function(connection, resultsDatabaseSchema, queryObservations <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/observation/sqlObservationTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2077,7 +2077,7 @@ generateDomainSummaryVisitDetails <- function(connection, resultsDatabaseSchema, queryVisitDetails <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visitdetail/sqlVisitDetailTreemap.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2095,7 +2095,7 @@ generateDomainSummaryVisits <- function(connection, resultsDatabaseSchema, vocab queryVisits <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlVisitTreemap.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2113,7 +2113,7 @@ generateDomainVisitStratification <- function(connection, resultsDatabaseSchema, queryVisits <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/visit/sqlDomainVisitStratification.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2126,7 +2126,7 @@ generateDomainSummaryProcedures <- function(connection, resultsDatabaseSchema, v queryProcedures <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/procedure/sqlProcedureTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2143,7 +2143,7 @@ generateDomainSummaryDevices <- function(connection, resultsDatabaseSchema, voca queryDevices <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/device/sqlDeviceTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2160,7 +2160,7 @@ generateDomainSummaryProvider <- function(connection, resultsDatabaseSchema, voc queryProviders <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/provider/sqlProviderSpecialty.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema, vocab_database_schema = vocabDatabaseSchema ) @@ -2176,7 +2176,7 @@ generateQualityCompleteness <- function(connection, resultsDatabaseSchema) { queryCompleteness <- SqlRender::loadRenderTranslateSql( sqlFilename = "export/quality/sqlCompletenessTable.sql", packageName = "Achilles", - dbms = connectionDetails$dbms, + dbms = connection@dbms, results_database_schema = resultsDatabaseSchema ) dataCompleteness <- DatabaseConnector::querySql(connection, queryCompleteness)