In [38]:
library(httr)
library(data.table)
library(dplyr)
library(jsonlite)
# Show the working directory the notebook is running from.
getwd()

# API key for the CTS v2 API, read from the environment. It is sent as the
# `x-api-key` request header by poc_disease_search().
CTS_V2_API_KEY <- Sys.getenv("CTS_V2_API_KEY")
# Sys.getenv() returns "" for an unset variable; flag that here so a later
# failing request can be traced back to the missing key.
if (!nzchar(CTS_V2_API_KEY)) {
  warning(
    "CTS_V2_API_KEY is not set; requests will carry an empty x-api-key header",
    call. = FALSE
  )
}


#### Setup


In [39]:
# Default `size` (trials per request) used by poc_disease_search().
PAGE_SIZE <- 50

# Trial fields requested from the API; sent as the `include` entry of the
# request body in poc_disease_search().
INCLUDE_FIELDS <- c(
  "nct_id", "diseases", "current_trial_status",
  "primary_purpose", "sites.recruitment_status"
)

#' Search the CTS v2 trials endpoint for trials matching NCIt disease codes
#'
#' POSTs a JSON body to `https://clinicaltrialsapi.cancer.gov/api/v2/trials`
#' with fixed filters (current_trial_status "Active", primary_purpose
#' TREATMENT/SCREENING, sites.recruitment_status "ACTIVE") plus the given
#' disease code(s). Reads the globals `CTS_V2_API_KEY`, `INCLUDE_FIELDS` and
#' (for the default page size) `PAGE_SIZE`.
#'
#' @param ncit_code Value(s) sent as `diseases.nci_thesaurus_concept_id`.
#' @param from Value sent as `from`; paginate_cts_api() passes the number of
#'   trials already collected here.
#' @param size Value sent as `size`; defaults to `PAGE_SIZE`.
#' @return The parsed response body as returned by `httr::content()`.
#'   Callers in this file read its `data` and `total` elements.
#' @details Fails an assertion with "Response status is <code>" when the HTTP
#'   status is not 200. The request is aborted after a 5 second timeout.
poc_disease_search <- function(
    ncit_code,
    from = 0,
    size = PAGE_SIZE) {
  response <- httr::POST(
    "https://clinicaltrialsapi.cancer.gov/api/v2/trials",
    # Copied from get_api_studies_for_disease.R
    body = list(
      current_trial_status = "Active",
      primary_purpose = c("TREATMENT", "SCREENING"),
      diseases.nci_thesaurus_concept_id = ncit_code,
      sites.recruitment_status = "ACTIVE",
      include = INCLUDE_FIELDS,
      from = from,
      size = size
    ),
    encode = "json",
    httr::add_headers(
      `x-api-key` = CTS_V2_API_KEY,
      `Content-Type` = "application/json"
    ),
    httr::timeout(5)
  )

  # Check the status BEFORE parsing the body: an error response may not be
  # valid JSON, and a parse failure would otherwise hide the HTTP status.
  status <- httr::status_code(response)
  assertthat::assert_that(
    status == 200,
    msg = paste("Response status is", status)
  )

  httr::content(response)
}

#' Keep requesting pages until the expected number of records is collected
#'
#' @param paged_data List of records already fetched (typically the `data`
#'   element of the first page).
#' @param total_expected Single number: how many records to collect in total.
#' @param FUN Function called as `FUN(from = <records so far>, ...)`; it must
#'   return a list whose `data` element is the list of records for that page.
#' @param ... Further arguments forwarded unchanged to `FUN`.
#' @return `paged_data` with the records of every fetched page appended.
#' @details Progress is printed for each page. If a page comes back empty
#'   before `total_expected` is reached, a warning is raised and the records
#'   collected so far are returned instead of looping forever.
paginate_cts_api <- function(paged_data, total_expected, FUN, ...) {
  # A missing total would otherwise surface as an opaque
  # "argument is of length zero" error from the loop condition.
  if (!is.numeric(total_expected) || length(total_expected) != 1 ||
    is.na(total_expected)) {
    stop("total_expected must be a single non-missing number", call. = FALSE)
  }

  print(paste("    Expecting:", total_expected))
  while (length(paged_data) < total_expected) {
    data <- FUN(from = length(paged_data), ...)
    page <- data$data
    print(paste("          Got:", length(page)))

    # An empty page makes no progress; without this guard the same request
    # would be repeated indefinitely.
    if (length(page) == 0) {
      warning(
        "Received an empty page after ", length(paged_data), " of ",
        total_expected, " expected records; stopping pagination",
        call. = FALSE
      )
      break
    }

    paged_data <- append(paged_data, page)
    print(paste("        Total:", length(paged_data)))
  }
  paged_data
}


#### Fetch trials from CTS API


In [40]:
# Disease name that the checks further down look for in each trial.
search_term <- "Stage IA Breast Cancer"

# NCIt code for Breast Carcinoma; used later when viewing the `type` column.
breast_carcinoma <- "C4872"

# Codes to search for. Alternatives kept for reference:
# ncit_code <- c("C4872") # Breast Carcinoma
# ncit_code <- c("C153238") # Metastatic Breast Carcinoma
# ncit_code <- c("C3641") # Stage 0 Breast Cancer AJCC v6 and v7
# Stage IA BC (w/ AJCC versions)
ncit_code <- c("C85835", "C139557", "C139536")

# First page of results; later cells read its `data` and `total` elements.
trials_p1 <- poc_disease_search(ncit_code = ncit_code)


In [41]:
# Starting from the first page, fetch the remaining pages until the total
# reported by the API has been collected.
all_trials <- paginate_cts_api(
  paged_data = trials_p1$data,
  total_expected = trials_p1$total,
  FUN = poc_disease_search,
  ncit_code = ncit_code
)


[1] "    Expecting: 34"


#### Prepare the diseases


In [42]:
# For every trial, turn each entry of its `diseases` list into a flat record
# tagged with the trial's nct_id. `type` and `parents` are collapsed to a
# single comma-separated string with toString().
# NOTE(review): assumes every disease entry carries name, inclusion_indicator
# and nci_thesaurus_concept_id; confirm against the API response.
disease_list_of_lists <- lapply(all_trials, function(trial) {
  lapply(trial$diseases, function(d) {
    list(
      nct_id = trial$nct_id,
      disease = d$name,
      inclusion_indicator = d$inclusion_indicator,
      code = d$nci_thesaurus_concept_id,
      type = toString(d$type),
      parents = toString(d$parents)
    )
  })
})

# One data.table per trial, one row per disease record.
disease_list_of_tables <- lapply(disease_list_of_lists, rbindlist)

# Number of trials processed and total number of disease records, derived
# from the results rather than accumulated through `<<-` side effects.
count <- length(all_trials)
disease_count <- sum(lengths(disease_list_of_lists))
count
disease_count


#### Check the diseases


In [57]:
#' Make sure that the search term appears in each trial
noop <- lapply(disease_list_of_tables, function(trial_diseases) {
  assertthat::assert_that(search_term %in% trial_diseases$disease)
})

#' Apply the following if checking leaf-node searches (e.g. Stage search with no children terms)
noop <- lapply(disease_list_of_tables, function(trial_diseases) {
  search_term_idxs <- which(trial_diseases$disease == search_term)
  terms <- trial_diseases[search_term_idxs]
  # Count ROWS: length() of a data.table is its number of columns, so it
  # stays >= 1 even when no row matched the search term.
  assertthat::assert_that(nrow(terms) >= 1,
    msg = "Expecting at least one instance of search_term"
  )
  # Collapse to a single TRUE/FALSE so that mixed or missing indicators fail
  # with the message below rather than a "length of assertion" error.
  assertthat::assert_that(isTRUE(all(terms$inclusion_indicator == "TRIAL")),
    msg = "Expecting that leaf-node terms should all be found at TRIAL level"
  )
})

# Stack the per-trial tables into a single table of all disease records.
diseases_df <- rbindlist(disease_list_of_tables)


#### View relationship of `type`

In [None]:
# Distinct (code, disease, type) combinations for the searched codes plus
# Breast Carcinoma, ordered by type and then disease name.
diseases_df %>%
  filter(code %in% c(breast_carcinoma, ncit_code)) %>%
  select(code, disease, type) %>%
  arrange(type, disease) %>%
  unique()


In [58]:
# For each trial, list the TRIAL-level diseases whose name contains
# "Stage IA" (case-insensitive), sorted by disease name.
lapply(disease_list_of_tables, function(trial_diseases) {
  at_trial_level <- trial_diseases$inclusion_indicator == "TRIAL"
  mentions_stage_ia <- grepl(
    "Stage IA",
    trial_diseases$disease,
    ignore.case = TRUE
  )
  arrange(trial_diseases[at_trial_level & mentions_stage_ia, ], disease)
})


nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04457596,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04852887,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT05705401,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT01570998,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT02206984,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04584255,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT02945579,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT02912312,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT03562637,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT03562637,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT02276443,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04234386,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04234386,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT01766297,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT03937154,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT03937154,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556
NCT03937154,Stage IA Ovarian Cancer,TRIAL,C139965,stage,C139964
NCT03937154,Stage IA1 Lung Cancer,TRIAL,C136471,stage,C136470
NCT03937154,Stage IA2 Lung Cancer,TRIAL,C136472,stage,C136470
NCT03937154,Stage IA3 Lung Cancer,TRIAL,C136473,stage,C136470

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT00924027,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT00924027,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04272801,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT03796559,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT03796559,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04849871,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04329065,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04329065,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04614194,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04614194,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT06008158,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT05472792,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT02476786,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04454528,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04454528,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04360330,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04360330,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04270149,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04270149,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT04040569,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT04040569,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT05417308,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT05417308,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT05183126,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT05183126,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT05592938,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT03643861,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT03643861,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT05505643,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT06295744,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT03567330,Stage IA Breast Cancer,TRIAL,C139536,stage,C139535
NCT03567330,Stage IA Breast Cancer,TRIAL,C139557,stage,C139556

nct_id,disease,inclusion_indicator,code,type,parents
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
NCT02666378,Stage IA Breast Cancer,TRIAL,C85835,stage,C88375
