In [7]:
# Raise the repr limits on how many matrix columns/rows are displayed.
options(
  repr.matrix.max.cols = 50,
  repr.matrix.max.rows = 100
)
library(tidyverse)
library(data.table)

In [8]:
# map.r lives one level above the working directory; the clinical helpers are
# resolved relative to the working directory itself.
# NOTE(review): presumably these define I_DIR, READY_DIR and the helper
# functions used below — confirm.
parent_dir <- dirname(getwd())
source(paste0(parent_dir, "/map.r"))
source("helpers/clinical_help.r")

In [9]:
# Metadata inputs are read from the "metadata/" folder under I_DIR.
META_DIR <- paste0(I_DIR, "metadata/")

#### 0 - Read metadata files

In [10]:
# Load the two tab-separated inputs found in META_DIR.
meta <- read.delim(paste0(META_DIR, "metadata.tsv"))
response <- read.delim(paste0(META_DIR, "treatment_responses.tsv"))

In [5]:
#table(meta$primaryTumorSubLocation)

#### 1 - Prep Metadata

- Remove duplicate samples per patient, keeping the one with the highest purity
- Derive numeric sex and age variables
- Remove granular fields not used for analysis

In [11]:
# Recode sentinel strings anywhere in the metadata table, in this order:
# "null" becomes missing, "No" becomes 0 and "Yes" becomes 1.
sentinel_recodes <- list("null" = NA, "No" = 0, "Yes" = 1)
for (sentinel in names(sentinel_recodes)) {
  meta[meta == sentinel] <- sentinel_recodes[[sentinel]]
}

In [12]:
# One row per hmfPatientId: keep the sample ranked first by descending
# tumorPurity, drop columns not needed downstream, then derive sex and age
# row by row with the sex()/age() helpers.
# NOTE(review): row_number() assigns NA to rows whose tumorPurity is NA, so
# those rows never pass the filter — confirm that dropping them is intended.
meta_prep <- meta %>%
  group_by(hmfPatientId) %>%
  filter(row_number(desc(tumorPurity)) == 1) %>%
  ungroup() %>%
  select(
    -c(setName, hmfPatientId, hmfSampleId),
    -c(primaryTumorExtraDetails, biopsyLocation)
  ) %>%
  rowwise() %>%
  mutate(
    sex = sex(gender),
    age = age(birthYear, biopsyDate)
  ) %>%
  ungroup() %>%
  select(-c(gender, birthYear))

#### 2 - Response

- Derive best overall response
- Derive last known alive time (used to derive OS endpoint)
- Forget PFS for now

In [13]:
# Cells holding the literal string "null" are treated as missing.
null_cells <- response == "null"
response[null_cells] <- NA

In [14]:
# Collapse rows with a non-missing response to one row per
# sampleId/name/type/mechanism: bor comes from responder(), and last_date is
# the later of the maximum response date and the maximum treatment end date.
# group_by() replaces any row-wise grouping, so no rowwise() step is used.
response_prep <- response %>%
  drop_na(response) %>%
  group_by(sampleId, name, type, mechanism) %>%
  summarise(
    bor = responder(response),
    latest_response = max(responseDate),
    latest_treatment_end = max(endDate),
    last_date = max(latest_response, latest_treatment_end, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  select(sampleId, name, mechanism, bor, last_date)

[1m[22m`summarise()` has grouped output by 'sampleId', 'name', 'type'. You can override using the `.groups` argument.


#### 3 - Together

- Join metadata and response to get BOR and derive OS endpoints

In [15]:
# Join the per-sample metadata with the prepared responses and build the
# analysis table: clin_* descriptors and filters plus the Y_* outcome columns.
# NOTE(review): response_prep is grouped by sampleId/name/type/mechanism, so a
# sample with several such groups may produce several rows here — confirm.
clin_ready <- meta_prep %>%
  left_join(response_prep, by = "sampleId") %>%
  mutate(
    treatment_type = consolidatedTreatmentType,
    # A recorded death date counts as an event.
    os_event = as.numeric(!is.na(deathDate)),
    # Days from biopsy to death, or to last_date when no death date exists.
    os_days = ifelse(
      is.na(deathDate),
      as.numeric(as.Date(last_date) - as.Date(biopsyDate)),
      as.numeric(as.Date(deathDate) - as.Date(biopsyDate))
    )
  ) %>%
  # transmute() keeps only the columns listed, so no separate select() of
  # date/response columns is needed beforehand.
  transmute(
    sampleId,
    clin_filter_doids = doids,
    clin_primaryTumorLocation = primaryTumorLocation,
    clin_primaryTumorSubLocation = primaryTumorSubLocation,
    clin_primaryTumorType = primaryTumorType,
    clin_primaryTumorSubType = primaryTumorSubType,
    clin_biopsySite = biopsySite,
    # Fix: this was derived from responseMeasured, making it an exact copy of
    # clin_filter_responseMeasured below.
    # Assumes the metadata provides a treatmentGiven column recoded from
    # Yes/No to 1/0 like the other flags — TODO confirm.
    clin_filter_treatmentGiven = treatmentGiven == 1,
    clin_treatment = treatment,
    clin_treatment_type = treatment_type,
    clin_treatment_mechanism = mechanism,
    clin_filter_responseMeasured = responseMeasured == 1,
    Y_bor = bor,
    clin_filter_osAvail = !is.na(os_days),
    Y_os_event = os_event,
    Y_os_days = os_days,
    clin_purity = tumorPurity,
    clin_sex = sex,
    clin_age = age,
    clin_hasSystemicPreTreatment = hasSystemicPreTreatment,
    clin_hasRadiotherapyPreTreatment = hasRadiotherapyPreTreatment
  )

In [20]:
# Coarser tumor-location label: locations with more than
# min_location_count rows keep their name, all others become "Other".
min_location_count <- 50
frequent_locations <- clin_ready %>%
  group_by(clin_primaryTumorLocation) %>%
  summarise(ct = n()) %>%
  filter(ct > min_location_count) %>%
  pull(clin_primaryTumorLocation)

clin_ready <- clin_ready %>%
  mutate(
    clin_primaryTumorLocation2 = ifelse(
      clin_primaryTumorLocation %in% frequent_locations,
      clin_primaryTumorLocation,
      "Other"
    )
  )

#### 4 - Add indicators for treatment type
- Adding annotation for multiple therapy columns

In [21]:
# Treatment names passed to trt_indicator() to flag anti-PD therapy.
anti_pd_trts <- c(
  "Atezolizumab",
  "Avelumab",
  "Durvalumab",
  "Nivolumab",
  "Pembrolizumab"
)

In [22]:
# Flag is 1 where trt_indicator() returns a positive value for the anti-PD
# names against the treatment column, otherwise 0.
anti_pd_hits <- trt_indicator(anti_pd_trts, clin_ready$clin_treatment)
clin_ready$clin_anti_pd_treated <- ifelse(anti_pd_hits > 0, 1, 0)

#### 5 - Send it!

In [23]:
# Write the final clinical table to clinical_ready.csv under READY_DIR.
fwrite(x = clin_ready, file = paste0(READY_DIR, "clinical_ready.csv"))