# Notebook to impute motor symptoms from MDS-UPDRS III measures

## Loading libraries

In [1]:
# Load the tidyverse because it is the model we should follow
library(tidyverse)

# Load reticulate for calling the FireCloud Python API
library(reticulate)

# Load ggplot2 for graphs
library(ggplot2)

library(stats)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.4     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.0.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



## Setup utility functions

In [2]:
# Utility routine for printing a shell command before executing it
shell_do <- function(command) {
    print(paste('Executing: ', command))
    system(command, intern = TRUE)
}

# Utility routines for reading files from Google Cloud Storage
gcs_read_file <- function(path) {
    pipe(str_glue('gsutil -u {BILLING_PROJECT_ID} cat {path}'))
}
gcs_read_csv <- function(path, sep=',') {
    readr::read_csv(gcs_read_file(path))
}

# Utility routine for display a message and a link
display_html_link <- function(description, link_text, url) {
    html = str_glue('
    <p>
    </p>
    <p>
    {description}
    <a target=_blank href="{url}">{link_text}</a>.
    </p>
    ')

    IRdisplay::display_html(html)
}

# Utility routine for displaying a message and link to Cloud Console
link_to_cloud_console_gcs <- function(description, link_text, gcs_path) {
    url_path <- file.path('https://console.cloud.google.com/storage/browser',
                          str_replace(gcs_path, "gs://",""))
    url_query <- str_glue('userProject={URLencode(BILLING_PROJECT_ID)}')

    url = str_glue('{url_path}?{url_query}')

    display_html_link(description, link_text, url)
}

## Setup notebook globals

In [3]:
BILLING_PROJECT_ID <- Sys.getenv('GOOGLE_PROJECT')
WORKSPACE_NAMESPACE <- Sys.getenv('WORKSPACE_NAMESPACE')
WORKSPACE_NAME <- Sys.getenv('WORKSPACE_NAME')

fapi <- import("firecloud.api")
WORKSPACE_ATTRIBUTES <- fapi$get_workspace(WORKSPACE_NAMESPACE, WORKSPACE_NAME)$json()$workspace$attributes


# Setting the path to my workspace
PATH_MYWORKSPACE = 'gs://fc-cd759889-2702-4f72-a832-0be756073417'
print(PATH_MYWORKSPACE)

print(shell_do(str_glue('gsutil -u {BILLING_PROJECT_ID} ls {PATH_MYWORKSPACE}')))
print(shell_do(str_glue('gsutil -u {BILLING_PROJECT_ID} ls {PATH_MYWORKSPACE}/notebooks')))
print(shell_do(str_glue('gsutil -u {BILLING_PROJECT_ID} ls {PATH_MYWORKSPACE}/files')))

[1] "gs://fc-cd759889-2702-4f72-a832-0be756073417"
[1] "Executing:  gsutil -u terra-ed19e231 ls gs://fc-cd759889-2702-4f72-a832-0be756073417"
[1] "gs://fc-cd759889-2702-4f72-a832-0be756073417/files/"    
[2] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/"
[1] "Executing:  gsutil -u terra-ed19e231 ls gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks"
 [1] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/PD_MDSUPDRSIII.csv"                             
 [2] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/Py - 1. Exploration_Filtering.ipynb"            
 [3] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/Py - 2. data_QC.ipynb"                          
 [4] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/Py - data_QC.ipynb"                             
 [5] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/R - 0. Start here.ipynb"                        
 [6] "gs://fc-cd759889-2702-4f72-a832-0be756073417/notebooks/R - 1. MDS_UPDRSIII_im

## Load filtered PD MDS UPDRS III data and reshape

In [7]:
PD_MDSUPDRSIII <- gcs_read_csv(file.path(PATH_MYWORKSPACE, 'files/PD_MDSUPDRSIII.csv'))
str(PD_MDSUPDRSIII)

PD_MDSUPDRSIII_all <- gcs_read_csv(file.path(PATH_MYWORKSPACE, 'files/PD_MDSUPDRSIII_allVisits.csv'))
str(PD_MDSUPDRSIII)

# Conveting to long format
PD_MDSUPDRSIII_long <- PD_MDSUPDRSIII %>% 
  select(-mds_updrs_part_iii_summary_score) %>%
  tidyr::gather(UPDRSIII_measure, score ,starts_with("code")) 

PD_MDSUPDRSIII_long_all <- PD_MDSUPDRSIII_all %>% 
  select(-mds_updrs_part_iii_summary_score) %>%
  tidyr::gather(UPDRSIII_measure, score ,starts_with("code")) 



[1m[1mRows: [1m[22m[34m[34m4894[34m[39m [1m[1mColumns: [1m[22m[34m[34m38[34m[39m

[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (3): ID, COHORT, visit_name
[32mdbl[39m (35): visit_month, code_upd2301_speech_problems, code_upd2302_facial_exp...


[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set [30m[47m[30m[47m`show_col_types = FALSE`[47m[30m[49m[39m to quiet this message.



spec_tbl_df [4,894 × 38] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ID                                                       : chr [1:4894] "PD-PDAA503EF5" "PD-PDAA503EF5" "PD-PDAA503EF5" "PD-PDAA503EF5" ...
 $ COHORT                                                   : chr [1:4894] "PDBP" "PDBP" "PDBP" "PDBP" ...
 $ visit_name                                               : chr [1:4894] "M0" "M12" "M18" "M24" ...
 $ visit_month                                              : num [1:4894] 0 12 18 24 30 36 6 0 0 12 ...
 $ code_upd2301_speech_problems                             : num [1:4894] 2 2 1 1 1 1 1 2 1 0 ...
 $ code_upd2302_facial_expression                           : num [1:4894] 0 1 0 0 1 0 0 2 0 1 ...
 $ code_upd2303a_rigidity_neck                              : num [1:4894] 0 2 1 0 1 2 1 1 0 1 ...
 $ code_upd2303b_rigidity_rt_upper_extremity                : num [1:4894] 1 1 1 1 1 1 1 1 1 1 ...
 $ code_upd2303c_rigidity_left_upper_extremity              : num [1:4894] 2 1 1 1

[1m[1mRows: [1m[22m[34m[34m5155[34m[39m [1m[1mColumns: [1m[22m[34m[34m38[34m[39m

[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (3): ID, COHORT, visit_name
[32mdbl[39m (35): visit_month, code_upd2301_speech_problems, code_upd2302_facial_exp...


[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set [30m[47m[30m[47m`show_col_types = FALSE`[47m[30m[49m[39m to quiet this message.



spec_tbl_df [4,894 × 38] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ID                                                       : chr [1:4894] "PD-PDAA503EF5" "PD-PDAA503EF5" "PD-PDAA503EF5" "PD-PDAA503EF5" ...
 $ COHORT                                                   : chr [1:4894] "PDBP" "PDBP" "PDBP" "PDBP" ...
 $ visit_name                                               : chr [1:4894] "M0" "M12" "M18" "M24" ...
 $ visit_month                                              : num [1:4894] 0 12 18 24 30 36 6 0 0 12 ...
 $ code_upd2301_speech_problems                             : num [1:4894] 2 2 1 1 1 1 1 2 1 0 ...
 $ code_upd2302_facial_expression                           : num [1:4894] 0 1 0 0 1 0 0 2 0 1 ...
 $ code_upd2303a_rigidity_neck                              : num [1:4894] 0 2 1 0 1 2 1 1 0 1 ...
 $ code_upd2303b_rigidity_rt_upper_extremity                : num [1:4894] 1 1 1 1 1 1 1 1 1 1 ...
 $ code_upd2303c_rigidity_left_upper_extremity              : num [1:4894] 2 1 1 1

In [8]:

head(PD_MDSUPDRSIII_long)
dim(PD_MDSUPDRSIII_long)

head(PD_MDSUPDRSIII_long_all)
dim(PD_MDSUPDRSIII_long_all)

ID,COHORT,visit_name,visit_month,UPDRSIII_measure,score
<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>
PD-PDAA503EF5,PDBP,M0,0,code_upd2301_speech_problems,2
PD-PDAA503EF5,PDBP,M12,12,code_upd2301_speech_problems,2
PD-PDAA503EF5,PDBP,M18,18,code_upd2301_speech_problems,1
PD-PDAA503EF5,PDBP,M24,24,code_upd2301_speech_problems,1
PD-PDAA503EF5,PDBP,M30,30,code_upd2301_speech_problems,1
PD-PDAA503EF5,PDBP,M36,36,code_upd2301_speech_problems,1


ID,COHORT,visit_name,visit_month,UPDRSIII_measure,score
<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>
PD-PDAA503EF5,PDBP,M0,0,code_upd2301_speech_problems,2
PD-PDAA503EF5,PDBP,M12,12,code_upd2301_speech_problems,2
PD-PDAA503EF5,PDBP,M18,18,code_upd2301_speech_problems,1
PD-PDAA503EF5,PDBP,M24,24,code_upd2301_speech_problems,1
PD-PDAA503EF5,PDBP,M30,30,code_upd2301_speech_problems,1
PD-PDAA503EF5,PDBP,M36,36,code_upd2301_speech_problems,1


In [9]:
unique(PD_MDSUPDRSIII_long$UPDRSIII_measure) %>% length
unique(PD_MDSUPDRSIII_long$UPDRSIII_measure) 

## Defining imputation function and impute

In [6]:
impute_UPDRSIII_v4 <- function(df, ID = "ID", score_column = "score", visit_column = "visit_number",
                               UPDRSIII_column = "UPDRSIII_measure", UPDRS_type = "total",
                               missing_threshold = c(6,5,1)) {
  # libs loading
  library(hash)      # To create key-value pairs
  library(tidyverse) # For tidy evaluation and wrangling
  library(glue)      # To dynamically name variables
  
  # Safety checks
  if(!"score" %in% colnames(df)) stop("Warning: score column not found")
  #if(!"visit_number" %in% colnames(df)) stop("Warining: visit_number column not found")
  if(length(colnames) > 4) stop("Consider using tidyr::gather to get your data
                               in long format as input for this function")
  
  # vector with UPDRS_limb terms
  limb_terms <- c("III_3a", "III_3b", "III_3c", "III_3d", "III_3e", 
                  "III_15a", "III_15b", "III_16a","III_16b", "III_17a", "III_17b",
                  "III_17c", "III_17d", "III_17e", "III_4a", "III_4b",
                  "III_5a", "III_5b", "III_6a", "III_6b", "III_7a", "III_7b",
                  "III_8a", "III_8b", "III_18",
                  "3_3a", "3_3b", "3_3c", "3_3d", "3_3e", 
                  "3_15a", "3_15b", "3_16a","3_16b", "3_17a", "3_17b",
                  "3_17c", "3_17d", "3_17e", "3_4a", "3_4b",
                  "3_5a", "3_5b", "3_6a", "3_6b", "3_7a", "3_7b",
                  "3_8a", "3_8b", "3_18",
                  "NP3RIGN", "NP3RIGRU", "NP3RIGLU", "NP3RIGRL", "NP3RIGLL",
                  "NP3PTRMR", "NP3PTRML", "NP3KTRMR", "NP3KTRML", "NP3RTARU", "NP3RTALU",
                  "NP3RTARL", "NP3RTALL", "NP3RTALJ", "NP3FTAPR", "NP3FTAPL",
                  "NP3HMOVR", "NP3HMOVL", "NP3PRSPR", "NP3PRSPL", 
                  "NP3TTAPR", "NP3TTAPL", "NP3LGAGR", "NP3LGAGL",
                  "NP3RTCON",
                  "RigidityNeck", "RigidityRightUpper", "RigidityRightLower", "RigidityLeftUpper", "RigidityLeftLower",
                  "PosturalTremorHandsRight", "PosturalTremorHandsLeft",
                  "KineticTremorHandsRight", "KineticTremorHandsLeft",
                  "RestTremorAmplitudeRightUpper", "RestTremorAmplitudeRightLower", "RestTremorAmplitudeLeftUpper", 
                  "RestTremorAmplitudeLeftLower", "RestTremorAmplitudeLipJaw",
                  "FingerTappingRight","FingerTappingLeft",
                  "HandMovementsRight", "HandMovementsLeft", "PSHandsRight", "PSHandsLeft",
                  "ToeTappingRight", "ToeTappingLeft", "LegAgilityRight", "LegAgilityLeft",
                  "ConstancyOfRestTremor",
                  "MOTOREXAM3NECK", "MOTOREXAM3RUE", "MOTOREXAM3LUE", "MOTOREXAM3RLE", "MOTOREXAM3LLE",
                  "MOTOREXAM15R", "MOTOREXAM15L", "MOTOREXAM16R", "MOTOREXAM16L", 
                  "MOTOREXAM17LIPJ", "MOTOREXAM17RUE", "MOTOREXAM17LUE", "MOTOREXAM17RLE", "MOTOREXAM17LLE",
                  "MOTOREXAM4R", "MOTOREXAM4L", "MOTOREXAM5R", "MOTOREXAM5L", "MOTOREXAM6R", "MOTOREXAM6L",
                  "MOTOREXAM7R", "MOTOREXAM7L", "MOTOREXAM8R", "MOTOREXAM8L", "MOTOREXAM18")
  
  # Vector with UPDRS_axial terms
  axial_terms <- c("III_1", "III_2", "III_9", "III_10", "III_11", "III_12", "III_13", "III_14", 
                   "3_1", "3_2", "3_9", "3_10", "3_11", "3_12", "3_13", "3_14",
                   "NP3SPCH", "NP3FACXP", "NP3RISNG", "NP3GAIT", "NP3FRZGT", "NP3PSTBL", 
                   "NP3POSTR", "NP3BRADY",
                   "Speech", "FacialExpression", "ArisingFromChair", "Gait", "FreezingOfGait",
                   "PosturalStability", "Posture", "GSM",
                   "MOTOREXAM1", "MOTOREXAM2", "MOTOREXAM9", "MOTOREXAM10", "MOTOREXAM11", "MOTOREXAM12", "MOTOREXAM13", "MOTOREXAM14")
  
  # Dictionary to use at the end of the function. Improvement not implemented yet
  measure_type <- hash(total = "_UPDRS_III_total", limb = "_UPDRS_III_limb",
                       axial = "_UPDRS_III_axial")
  
  # List with different terms between the matching part of the if statements.
  # Like this, we can take the common part of the code out of the if statements
  # Go on a for loop for all of the UPDRS_type we are passing on the input
  
  
  # Removing UPDRSIII_total measure if present
  df <- df %>%
    dplyr::select(!ends_with(c("UPDRS_III", "total")))
  
  final_df = df %>% dplyr::select(.data[[ID]], .data[[visit_column]]) %>% 
    distinct()
  
  for (type_index in 1:length(UPDRS_type)) {
    
    # To be used when naming the imputed columns
    term = UPDRS_type[type_index]
    
    # UPDRSIII_total imputation
    if (term  == "total") {
      
      mydf <- df %>% 
        dplyr::select(.data[[ID]], .data[[visit_column]], .data[[score_column]], contains(c("UPDRS")))
      
      mydf <- mydf %>%
        dplyr::group_by(.data[[ID]], .data[[visit_column]]) %>% 
        dplyr::mutate(missing_total = sum(is.na(.data[[score_column]])),
                      UPDRSIIItotal_imputed = ifelse(missing_total > missing_threshold[1], NA,
                                                     (sum(score, na.rm = TRUE)) / (33-missing_total) * 33 )) 
      
      # UPDRSIII_limb imputation
    } else if (term == "limb") {
      
      mydf <- df %>%
        dplyr::select(.data[[ID]], .data[[score_column]], .data[[visit_column]], contains(c("UPDRS"))) %>%
        dplyr::filter(grepl(paste(limb_terms, collapse = "|"), .data[[UPDRSIII_column]]))
      
      mydf <- mydf %>%
        dplyr::group_by(.data[[ID]], .data[[visit_column]]) %>% 
        dplyr::mutate(missing_total = sum(is.na(.data[[score_column]])),
                      UPDRSIIIlimb_imputed = ifelse(missing_total > missing_threshold[2], NA,
                                                    (sum(score, na.rm = TRUE)) / (25-missing_total) * 25))
      # UPDRSIII_axial imputation
    } else if (term == "axial") {
      
      mydf <- df %>%
        dplyr::select(.data[[ID]], .data[[score_column]], .data[[visit_column]], contains(c("UPDRS"))) %>%
        dplyr::filter(!grepl(paste(limb_terms, collapse = "|"), .data[[UPDRSIII_column]]))
      
      mydf <- mydf %>%
        dplyr::group_by(.data[[ID]], .data[[visit_column]]) %>% 
        dplyr::mutate(missing_total = sum(is.na(.data[[score_column]])),
                      UPDRSIIIaxial_imputed = ifelse(missing_total > missing_threshold[3], NA,
                                                     (sum(score, na.rm = TRUE)) / (8-missing_total) * 8)) 
      
    } else {
      stop("Only UPDRS_total, UPDRS_limb, or UPDRS_axial supported")
    }
    
    measure_name = paste0("UPDRSIII_measure_", term)
    mydf <- mydf %>%
      dplyr::arrange(.data[[ID]], .data[[visit_column]]) %>%
      dplyr::filter(row_number() == 1) %>% 
      dplyr::ungroup() %>%
      dplyr::mutate("{measure_name}" := paste0("V", .data[[visit_column]], "_UPDRS_III_", term)) %>%
      dplyr::select(-c(UPDRSIII_measure, score, missing_total))
    
    # final_df <- final_df %>% 
    #   dplyr::left_join(mydf, by = c(.data[[ID]], .data[[visit_column]]))
    
    final_df <- final_df %>% 
      dplyr::left_join(mydf, by = c(ID, visit_column))
    
  }
  return(final_df)
}


In [7]:
# Impute each cohort sepparately 

AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP <- impute_UPDRSIII(df = PD_MDSUPDRSIII_long %>% filter(COHORT == "PDBP"), 
                                                           ID = "ID", 
                                                           score_column = "score", 
                                                           visit_column = "visit_month",
                                                           UPDRSIII_column = "UPDRSIII_measure",
                                                           UPDRS_type = c("total", "axial", "limb"))
AMPPD_PDonly_36months_long_updrsIII_imputed_STEADY <- impute_UPDRSIII(df = PD_MDSUPDRSIII_long %>% filter(COHORT == "STEADY-PD3"), 
                                                           ID = "ID", 
                                                           score_column = "score", 
                                                           visit_column = "visit_month",
                                                           UPDRSIII_column = "UPDRSIII_measure",
                                                           UPDRS_type = c("total", "axial", "limb"))
AMPPD_PDonly_36months_long_updrsIII_imputed_SURE <- impute_UPDRSIII(df = PD_MDSUPDRSIII_long %>% filter(COHORT == "SURE-PD3"),
                                                           ID = "ID", 
                                                           score_column = "score", 
                                                           visit_column = "visit_month",
                                                           UPDRSIII_column = "UPDRSIII_measure",
                                                           UPDRS_type = c("total", "axial", "limb"))

#head(AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP)
#head(AMPPD_PDonly_36months_long_updrsIII_imputed_STEADY)
#head(AMPPD_PDonly_36months_long_updrsIII_imputed_SURE)

ERROR: Error in impute_UPDRSIII(df = PD_MDSUPDRSIII_long %>% filter(COHORT == : could not find function "impute_UPDRSIII"


## Explore the UPDRS measures across cohorts

In [10]:
AMPPD_PDonly_36months_long_updrsIII_imputed_ALL <- rbind(AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP,
                                                        AMPPD_PDonly_36months_long_updrsIII_imputed_STEADY,
                                                        AMPPD_PDonly_36months_long_updrsIII_imputed_SURE)

AMPPD_PDonly_36months_long_updrsIII_imputed_ALL <- AMPPD_PDonly_36months_long_updrsIII_imputed_ALL %>% 
    mutate(COHORT = case_when(grepl("PD-", ID) ~ "PDBP",
                              grepl("SU-", ID) ~ "SURE-PD3",
                              grepl("SY-", ID) ~ "STEADY-PD3",
                              TRUE ~ "NA")) %>%
    relocate(COHORT, .after=ID)

head(AMPPD_PDonly_36months_long_updrsIII_imputed_ALL)
dim(AMPPD_PDonly_36months_long_updrsIII_imputed_ALL)

ID,COHORT,visit_month,UPDRSIIItotal_imputed,UPDRSIII_measure_total,UPDRSIIIaxial_imputed,UPDRSIII_measure_axial,UPDRSIIIlimb_imputed,UPDRSIII_measure_limb
<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<chr>
PD-PDAA503EF5,PDBP,0,20,V0_UPDRS_III_total,5,V0_UPDRS_III_axial,15,V0_UPDRS_III_limb
PD-PDAA503EF5,PDBP,12,31,V12_UPDRS_III_total,9,V12_UPDRS_III_axial,22,V12_UPDRS_III_limb
PD-PDAA503EF5,PDBP,18,27,V18_UPDRS_III_total,7,V18_UPDRS_III_axial,20,V18_UPDRS_III_limb
PD-PDAA503EF5,PDBP,24,20,V24_UPDRS_III_total,5,V24_UPDRS_III_axial,15,V24_UPDRS_III_limb
PD-PDAA503EF5,PDBP,30,25,V30_UPDRS_III_total,5,V30_UPDRS_III_axial,20,V30_UPDRS_III_limb
PD-PDAA503EF5,PDBP,36,34,V36_UPDRS_III_total,4,V36_UPDRS_III_axial,30,V36_UPDRS_III_limb


In [11]:
# Getting the most relevant summary statistics

mean_no_na <- function(x){
    mean(x, na.rm = TRUE)
}
sd_no_na <- function(x){
    sd(x, na.rm = TRUE)
}

max_no_na <- function(x){
    max(x, na.rm=TRUE)
}



# AMPPD_PDonly_36months_long_updrsIII_imputed_ALL %>% group_by(COHORT, visit_month) %>%
#     summarise(count =  n(), .groups = 'drop')

# We need to filter patients that have two visits complete. Otherwise, only one time point is included


samples_count_visit = AMPPD_PDonly_36months_long_updrsIII_imputed_ALL %>% filter(COHORT == "STEADY-PD3") %>%
    group_by(ID, visit_month) %>%
    mutate(count =  n()) 

# Checking the number of samples with more than one record. 
cat("Number of samples with more than record per time point:", length(which(samples_count_visit$count > 1)))


# # Mean and sd scores across COHORT-VISITS
AMPPD_PDonly_36months_long_updrsIII_imputed_ALL %>% group_by(COHORT, visit_month) %>%
      summarise(across(ends_with("imputed"), list(mean = mean_no_na, sd = sd_no_na, max = max_no_na)))

Number of samples with more than record per time point: 0

`summarise()` has grouped output by 'COHORT'. You can override using the `.groups` argument.



COHORT,visit_month,UPDRSIIItotal_imputed_mean,UPDRSIIItotal_imputed_sd,UPDRSIIItotal_imputed_max,UPDRSIIIaxial_imputed_mean,UPDRSIIIaxial_imputed_sd,UPDRSIIIaxial_imputed_max,UPDRSIIIlimb_imputed_mean,UPDRSIIIlimb_imputed_sd,UPDRSIIIlimb_imputed_max
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
PDBP,0,25.48289,13.696911,83,5.908362,4.61284,30,19.5746,10.727984,61
PDBP,6,22.7056,13.377573,74,4.472019,4.101287,28,18.23358,10.20994,53
PDBP,12,22.45036,13.085625,83,4.547215,4.20827,26,17.90315,9.956718,58
PDBP,18,22.36389,12.787891,91,4.411111,4.217999,29,17.95278,9.555613,62
PDBP,24,23.22543,12.561549,80,4.419075,4.067629,24,18.80636,9.577756,56
PDBP,30,23.55195,12.936599,89,4.38961,4.417631,26,19.16234,9.501908,63
PDBP,36,24.88727,12.565377,79,5.021818,4.568964,25,19.86545,9.210836,64
STEADY-PD3,0,22.12287,8.551531,52,3.843003,2.241253,10,18.27986,7.556874,45
STEADY-PD3,36,21.91541,11.696572,57,4.270758,2.825026,24,17.6238,9.964769,46
SURE-PD3,0,21.99209,9.076321,54,3.888889,2.351577,11,18.10278,7.701282,48


In [12]:
#dim(AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP)
dim(AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP)

In [13]:
# Cross check with the part III total score from AMP-PD data
str(PD_MDSUPDRSIII)
# Mean and sd scores across COHORT-VISITS
PD_MDSUPDRSIII %>% group_by(COHORT, visit_month) %>%
     summarise(across("mds_updrs_part_iii_summary_score", list(mean = mean_no_na, sd = sd_no_na, max = max_no_na)))

spec_tbl_df [4,894 × 38] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ID                                                       : chr [1:4894] "PD-PDAA503EF5" "PD-PDAA503EF5" "PD-PDAA503EF5" "PD-PDAA503EF5" ...
 $ COHORT                                                   : chr [1:4894] "PDBP" "PDBP" "PDBP" "PDBP" ...
 $ visit_name                                               : chr [1:4894] "M0" "M12" "M18" "M24" ...
 $ visit_month                                              : num [1:4894] 0 12 18 24 30 36 6 0 0 12 ...
 $ code_upd2301_speech_problems                             : num [1:4894] 2 2 1 1 1 1 1 2 1 0 ...
 $ code_upd2302_facial_expression                           : num [1:4894] 0 1 0 0 1 0 0 2 0 1 ...
 $ code_upd2303a_rigidity_neck                              : num [1:4894] 0 2 1 0 1 2 1 1 0 1 ...
 $ code_upd2303b_rigidity_rt_upper_extremity                : num [1:4894] 1 1 1 1 1 1 1 1 1 1 ...
 $ code_upd2303c_rigidity_left_upper_extremity              : num [1:4894] 2 1 1 1

`summarise()` has grouped output by 'COHORT'. You can override using the `.groups` argument.



COHORT,visit_month,mds_updrs_part_iii_summary_score_mean,mds_updrs_part_iii_summary_score_sd,mds_updrs_part_iii_summary_score_max
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
PDBP,0,25.48225,13.69725,83
PDBP,6,22.7056,13.377573,74
PDBP,12,22.45036,13.085625,83
PDBP,18,22.36389,12.787891,91
PDBP,24,23.22543,12.561549,80
PDBP,30,23.55195,12.936599,89
PDBP,36,24.88727,12.565377,79
STEADY-PD3,0,22.12287,8.551531,52
STEADY-PD3,36,21.90647,11.698617,57
SURE-PD3,0,21.98467,9.068954,54


In [14]:
head(AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP)

ID,visit_month,UPDRSIIItotal_imputed,UPDRSIII_measure_total,UPDRSIIIaxial_imputed,UPDRSIII_measure_axial,UPDRSIIIlimb_imputed,UPDRSIII_measure_limb
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<chr>
PD-PDAA503EF5,0,20,V0_UPDRS_III_total,5,V0_UPDRS_III_axial,15,V0_UPDRS_III_limb
PD-PDAA503EF5,12,31,V12_UPDRS_III_total,9,V12_UPDRS_III_axial,22,V12_UPDRS_III_limb
PD-PDAA503EF5,18,27,V18_UPDRS_III_total,7,V18_UPDRS_III_axial,20,V18_UPDRS_III_limb
PD-PDAA503EF5,24,20,V24_UPDRS_III_total,5,V24_UPDRS_III_axial,15,V24_UPDRS_III_limb
PD-PDAA503EF5,30,25,V30_UPDRS_III_total,5,V30_UPDRS_III_axial,20,V30_UPDRS_III_limb
PD-PDAA503EF5,36,34,V36_UPDRS_III_total,4,V36_UPDRS_III_axial,30,V36_UPDRS_III_limb


# Saving the PDBP imputed data

In [16]:
write.csv(AMPPD_PDonly_36months_long_updrsIII_imputed_PDBP, "/home/jupyter/notebooks/working_dir/PDBPoutput_imputed.csv", row.names = FALSE)