# Investigating correlation between features and tumor stage

In [6]:
setwd("/home/data/project_code/landstrom_core/prognostic_model_development/r/notebooks")
library(ggplot2)
library(tidyverse)
source("../getTCGAData.R")
source("../preprocessTCGAData.R")
source("../KM_analysis.R")
source("../Heatmaps.R")
source("../enet.R")
library(ggpubr)
library(rstatix)

── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1
[32m✔[39m [34mpurrr  [39m 0.3.4     

── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, un

# Function

In [7]:
#
# Define function for adding the clinical variables 
#
addClinVar = function(data, clin.var) {
    if ("Age.oth" %in% clin.var) {
        data$Age.oth <- data$age_at_diagnosis.clin
    } 
    if ("Tumor.stage.oth" %in% clin.var){
        data$Tumor.stage.oth = factor(map_chr(data$ajcc_pathologic_stage.clin, reformatTumorStage))
    }
    if ("Gender.oth" %in% clin.var){
        data$Gender.oth <- factor(data$gender.clin)    
    } 
    if ("Gleason.group.oth" %in% clin.var) {
        
        # Determine the Gleason group 
        data$Gleason.group.oth = map2_chr(data$primary_gleason_grade.clin, 
                                           data$secondary_gleason_grade.clin, 
                                           determineGleasonGroup)

        # Set up the factor levels 
        data$Gleason.group.oth = factor(data$Gleason.group, 
                                    levels = c("Gleason group 1", "Gleason group 2"))
    }
    return(data)
}

In [64]:
# Helper 
incModel = function(x, models){
    res = c()
    x = paste(x, "expression")
    print(models %>% select(End.point, Feature))
    for (i in 1:nrow(models)){
        # Get model features as vector
        model.feat = unlist(str_split(models$Feature[i], ","))
        model.clin.end = models$End.point[i]
        print(model.clin.end)
        if (x %in% model.feat){
            res = c(res, model.clin.end)
        }
    }
    if (is.null(res) == F){
        return(paste(res, collapse = ","))
    }
    else 
        return("")
}

# Check features included
checkFeaturesIncluded = function(stats, eval.results){
    clin.models.inc.feat = unlist(map(stats$Feature, incModel, models = eval.results))
    stats$Included.in.models = clin.models.inc.feat
    return(stats)
}

# Setting up paths and variables

In [41]:
# Read in the table including the clinical features for each cancer type
clin.feat.tb = read.table("/workstation/project_data/landstrom_core/clin_features_final.csv", sep = "\t", header = T)

# Ensembl id mapping file 
ens.id.mapping = "/home/organisms/Human/hg38/Homo_sapiens.GRCh38_March2022/ENSEMBLE_to_SYMBOL.csv"

# Input dir data 
input.dir.data.path = file.path("/workstation/project_data/landstrom_core/rdata/manuscript_work/")

# Output directory
dir.res.root = file.path("/workstation/project_results/landstrom_core/Features_vs_tumor_stage/")
dir.create(dir.res.root)

# Gene list  
gene.list.file = read.table("/workstation/project_data/landstrom_core/Customer_genes.tsv", 
                            sep = "\t", header = F)
gene.list = gene.list.file$V1

# Read in the prognostic model results 
eval.results.with.features = read.csv("/workstation/project_results/landstrom_core/prognostic_model_development/Manuscript_tables/Table_evaluation_best_clin_end_with_reference_features_inc.csv")

“'/workstation/project_results/landstrom_core/Features_vs_tumor_stage' already exists”


# Prepare boxplots and test for significance

Output dir :

In [42]:
# Output dir :
boxplt.results.dir = file.path(dir.res.root, "Features_and_tumor_stage_boxplots")
dir.create(boxplt.results.dir)

“'/workstation/project_results/landstrom_core/Features_vs_tumor_stage//Features_and_tumor_stage_boxplots' already exists”


In [43]:
# Store all statistical test results to a table 
stat.results.ls = list()

In [44]:
eval.results.with.features$Cancer.type

Run analysis

In [65]:
plots.ls = list()
i = 1

for (cancer.type in clin.feat.tb$Ctype){
    
    # Get Clinical variables
    clin.var = unlist(strsplit(clin.feat.tb$Features[clin.feat.tb$Ctype == cancer.type], split = ","))
    
    # Add oth-suffix 
    clin.var = paste0(clin.var, ".oth")
    
    # Read in the preprocessed dataset if continued 
    tcga.dataset = readRDS(file.path(input.dir.data.path, cancer.type, "tcga.dataset.rds"))
    
    # Add clinical variables to dataset
    tcga.dataset = addClinVar(tcga.dataset, clin.var)
    
    # Selected variables 
    variables.selected = c(paste0(gene.list, ".exp"), "Tumor.stage.oth")
    
    # Selected variables         
    data.selected.exp = tcga.dataset %>% 
                        dplyr::select(one_of(variables.selected))
    
    if ("Tumor.stage.oth" %in% colnames(data.selected.exp)){
        
        data.selected.exp.long = tidyr::pivot_longer(data.selected.exp, -Tumor.stage.oth,
                                                         names_to = "Feature", 
                                                         values_to = "Expression")
        
        data.selected.exp.long = data.selected.exp.long %>% filter(is.na(Tumor.stage.oth) == F)
        
        # Fix feature names 
        data.selected.exp.long$Feature = stringr::str_replace(data.selected.exp.long$Feature,".exp","")

        # Calculate the max expression for each feature 
        y.max = data.selected.exp.long %>% 
             group_by(Feature) %>% 
             summarise(ymax = max(Expression, na.rm = T))

        y.max$ymax = y.max$ymax + 0.2

        stat.test <- data.selected.exp.long %>%
                  group_by(Feature) %>%
                  kruskal_test(Expression ~ Tumor.stage.oth)

        stat.test = dplyr::left_join(stat.test, y.max, by = "Feature")
        
        # Add 
        eval.results.cancer.type = eval.results.with.features %>% 
                                                filter(Cancer.type == cancer.type)
        
        # Check if features are included in the models
        # Returns stats.test with added column indicating in which models by Clin end point 
        # the feature occurs
        stat.test = checkFeaturesIncluded(stat.test, eval.results.cancer.type)
        print(stat.test)
        
        # Store result df to list
        stat.results.ls[[cancer.type]] = stat.test
        
        # Store the data
        plot.data = list(stat = stat.test, 
                         exp.data = data.selected.exp.long)
        saveRDS(object = plot.data, file = file.path(boxplt.results.dir, paste(cancer.type, sep = "_", "data_for_plot", ".rds")))
        
        # Prepare boxplot
        gg = data.selected.exp.long %>% 
               ggplot(aes(x = Feature)) + geom_boxplot(aes(y = Expression, fill = Tumor.stage.oth))+
        geom_text(data = stat.test, aes(x = Feature, label = paste0("p = ", p), y = ymax), position = position_dodge(0.8)) + 
        theme_pubr() + scale_fill_manual(values = c("#c6dbef", "#6baed6", "#2171b5", "#08306b"))


        plots.ls[[i]] = gg 
        
        ggsave(plot = gg,
              filename = file.path(boxplt.results.dir, paste0(cancer.type, "_boxplot.pdf")),
               height = 12, width = 16)
        
        i = i + 1
    
    }
}

“Unknown columns: `Tumor.stage.oth`”


  End.point                                  Feature
1        OS Age,Gender,Tumor stage,TGFBR1 expression
2       DSS        Age,Tumor stage,TGFBR1 expression
3       PFI            Tumor stage,TGFBR1 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                  Feature
1        OS Age,Gender,Tumor stage,TGFBR1 expression
2       DSS        Age,Tumor stage,TGFBR1 expression
3       PFI            Tumor stage,TGFBR1 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                  Feature
1        OS Age,Gender,Tumor stage,TGFBR1 expression
2       DSS        Age,Tumor stage,TGFBR1 expression
3       PFI            Tumor stage,TGFBR1 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                  Feature
1        OS Age,Gender,Tumor stage,TGFBR1 expression
2       DSS        Age,Tumor stage,TGFBR1 expression
3       PFI            Tumor stage,TGFBR1 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                         

“Removed 52 rows containing non-finite values (stat_boxplot).”


  End.point                                  Feature
1        OS Age,Gender,Tumor stage,MYC amplification
2       DSS         Age,Tumor stage,AURKB expression
3       DFI  Age,Gender,Tumor stage,APPL2 expression
4       PFI             Tumor stage,KIF23 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point                                  Feature
1        OS Age,Gender,Tumor stage,MYC amplification
2       DSS         Age,Tumor stage,AURKB expression
3       DFI  Age,Gender,Tumor stage,APPL2 expression
4       PFI             Tumor stage,KIF23 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point                                  Feature
1        OS Age,Gender,Tumor stage,MYC amplification
2       DSS         Age,Tumor stage,AURKB expression
3       DFI  Age,Gender,Tumor stage,APPL2 expression
4       PFI             Tumor stage,KIF23 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point                                  Feature
1        OS Age,Gender,Tumor stage,

“Removed 169 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”


  End.point                                       Feature
1        OS Tumor stage,KIF23 expression,AURKB expression
2       PFI                              KIF23 expression
[1] "OS"
[1] "PFI"
  End.point                                       Feature
1        OS Tumor stage,KIF23 expression,AURKB expression
2       PFI                              KIF23 expression
[1] "OS"
[1] "PFI"
  End.point                                       Feature
1        OS Tumor stage,KIF23 expression,AURKB expression
2       PFI                              KIF23 expression
[1] "OS"
[1] "PFI"
  End.point                                       Feature
1        OS Tumor stage,KIF23 expression,AURKB expression
2       PFI                              KIF23 expression
[1] "OS"
[1] "PFI"
  End.point                                       Feature
1        OS Tumor stage,KIF23 expression,AURKB expression
2       PFI                              KIF23 expression
[1] "OS"
[1] "PFI"
  End.point                        

“Removed 156 rows containing non-finite values (stat_boxplot).”


  End.point                                 Feature
1        OS Age,Gender,Tumor stage,AURKA expression
2       DSS Age,Gender,Tumor stage,AURKA expression
3       PFI     Gender,Tumor stage,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                 Feature
1        OS Age,Gender,Tumor stage,AURKA expression
2       DSS Age,Gender,Tumor stage,AURKA expression
3       PFI     Gender,Tumor stage,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                 Feature
1        OS Age,Gender,Tumor stage,AURKA expression
2       DSS Age,Gender,Tumor stage,AURKA expression
3       PFI     Gender,Tumor stage,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                 Feature
1        OS Age,Gender,Tumor stage,AURKA expression
2       DSS Age,Gender,Tumor stage,AURKA expression
3       PFI     Gender,Tumor stage,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                 Feature


“Removed 65 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”
“Unknown columns: `Tumor.stage.oth`”
“Unknown columns: `Tumor.stage.oth`”


  End.point                                                  Feature
1        OS                                   Age,Gender,Tumor stage
2       DSS Age,Gender,Tumor stage,APPL1 expression,VPS4B expression
3       PFI                                          Age,Tumor stage
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                                  Feature
1        OS                                   Age,Gender,Tumor stage
2       DSS Age,Gender,Tumor stage,APPL1 expression,VPS4B expression
3       PFI                                          Age,Tumor stage
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                                  Feature
1        OS                                   Age,Gender,Tumor stage
2       DSS Age,Gender,Tumor stage,APPL1 expression,VPS4B expression
3       PFI                                          Age,Tumor stage
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                                  Feature
1        OS     

“Removed 273 rows containing non-finite values (stat_boxplot).”


  End.point                          Feature
1        OS     Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,APPL1 expression
3       DFI          Gender,APPL1 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
  End.point                          Feature
1        OS     Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,APPL1 expression
3       DFI          Gender,APPL1 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
  End.point                          Feature
1        OS     Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,APPL1 expression
3       DFI          Gender,APPL1 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
  End.point                          Feature
1        OS     Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,APPL1 expression
3       DFI          Gender,APPL1 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
  End.point                          Feature
1        OS     Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,APPL1 expression
3       DFI          Gender,A

“Removed 624 rows containing non-finite values (stat_boxplot).”


  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                                                                              Feature
1                                    Age,Tumor stage,TRAF6 expression,AR expression,KIF23 expression,VPS4B expression,PTEN expression
2 Age,Tumor stage,TRAF6 expression,AR expression,AURKB expression,AURKA expression,KIF23 expression,VPS4B expression,APPL2 expression
3                                                                                                    Gender,Tumor stage,AR expression
4                                             Age,Gender,Tumor stage,TRAF6 expression,VPS4B expression,AURKB expression,AR expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                                                                              Feature
1                                    

“Removed 91 rows containing non-finite values (stat_boxplot).”


  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                                                                               Feature
1                                                         Tumor stage,AURKB expression,AR expression,VPS4A expression,AURKA expression
2 Gender,Tumor stage,AURKB expression,VPS4A expression,AURKA expression,MYC expression,AR expression,APPL2 expression,TRAF6 expression
3                                                                                        Tumor stage,AURKA expression,KIF23 expression
4                                                               Age,Gender,Tumor stage,AURKB expression,VPS4A expression,AR expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                                                                               Feature
1                              

“Removed 26 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”
“Unknown columns: `Tumor.stage.oth`”


  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                       Feature
1                                             Age,Tumor stage,KIF23 expression
2 Tumor stage,AURKA expression,MYC deletion,MYC amplification,AURKB expression
3                                                 Tumor stage,KIF23 expression
4                                Tumor stage,AURKA expression,KIF23 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                       Feature
1                                             Age,Tumor stage,KIF23 expression
2 Tumor stage,AURKA expression,MYC deletion,MYC amplification,AURKB expression
3                                                 Tumor stage,KIF23 expression
4                                Tumor stage,AURKA expression,KIF23 expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point


“Removed 78 rows containing non-finite values (stat_boxplot).”


  End.point                                                             Feature
1        OS Age,Tumor stage,KIF23 expression,AURKA expression,TGFBR1 expression
2       DSS                                                    KIF23 expression
3       DFI             Age,Gender,Tumor stage,PTEN expression,AURKB expression
4       PFI           Age,Gender,Tumor stage,TGFBR1 expression,AURKB expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point                                                             Feature
1        OS Age,Tumor stage,KIF23 expression,AURKA expression,TGFBR1 expression
2       DSS                                                    KIF23 expression
3       DFI             Age,Gender,Tumor stage,PTEN expression,AURKB expression
4       PFI           Age,Gender,Tumor stage,TGFBR1 expression,AURKB expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point                                                             Feature
1        OS Age,Tumor stage,KIF23 expressi

“Removed 117 rows containing non-finite values (stat_boxplot).”


  End.point                                                 Feature
1        OS Age,Gender,Tumor stage,VPS4A expression,PTEN expression
2       PFI                            Tumor stage,TRAF6 expression
[1] "OS"
[1] "PFI"
  End.point                                                 Feature
1        OS Age,Gender,Tumor stage,VPS4A expression,PTEN expression
2       PFI                            Tumor stage,TRAF6 expression
[1] "OS"
[1] "PFI"
  End.point                                                 Feature
1        OS Age,Gender,Tumor stage,VPS4A expression,PTEN expression
2       PFI                            Tumor stage,TRAF6 expression
[1] "OS"
[1] "PFI"
  End.point                                                 Feature
1        OS Age,Gender,Tumor stage,VPS4A expression,PTEN expression
2       PFI                            Tumor stage,TRAF6 expression
[1] "OS"
[1] "PFI"
  End.point                                                 Feature
1        OS Age,Gender,Tumor stage,VPS4A

“Removed 39 rows containing non-finite values (stat_boxplot).”


  End.point
1        OS
2       DSS
3       PFI
                                                                                 Feature
1 Gender,Tumor stage,KIF23 expression,AURKA expression,TGFBR1 expression,PTEN expression
2                                                                       KIF23 expression
3                                                                       KIF23 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point
1        OS
2       DSS
3       PFI
                                                                                 Feature
1 Gender,Tumor stage,KIF23 expression,AURKA expression,TGFBR1 expression,PTEN expression
2                                                                       KIF23 expression
3                                                                       KIF23 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point
1        OS
2       DSS
3       PFI
                                                                                 Featu

“Removed 13 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”


  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                               Feature
1                                Age,VPS4B expression,KIF23 expression
2                                                     KIF23 expression
3 Age,APPL1 expression,KIF23 expression,MYC expression,PTEN expression
4         Tumor stage,KIF23 expression,VPS4B expression,MYC expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                               Feature
1                                Age,VPS4B expression,KIF23 expression
2                                                     KIF23 expression
3 Age,APPL1 expression,KIF23 expression,MYC expression,PTEN expression
4         Tumor stage,KIF23 expression,VPS4B expression,MYC expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                

“Removed 104 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”
“Unknown columns: `Tumor.stage.oth`”


  End.point                                                  Feature
1        OS                         Age,Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,Gender,APPL1 expression,KIF23 expression
3       PFI                  Age,Tumor stage,Gender,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                                  Feature
1        OS                         Age,Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,Gender,APPL1 expression,KIF23 expression
3       PFI                  Age,Tumor stage,Gender,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                                  Feature
1        OS                         Age,Tumor stage,APPL1 expression
2       DSS Age,Tumor stage,Gender,APPL1 expression,KIF23 expression
3       PFI                  Age,Tumor stage,Gender,AURKA expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point                                                  Feature
1        OS     

“Removed 130 rows containing non-finite values (stat_boxplot).”


[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)
[1] End.point Feature  
<0 rows> (or 0-length row.names)
[1] NA
character(0)

“Removed 65 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”


  End.point
1       PFI
                                                                                     Feature
1 Age,Gender,Tumor stage,VPS4A expression,APPL1 expression,KIF23 expression,TRAF6 expression
[1] "PFI"
  End.point
1       PFI
                                                                                     Feature
1 Age,Gender,Tumor stage,VPS4A expression,APPL1 expression,KIF23 expression,TRAF6 expression
[1] "PFI"
  End.point
1       PFI
                                                                                     Feature
1 Age,Gender,Tumor stage,VPS4A expression,APPL1 expression,KIF23 expression,TRAF6 expression
[1] "PFI"
  End.point
1       PFI
                                                                                     Feature
1 Age,Gender,Tumor stage,VPS4A expression,APPL1 expression,KIF23 expression,TRAF6 expression
[1] "PFI"
  End.point
1       PFI
                                                                                     Feature
1 A

“Removed 4069 rows containing non-finite values (stat_boxplot).”


  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                                Feature
1                                Age,Gender,Tumor stage,TGFBR1 expression,AR expression
2 Age,Gender,Tumor stage,TP53 expression,AR expression,TGFBR1 expression,MYC expression
3                                  Gender,Tumor stage,AURKA expression,KIF23 expression
4       Gender,Tumor stage,MYC deletion,MYC amplification,TP53 expression,AR expression
[1] "OS"
[1] "DSS"
[1] "DFI"
[1] "PFI"
  End.point
1        OS
2       DSS
3       DFI
4       PFI
                                                                                Feature
1                                Age,Gender,Tumor stage,TGFBR1 expression,AR expression
2 Age,Gender,Tumor stage,TP53 expression,AR expression,TGFBR1 expression,MYC expression
3                                  Gender,Tumor stage,AURKA expression,KIF23 expression
4       Gender,Tumor stage,MYC deletion,MYC ampli

“Removed 832 rows containing non-finite values (stat_boxplot).”


  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tumor stage,AR expression
[1] "DFI"
  End.point                       Feature
1       DFI Age,Tu

“Removed 65 rows containing non-finite values (stat_boxplot).”
“Unknown columns: `Tumor.stage.oth`”
“Unknown columns: `Tumor.stage.oth`”


  End.point
1        OS
2       DSS
3       PFI
                                                                                Feature
1                          Tumor stage,APPL2 expression,MYC expression,AURKA expression
2 Age,Gender,Tumor stage,APPL2 expression,MYC expression,AURKA expression,AR expression
3                                         Tumor stage,AURKA expression,APPL1 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point
1        OS
2       DSS
3       PFI
                                                                                Feature
1                          Tumor stage,APPL2 expression,MYC expression,AURKA expression
2 Age,Gender,Tumor stage,APPL2 expression,MYC expression,AURKA expression,AR expression
3                                         Tumor stage,AURKA expression,APPL1 expression
[1] "OS"
[1] "DSS"
[1] "PFI"
  End.point
1        OS
2       DSS
3       PFI
                                                                                Feature
1     

In [66]:
stat.results = bind_rows(stat.results.ls, .id = "Cancer type")
write.csv(stat.results, file.path(dir.res.root, "Features_and_tumor_stage_boxplots", "Features_vs_tumor_stage_stat_results.csv"))