In [1]:
#export 
suppressMessages(source("exp/nb_04.R"))

“package ‘survival’ was built under R version 4.1.1”


In [2]:
#export
library(mboost)

Loading required package: stabs


Attaching package: ‘stabs’


The following object is masked from ‘package:randomForestSRC’:

    subsample


The following object is masked from ‘package:mlr’:

    subsample



Attaching package: ‘mboost’


The following object is masked from ‘package:tidyr’:

    extract


The following object is masked from ‘package:ggplot2’:

    %+%




## Meta-Learner

In [3]:
#export
# Identifiers of the survival learning algorithms, one per line.
# The order matters: entries are looked up by position elsewhere.
learning_algs = c(
    "Cox_PH_model",
    "Ridge",
    "Elastic_Net",
    "Lasso",
    "Gradient_Boosting_tree_based",
    "Gradient_Boosting_linear_model_based",
    "Random_Survival_Forests",
    "Maximally_selected_rank_statistics_Random_Forests",
    "Survival_Tree"
)

In [4]:
#export
# One mlr learner per entry of `learning_algs`, in the same order; the list is
# named by those ids on the last line.
#
# NOTE(review): "Ridge" sets no `s`, unlike Elastic_Net and Lasso which use
#   "lambda.min" -- confirm this difference is intended.
# NOTE(review): the id of learner 8 mentions maximally selected rank statistics
#   but no `splitrule` is passed to surv.ranger -- confirm the default matches.
learners = list(makeLearner("surv.coxph",           id = learning_algs[[1]]),
                makeLearner("surv.cvglmnet",        id = learning_algs[[2]], alpha = 0,   nfolds=20),
                makeLearner("surv.cvglmnet",        id = learning_algs[[3]], alpha = 0.5, nfolds=20, s="lambda.min"),
                makeLearner("surv.cvglmnet",        id = learning_algs[[4]], alpha = 1,   nfolds=20, s="lambda.min"),
                # "btree" = tree base-learners, "bols" = linear base-learners.
                # These two were crossed, so each id described the other model.
                makeLearner("surv.gamboost",        id = learning_algs[[5]], baselearner = "btree"),
                makeLearner("surv.gamboost",        id = learning_algs[[6]], baselearner = "bols" ),
                makeLearner("surv.randomForestSRC", id = learning_algs[[7]]),
                makeLearner("surv.ranger",          id = learning_algs[[8]]),
                makeLearner("surv.rpart",           id = learning_algs[[9]])
               )
names(learners) = learning_algs

In [5]:
#export
# Cross-validate every learner on one survival data set.
#
# df:       data frame containing the target columns `survival_time` and `event`.
# learners: list of mlr learners; each one is resampled on the same task.
#
# Returns a list with a single element `resamplers`, which holds the result of
# mlr::resample() for each learner (same names as `learners`).
MetaLearner = function(df, learners){
    # Build the task and the 5-fold CV description once. As arguments of
    # purrr::partial() (purrr >= 0.3.0) they were re-evaluated on every call,
    # i.e. rebuilt for each learner.
    task       = makeSurvTask(data = df, target = c("survival_time", "event"))
    resampling = makeResampleDesc("CV", iters = 5)
    list(
        resamplers = map(learners, function(learner){
            mlr::resample(learner,
                          task       = task,
                          resampling = resampling,
                          show.info  = FALSE)
        })
    )
}

In [6]:
#export
# Pre-fill the `learners` argument so that MetaLearner(df) can be called (and
# mapped) with a data frame only.
# NOTE(review): this rebinds the name MetaLearner; running this cell a second
# time would wrap the already-partialised function again -- confirm this is
# only executed once per session.
MetaLearner = purrr::partial(MetaLearner, learners = learners)

In [7]:
#export
# Mean test C-index of one resampling result.
#
# resampler: object whose `measures.test$cindex` holds the per-fold C-indices.
#
# Values below 0.5 are good anti-predictors, so each fold value is mirrored
# around 0.5 before averaging; NA folds are ignored.
get_c_index = function(resampler){
    fold_scores = resampler$measures.test$cindex
    mirrored    = abs(fold_scores - 0.5) + 0.5
    mean(mirrored, na.rm = TRUE)
}

In [8]:
#export
# Collect the C-index of every algorithm into a data frame.
#
# resampler: list of resampling results, one per algorithm; each element is
#            passed to get_c_index().
#
# Returns a data frame whose columns carry the names of `resampler` and hold
# the corresponding C-index values.
get_c_index_for_algs = function(resampler){
    scores       = map(resampler, get_c_index)
    score_matrix = matrix(unlist(scores), ncol = length(scores), byrow = FALSE)
    result       = data.frame(score_matrix, stringsAsFactors = FALSE)
    names(result) = names(scores)
    result
}

In [9]:
#export
# Keep only the rows of `df` that contain no missing values.
#
# df: a data frame.
#
# Returns a data frame with the same columns as `df`. `drop = FALSE` keeps the
# result a data frame even when `df` has a single column (plain `df[rows, ]`
# would collapse it to a vector).
complete_df = function(df){
    df[complete.cases(df), , drop = FALSE]
}

In [10]:
# Duplicate the third column of a three-column data frame as `tmp`.
#
# df: a data frame.
#
# Returns `df` unchanged unless it has exactly three columns; in that case a
# copy of column 3 is appended under the name `tmp`.
add_col = function(df){
    if (ncol(df) != 3) {
        return(df)
    }
    df$tmp = df[, 3]
    df
}

## Load data

In [11]:
# Read the four collections of data frames from `data_path`.
# NOTE(review): `data_path` is assigned only in a later cell of this notebook;
# at this point it presumably comes from the sourced exp/nb_04.R -- confirm.
dfs_random             = readRDS(paste0(data_path, "dfs_random.rds"))
dfs_signatures         = readRDS(paste0(data_path, "dfs_signatures.rds"))
dfs_feature_selections = readRDS(paste0(data_path, "dfs_feature_selections.rds"))
dfs_npi                = readRDS(paste0(data_path, "dfs_npi.rds"))

# In this notebook `dfs` is only read through names(dfs), to label output files.
dfs = dfs_random

In [12]:
# Directory prefix used when saving the meta-learner results below.
data_path = "../data/metalearners/"

#### Remove rows with NAs in some dataframes:

In [13]:
# Drop incomplete rows: entry 6 of the NPI data, and every data frame in
# entries 6 and 7 of the other three collections.
dfs_npi[[6]] = complete_df(dfs_npi[[6]])
for (i in c(6, 7)) {
    dfs_feature_selections[[i]] = map(dfs_feature_selections[[i]], complete_df)
    dfs_signatures[[i]]         = map(dfs_signatures[[i]],         complete_df)
    dfs_random[[i]]             = map(dfs_random[[i]],             complete_df)
}

#### A trick to make the algorithms work with three-column dfs (the third column is duplicated)

In [14]:
# Run add_col() over every data frame of signature entries 1 to 8.
for (i in 1:8) {
    dfs_signatures[[i]] = map(dfs_signatures[[i]], add_col)
}

### RUN

In [15]:
# Indices of the data sets, one per entry of `dfs_npi`.
# seq_along() yields an empty sequence for an empty list, where
# 1:length(x) would give c(1, 0).
ν = seq_along(dfs_npi)

#### NPI

In [23]:
# Fit and save the meta-learner on the NPI data of every data set except 7:
# TCGA (i = 7) does not have NPIs!
for (i in ν[-7]) {
    print(i)
    df = dfs_npi[[i]]
    # Workaround for using glmnet: add a copy of the `npi` column.
    df$dummy = df$npi
    ML = MetaLearner(df)
    saveRDS(ML, paste0(data_path, "metalearners_npi_", names(dfs)[[i]], ".rds"))
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 8


#### State-of-the-art selections

In [17]:
# Take the first entry of the feature-selection collection.
# NOTE(review): `df` is not read by the feature-selection cells below; this
# looks like a leftover from interactive exploration -- confirm.
df = dfs_feature_selections[[1]]

In [25]:
# For each data set, fit the meta-learner on every feature-selection data
# frame and save the list of results under the data set's name.
for (i in ν) {
    print(i)
    ML = map(dfs_feature_selections[[i]], MetaLearner)
    saveRDS(ML, paste0(data_path, "metalearners_feature_selections_", names(dfs)[[i]], ".rds"))
}

In [26]:
# Run data set 8 on its own and save it.
# The file name uses the explicit index 8 as well. It previously relied on
# whatever value the loop variable `i` was left with, which could label the
# results with another data set's name if the preceding loop stopped early or
# cells were run out of order.
ML = map(dfs_feature_selections[[8]], MetaLearner)
saveRDS(ML, paste0(data_path, "metalearners_feature_selections_", names(dfs)[[8]], ".rds"))

#### Random

In [27]:
# For each data set, fit the meta-learner on every data frame of the random
# collection and save the list of results under the data set's name.
for (i in ν) {
    print(i)
    ML = map(dfs_random[[i]], MetaLearner)
    saveRDS(ML, paste0(data_path, "metalearners_random_", names(dfs)[[i]], ".rds"))
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8


In [29]:
# Show the data set names; these are the names used in the output file names.
names(dfs)

#### Signatures aka gene lists

In [30]:
#for (i in c(1,2,3,4,5,6,7,8)){ #not working: 6, 8
#    tryCatch({
#    print(i)
#    ML = map(dfs_signatures[[i]], MetaLearner)
#    saveRDS(ML, paste(data_path,"metalearners_signatures_",names(dfs)[[i]],".rds", sep=""))
#        }, error=function(e){})
#}

In [16]:
# Fit the meta-learner on every signature data frame of data set 8 and save
# the results under that data set's name.
ML = map(dfs_signatures[[8]], MetaLearner)
saveRDS(ML, paste0(data_path, "metalearners_signatures_", names(dfs)[[8]], ".rds"))

## Export code

In [11]:
# Run notebook2script.py on 05_ml.ipynb through python3.
# NOTE(review): presumably this writes the cells tagged `#export` to a script
# file -- confirm against notebook2script.py.
system("python3 notebook2script.py 05_ml.ipynb")