# NAGuide R methods

Setup basic methods and packages used for all methods

- BiocManager could be moved to methods who are installed from BioConductor

In [1]:
packages_base_R <-
  c("BiocManager", "reshape2", "data.table", "readr", "tibble")

install_rpackage  <- function(pkg) {
  # If not installed, install the package
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
  
}

# used in the large imputation function for two packages
install_bioconductor  <- function(pkg) {
  # If not installed, install the package
  if (!require(pkg, character.only = TRUE)) {
    BiocManager::install(pkg)
    library(pkg, character.only = TRUE)
  }
  
}


for (package in packages_base_R) {
  # Check if the package is already installed
  install_rpackage(pkg = package)
}


Loading required package: BiocManager



Bioconductor version '3.18' is out-of-date; the current release version '3.19'
  is available with R version '4.4'; see https://bioconductor.org/install



Loading required package: reshape2



Loading required package: data.table



“package ‘data.table’ was built under R version 4.3.3”



Attaching package: ‘data.table’




The following objects are masked from ‘package:reshape2’:

    dcast, melt




Loading required package: readr



“package ‘readr’ was built under R version 4.3.2”


Loading required package: tibble



setup can be tricky... trying to integrate as much as possible into conda environment

Copied from [NAGuideR's github](https://github.com/wangshisheng/NAguideR/blob/15ec86263d5821990ad39a8d9f378cf4d76b25fb/inst/NAguideRapp/app.R#L1705-L1849) RShiny application. Adapted to run as standalone function in context of the Snakemake workflow.

- `df` and `df1` ?
- seems quite hacky
- code is only slightly adapted from repo to run here, mainly to install packages on the fly

In [2]:
nafunctions <- function(x, method = "zero") {
  df <- df1 <- as.data.frame(x)
  method <- tolower(method)
  if (method == "zero") {
    df[is.na(df)] <- 0
  }
  else if (method == "minimum") {
    df[is.na(df)] <- min(df1, na.rm = TRUE)
  }
  else if (method == "colmedian") {
    install_rpackage('e1071')
    df <- impute(df1, what = "median")
  }
  else if (method == "rowmedian") {
    install_rpackage('e1071')
    dfx <- impute(t(df1), what = "median")
    df <- t(dfx)
  }
  else if (method == "knn_impute") {
    install_bioconductor('impute')
    data_zero1 <-
      impute.knn(as.matrix(df1),
                 k = 10,
                 rowmax = 1,
                 colmax = 1)#rowmax = 0.9, colmax = 0.9
    df <- data_zero1$data
  }
  else if (method == "seqknn") {
    if (!require(SeqKnn)) {
      install.packages("src/R_NAGuideR/SeqKnn_1.0.1.tar.gz",
                       repos = NULL,
                       type = "source")
      library(SeqKnn)
    }
    df <- SeqKNN(df1, k = 10)
  }
  else if (method == "bpca") {
    install_bioconductor('pcaMethods')
    data_zero1 <-
      pcaMethods::pca(
        as.matrix(df1),
        nPcs = ncol(df1) - 1,
        method = "bpca",
        maxSteps = 100
      )
    df <- completeObs(data_zero1)
  }
  else if (method == "svdmethod") {
    install_bioconductor('pcaMethods')
    data_zero1 <-
      pcaMethods::pca(as.matrix(df1),
                      nPcs = ncol(df1) - 1,
                      method = "svdImpute")
    df <- completeObs(data_zero1)
  }
  else if (method == "lls") {
    install_bioconductor('pcaMethods')
    data_zero1 <- llsImpute(t(df1), k = 10)
    df <- t(completeObs(data_zero1))
  }
  else if (method == "mle") {
    install_rpackage('norm')
    xxm <- as.matrix(df1)
    ss <- norm::prelim.norm(xxm)
    thx <- norm::em.norm(ss)
    norm::rngseed(123)
    df <- norm::imp.norm(ss, thx, xxm)
  }
  else if (method == "qrilc") {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage('imputeLCMD')
    xxm <- t(df1)
    data_zero1 <-
      imputeLCMD::impute.QRILC(xxm, tune.sigma = 1)[[1]]
    df <- t(data_zero1)
  }
  else if (method == "mindet") {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage('imputeLCMD')
    xxm <- as.matrix(df1)
    df <- imputeLCMD::impute.MinDet(xxm, q = 0.01)
  }
  else if (method == "minprob") {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage('imputeLCMD')
    xxm <- as.matrix(df1)
    df <-
      imputeLCMD::impute.MinProb(xxm, q = 0.01, tune.sigma = 1)
  }
  else if (method == "irm") {
    install_rpackage('VIM')
    df <- irmi(df1, trace = TRUE, imp_var = FALSE)
    rownames(df) <- rownames(df1)
  }
  else if (method == "impseq") {
    install_rpackage('rrcovNA')
    df <- impSeq(df1)
  }
  else if (method == "impseqrob") {
    install_rpackage('rrcovNA')
    data_zero1 <- impSeqRob(df1, alpha = 0.9)
    df <- data_zero1$x
  }
  else if (method == "mice-norm") {
    install_rpackage('mice')
    minum <- 5
    datareadmi <- mice(df1,
                       m = minum,
                       seed = 1234,
                       method = "norm")
    newdatareadmi <- 0
    for (i in 1:minum) {
      newdatareadmi <- complete(datareadmi, action = i) + newdatareadmi
    }
    df <- newdatareadmi / minum
    rownames(df) <- rownames(df1)
  }
  else if (method == "mice-cart") {
    install_rpackage('mice')
    minum <- 5
    datareadmi <- mice(df1,
                       m = minum,
                       seed = 1234,
                       method = "cart")
    newdatareadmi <- 0
    for (i in 1:minum) {
      newdatareadmi <- complete(datareadmi, action = i) + newdatareadmi
    }
    df <- newdatareadmi / minum
    rownames(df) <- rownames(df1)
  }
  else if (method == "trknn") {
    source('src/R_NAGuideR/Imput_funcs.r')
    # sim_trKNN_wrapper <- function(data) {
    #   result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t
    #   return(result)
    # }
    # df1x <- sim_trKNN_wrapper(t(df1))
    # df<-as.data.frame(t(df1x))
    df <-
      imputeKNN(as.matrix(df),
                k = 10,
                distance = 'truncation',
                perc = 0)
    df <- as.data.frame(df)
  }
  else if (method == "rf") {
    install_rpackage("missForest")
    data_zero1 <- missForest(
      t(df1),
      maxiter = 10,
      ntree = 20 # input$rfntrees
      ,
      mtry = floor(nrow(df1) ^ (1 / 3)),
      verbose = TRUE
    )
    df <- t(data_zero1$ximp)
  }
  else if (method == "pi") {
    width <- 0.3 # input$piwidth
    downshift <- 1.8 # input$pidownshift
    for (i in 1:ncol(df1)) {
      temp <- df1[[i]]
      if (sum(is.na(temp)) > 0) {
        temp.sd <- width * sd(temp[!is.na(temp)], na.rm = TRUE)
        temp.mean <-
          mean(temp[!is.na(temp)], na.rm = TRUE) - downshift * sd(temp[!is.na(temp)], na.rm = TRUE)
        n.missing <- sum(is.na(temp))
        temp[is.na(temp)] <-
          rnorm(n.missing, mean = temp.mean, sd = temp.sd)
        df[[i]] <- temp
      }
    }
    df
  }
  # else if(method=="grr"){
  #   library(DreamAI)
  #   df<-impute.RegImpute(data=as.matrix(df1), fillmethod = "row_mean", maxiter_RegImpute = 10,conv_nrmse = 1e-03)
  # }
  else if (method == "gms") {
    # install.packages('GMSimpute')
    if (!require(GMSimpute)) {
      install.packages(
        "src/R_NAGuideR/GMSimpute_0.0.1.1.tar.gz",
        repos = NULL,
        type = "source"
      )
      
      library(GMSimpute)
    }
    
    df <- GMS.Lasso(df1,
                    nfolds = 3,
                    log.scale = FALSE,
                    TS.Lasso = TRUE)
  }
  else if (method == "msimpute") {
    install_bioconductor("msImpute")
    df <- msImpute(as.matrix(df),
                   method = 'v2')
    df <- as.data.frame(df)
  }
  else if (method == "msimpute_mnar") {
    install_bioconductor("msImpute")
    df <-
      msImpute(as.matrix(df),
               method = 'v2-mnar',
               group = rep(1, dim(df)[2]))
    df <- as.data.frame(df)
  }
  else if (method == "gsimp") {
    options(stringsAsFactors = F)
    # dependencies parly for sourced file
    
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage('imputeLCMD')
    install_rpackage("magrittr")
    install_rpackage("glmnet")
    install_rpackage("abind")
    install_rpackage("foreach")
    install_rpackage("doParallel")
    source('src/R_NAGuideR/GSimp.R')
    
    # wrapper function with data pre-processing
    pre_processing_GS_wrapper <- function(data_raw_log) {
      # samples in rows, features in columns #
      # Initialization #
      data_raw_log_qrilc <- as.data.frame(data_raw_log) %>%
        impute.QRILC() %>% extract2(1)
      # Centralization and scaling #
      data_raw_log_qrilc_sc <-
        scale_recover(data_raw_log_qrilc, method = 'scale')
      # Data after centralization and scaling #
      data_raw_log_qrilc_sc_df <- data_raw_log_qrilc_sc[[1]]
      # Parameters for centralization and scaling (for scaling recovery) #
      data_raw_log_qrilc_sc_df_param <- data_raw_log_qrilc_sc[[2]]
      # NA position #
      NA_pos <- which(is.na(data_raw_log), arr.ind = T)
      # NA introduced to log-scaled-initialized data #
      data_raw_log_sc <- data_raw_log_qrilc_sc_df
      data_raw_log_sc[NA_pos] <- NA
      # Feed initialized and missing data into GSimp imputation #
      result <-
        data_raw_log_sc %>% GS_impute(
          .,
          iters_each = 50,
          iters_all = 10,
          initial = data_raw_log_qrilc_sc_df,
          lo = -Inf,
          hi = 'min',
          n_cores = 1,
          imp_model = 'glmnet_pred'
        )
      data_imp_log_sc <- result$data_imp
      # Data recovery #
      data_imp <- data_imp_log_sc %>%
        scale_recover(., method = 'recover',
                      param_df = data_raw_log_qrilc_sc_df_param) %>%
        extract2(1)
      return(data_imp)
    }
    df <- t(df) # samples in rows, feature in columns
    df <- pre_processing_GS_wrapper(df)
    df <- t(df) # features in rows, samples in columns
    
  }
  else{
    stop(paste("Unspported methods so far: ", method))
  }
  df <- as.data.frame(df)
  df
}

## Parameters

Choose one of the available methods. 
Some methods might fail for your dataset for unknown reasons
(and the error won't always be easy to understand)
```method
method = 'ZERO'
method = 'MINIMUM'
method = 'COLMEDIAN'
method = 'ROWMEDIAN'
method = 'KNN_IMPUTE'
method = 'SEQKNN'
method = 'BPCA'
method = 'SVDMETHOD'
method = 'LLS'
method = 'MLE'
mehtod = 'LLS'
method = 'QRILC'
method = 'MINDET'
method = 'MINPROB'
method = 'IRM'
method = 'IMPSEQ'
method = 'IMPSEQROB'
method = 'MICE-NORM'
method = 'MICE-CART'
method = 'RF'
method = 'PI'
method = 'GMS'
method = 'TRKNN',
method = 'MSIMPUTE'
method = 'MSIMPUTE_MNAR'
method = 'GSIMP'
```

In [3]:
train_split = 'runs/example/data/data_wide_sample_cols.csv' # test
folder_experiment = 'runs/example/'
method = 'KNN_IMPUTE'

In [4]:
# Parameters
train_split = "runs/dev_dataset_small/proteinGroups_N50/data/data_wide_sample_cols.csv"
method = "IMPSEQROB"
folder_experiment = "runs/dev_dataset_small/proteinGroups_N50"


## Dump predictions

In [5]:
df <-
  utils::read.csv(
    train_split,
    row.names = 1,
    header = TRUE,
    stringsAsFactors = FALSE
  )
df

Unnamed: 0_level_0,X2019_12_18_14_35_Q.Exactive.HF.X.Orbitrap_6070,X2019_12_19_19_48_Q.Exactive.HF.X.Orbitrap_6070,X2019_12_20_14_15_Q.Exactive.HF.X.Orbitrap_6070,X2019_12_27_12_29_Q.Exactive.HF.X.Orbitrap_6070,X2019_12_29_15_06_Q.Exactive.HF.X.Orbitrap_6070,X2019_12_29_18_18_Q.Exactive.HF.X.Orbitrap_6070,X2020_01_02_17_38_Q.Exactive.HF.X.Orbitrap_6070,X2020_01_03_11_17_Q.Exactive.HF.X.Orbitrap_6070,X2020_01_03_16_58_Q.Exactive.HF.X.Orbitrap_6070,X2020_01_03_20_10_Q.Exactive.HF.X.Orbitrap_6070,⋯,X2020_05_20_12_33_Q.Exactive.HF.X.Orbitrap_6070,X2020_05_20_15_35_Q.Exactive.HF.X.Orbitrap_6070,X2020_05_22_14_57_Q.Exactive.HF.X.Orbitrap_6070,X2020_05_22_17_43_Q.Exactive.HF.X.Orbitrap_6070,X2020_05_26_14_20_Q.Exactive.HF.X.Orbitrap_6070,X2020_05_27_13_57_Q.Exactive.HF.X.Orbitrap_6070,X2020_05_28_04_06_Q.Exactive.HF.X.Orbitrap_6070,X2020_06_01_10_22_Q.Exactive.HF.X.Orbitrap_6070,X2020_06_01_15_41_Q.Exactive.HF.X.Orbitrap_6070,X2020_06_02_09_41_Q.Exactive.HF.X.Orbitrap_6070
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
AAAS,28.34929,27.65738,28.35216,26.82554,27.40365,27.89128,25.49826,27.35187,27.61975,27.29981,⋯,27.39591,27.72123,27.80753,28.05150,27.32457,29.11944,30.08030,27.29815,27.12110,29.03787
AACS,26.13316,25.01865,23.74047,,26.94849,26.48102,,,25.62377,,⋯,25.55151,24.91598,24.71392,25.60834,26.80045,27.06147,27.37294,,,25.98913
AAMDC,,24.23623,,,23.86439,26.34755,,,23.52037,25.66036,⋯,,,,26.03032,25.51860,25.98906,26.75977,,,25.12017
AAMP,26.77693,26.27071,27.09788,,26.98163,27.84942,,25.27523,27.13555,,⋯,25.74060,,25.85700,,25.20408,27.99863,27.60088,28.31847,27.96802,26.94369
AAR2,27.24780,,27.37741,,26.51979,26.91696,,24.84589,25.97128,26.89654,⋯,25.89322,26.72637,25.69277,25.53641,27.26284,28.29443,28.02290,,26.70715,28.08796
AARS,32.19493,,32.88453,31.92642,31.84382,32.27373,30.22561,30.97931,31.41538,,⋯,31.67955,32.36784,32.39376,32.59986,33.17150,34.44770,34.62178,31.39810,31.95802,33.22043
AARS2,27.15258,26.53020,27.51450,26.15688,25.34211,,,,25.35964,25.43692,⋯,25.82221,27.01489,27.61919,27.50120,28.12450,29.08768,,23.91734,,27.45544
AASDHPPT,27.87209,28.19145,28.47564,27.63492,27.41643,27.40406,23.80134,24.89296,,26.81348,⋯,27.15808,27.80710,27.15258,28.50571,28.38854,30.01491,30.31603,,26.21932,28.75856
AATF,28.60254,27.94190,28.77087,27.85085,27.47414,28.08106,,25.32382,25.75001,26.20075,⋯,27.41191,27.21706,28.06625,27.84554,28.40880,29.82548,29.11389,,,28.67570
ABCB10,26.11028,25.73490,26.78681,25.34604,,,,,,,⋯,25.62856,27.04770,25.98811,25.77871,27.04978,28.31069,29.00220,,,27.16019


- `data.frame` does not allow abritary column names, but only valid column names...
- tibbles don't support rownames, and the imputation methods rely on normal `data.frame`s.
Save the header row for later use.

In [6]:
original_header <- colnames(readr::read_csv(
  train_split,
  n_max = 1,
  col_names = TRUE,
  skip = 0
))
feat_name <- original_header[1]
original_header[1:5]

[1mRows: [22m[34m1[39m [1mColumns: [22m[34m51[39m


[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (1): Gene Names
[32mdbl[39m (45): 2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070, 2019_12_19_19_48_Q...
[33mlgl[39m  (5): 2020_01_04_04_23_Q-Exactive-HF-X-Orbitrap_6070, 2020_01_04_10_03_Q...



[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


Uncomment to test certain methods (only for debugging, as at least one method per package is tested using Github Actions)

In [7]:
# to_test <- c(
# 'ZERO',
# 'MINIMUM',
# 'COLMEDIAN',
# 'ROWMEDIAN',
# 'KNN_IMPUTE',
# 'SEQKNN',
# 'BPCA',
# 'SVDMETHOD',
# 'LLS',
# 'MLE',
# 'LLS',
# 'QRILC',
# 'MINDET',
# 'MINPROB',
# 'IRM',
# 'IMPSEQ',
# 'IMPSEQROB',
# 'MICE-NORM',
# 'MICE-CART',
# 'RF',
# 'PI',
# 'GMS', # fails to install on Windows
# 'TRKNN',
# 'MSIMPUTE'
# 'MSIMPUTE_MNAR'
# 'GSIMP'
# )

# for (method in to_test) {
#     print(method)
#     pred <- nafunctions(df, method)
# }

Impute and save predictions with original feature and column names

In [8]:
pred <- nafunctions(df, method)
pred <- tibble::as_tibble(cbind(rownames(pred), pred))
names(pred) <- original_header
pred

Loading required package: rrcovNA



“there is no package called ‘rrcovNA’”


Updating HTML index of packages in '.Library'



Making 'packages.html' ...


 done



Loading required package: rrcov



“package ‘rrcov’ was built under R version 4.3.2”


Loading required package: robustbase



“package ‘robustbase’ was built under R version 4.3.2”


Scalable Robust Estimators with High Breakdown Point (version 1.7-5)




Scalable Robust Estimators with High Breakdown Point for
Incomplete Data (version 0.5-1)




“NAs introduced by coercion to integer range”


Gene Names,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,2019_12_19_19_48_Q-Exactive-HF-X-Orbitrap_6070,2019_12_20_14_15_Q-Exactive-HF-X-Orbitrap_6070,2019_12_27_12_29_Q-Exactive-HF-X-Orbitrap_6070,2019_12_29_15_06_Q-Exactive-HF-X-Orbitrap_6070,2019_12_29_18_18_Q-Exactive-HF-X-Orbitrap_6070,2020_01_02_17_38_Q-Exactive-HF-X-Orbitrap_6070,2020_01_03_11_17_Q-Exactive-HF-X-Orbitrap_6070,2020_01_03_16_58_Q-Exactive-HF-X-Orbitrap_6070,⋯,2020_05_20_12_33_Q-Exactive-HF-X-Orbitrap_6070,2020_05_20_15_35_Q-Exactive-HF-X-Orbitrap_6070,2020_05_22_14_57_Q-Exactive-HF-X-Orbitrap_6070,2020_05_22_17_43_Q-Exactive-HF-X-Orbitrap_6070,2020_05_26_14_20_Q-Exactive-HF-X-Orbitrap_6070,2020_05_27_13_57_Q-Exactive-HF-X-Orbitrap_6070,2020_05_28_04_06_Q-Exactive-HF-X-Orbitrap_6070,2020_06_01_10_22_Q-Exactive-HF-X-Orbitrap_6070,2020_06_01_15_41_Q-Exactive-HF-X-Orbitrap_6070,2020_06_02_09_41_Q-Exactive-HF-X-Orbitrap_6070
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ACP1,29.31907,29.91114,30.73497,29.64940,29.47601,29.64964,28.23945,28.17679,28.19737,⋯,29.32352,29.16698,29.17474,29.68679,30.19906,30.81176,31.38376,28.20378,29.39425,30.23706
CAP1,31.28444,31.43926,31.78722,31.07744,33.59602,33.90428,31.79091,32.50961,33.39254,⋯,31.80031,32.11954,30.92896,31.61899,32.22257,33.30946,33.83253,32.73927,34.10095,32.34982
CAPRIN1,31.67367,31.30356,32.08819,31.18323,31.41715,32.45403,30.16027,30.52085,31.81520,⋯,31.83726,31.50720,31.19777,31.53733,32.61421,33.55568,33.68248,30.66246,31.90824,32.59127
CBX5,30.14783,29.14131,29.72795,28.63831,31.11092,31.23814,28.87847,30.61728,28.91157,⋯,28.97443,29.22430,28.76655,29.54460,30.20711,31.00631,31.45420,29.88330,30.99375,29.97924
CLPB,28.76113,28.91559,30.02049,29.26774,27.69664,28.31700,24.76415,25.83026,27.49793,⋯,28.68083,29.30254,29.34925,29.58751,29.92550,31.45160,31.71260,26.67654,27.40908,30.23341
DDX24,29.13775,28.27115,29.16027,28.11793,29.02369,28.90101,26.16576,26.82153,28.99566,⋯,28.55300,28.63575,28.00158,28.95110,28.91068,30.65215,30.81582,27.97323,28.27702,29.24563
FAM49B,29.03970,30.34064,30.37700,29.16746,29.50841,29.45236,26.69059,29.13844,28.70586,⋯,27.97323,29.22320,28.93469,29.39346,30.37648,31.44929,31.62875,28.16629,29.71270,30.42842
GCLM,29.56260,29.72491,30.49581,29.61800,30.13858,30.82031,28.43678,29.53380,29.71820,⋯,29.10104,29.72162,29.40337,29.93846,30.66348,31.82483,31.56955,29.11456,28.94673,30.41397
HEXB,28.49140,28.28171,29.52916,28.40141,29.05251,29.50444,27.64293,28.10475,28.29557,⋯,27.49297,28.16658,28.47603,28.51552,29.45513,30.32582,30.15641,28.78650,29.69349,29.71360
HLA-A,30.10986,30.01465,30.06933,28.97812,31.11942,31.23900,29.01510,30.00075,29.42170,⋯,30.26696,30.19131,29.55461,30.25101,30.52141,31.79943,32.00320,29.34223,29.58404,28.84649


In [9]:
pred <- reshape2::melt(pred, id.vars = feat_name)
names(pred) <- c(feat_name, 'Sample ID', method)
pred <- pred[reshape2::melt(is.na(df))['value'] == TRUE, ]
pred

Unnamed: 0_level_0,Gene Names,Sample ID,IMPSEQROB
Unnamed: 0_level_1,<chr>,<fct>,<dbl>
3,CAPRIN1,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,31.67367
12,MAPK1,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,28.68819
14,NUP35,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,27.60837
15,PAFAH1B2,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,29.69239
17,PUS7,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,29.82748
26,USP39,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,29.98929
28,ABCF3,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,28.83470
39,ATP5F1,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,30.31883
41,BANF1,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,31.75955
44,BTF3,2019_12_18_14_35_Q-Exactive-HF-X-Orbitrap_6070,30.48827


In [10]:
dim(pred)

In [11]:
fname = file.path(folder_experiment,
                  'preds',
                  paste0('pred_all_', toupper(method), '.csv'))
fname

In [12]:
write_csv(pred, path = fname)

“[1m[22mThe `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
[36mℹ[39m Please use the `file` argument instead.”
