# Importing libraries

Because the original cancer datasets do not contain annotations, the effect of joint dimensionality reduction approaches on factors associated with clinical or biological annotations does not need to be considered when processing these datasets. Only the association between the factors and survival needs to be considered.

In general, `MCIA`, `RGCCA`, and `JIVE` achieved the best performances, finding factors significantly associated with survival in seven out of ten cancer types.



In [4]:
library("RGCCA")



library("omicade4")
library("ade4")


library("r.jive")

In [None]:
# Run three joint dimensionality-reduction methods (RGCCA, MCIA, JIVE) on a
# set of omics tables stored as text files.
#
# Args:
#   folder:      directory containing the omics files.
#   file.names:  character vector of file names inside `folder`. Each file is
#                read with read.table() (header in the first line, row names
#                in the first column). Columns are matched across files by
#                name (samples); rows are the features of each omic.
#   num.factors: number of factors requested from every method; coerced with
#                as.numeric().
#   sep:         field separator passed to read.table().
#   filtering:   "none" keeps all rows; "stringent" keeps at most the 5000
#                rows with the largest standard deviation; any other value
#                (e.g. "sd") keeps at most 6000. Rows whose standard
#                deviation is not > 0 are never selected by the filter.
#
# Returns:
#   A list with two elements:
#     factorizations: one entry per method, each `list(factors, metagenes)`
#                     where `factors` is a samples x num.factors matrix and
#                     `metagenes` is a list with one features x num.factors
#                     matrix per omic.
#     method:         c("RGCCA", "MCIA", "JIVE"), in the same order.
runfactorization <- function(folder, file.names, num.factors, sep = " ",
                             filtering = "none") {
  num.factors <- as.numeric(num.factors)
  factor_ids <- seq_len(num.factors)

  ## creating list of omics
  omics <- vector("list", length(file.names))
  for (i in seq_along(file.names)) {
    omics[[i]] <- as.matrix(read.table(file.path(folder, file.names[i]),
                                       sep = sep, row.names = 1,
                                       header = TRUE))
  }
  omic_ids <- seq_along(omics)

  ## restricting to common samples and filtering
  samples <- Reduce(intersect, lapply(omics, colnames), colnames(omics[[1]]))
  for (j in omic_ids) {
    omics[[j]] <- omics[[j]][, samples, drop = FALSE]
    if (filtering != "none") {
      # Rank the features by standard deviation (sort() drops NA values).
      feature_sd <- sort(apply(omics[[j]], 1, sd), decreasing = TRUE)
      max_features <- if (filtering == "stringent") 5000 else 6000
      # Positive values form a prefix of the sorted vector, so their count is
      # also the index of the last feature with sd > 0.
      n_keep <- min(sum(feature_sd > 0), max_features)
      selected <- names(feature_sd)[seq_len(n_keep)]
      # Keep the selected features in their original row order.
      keep_rows <- rownames(omics[[j]]) %in% selected
      omics[[j]] <- omics[[j]][keep_rows, , drop = FALSE]
    } else {
      # NOTE(review): this drops zero-variance *columns* (samples) separately
      # for each omic, which could leave the omics with different sample
      # sets. Behaviour kept as in the original; confirm it is intended.
      varying <- which(apply(omics[[j]], 2, sd) > 0)
      omics[[j]] <- omics[[j]][, varying, drop = FALSE]
    }
  }

  # Row range occupied by each omic when the feature tables are stacked in
  # list order (used to split stacked loadings back into per-omic blocks).
  n_features <- vapply(omics, nrow, integer(1))
  block_end <- cumsum(n_features)
  block_start <- block_end - n_features + 1L

  ## RGCCA (expects samples in rows, hence the transposition)
  factorizations_RGCCA <- rgcca(lapply(omics, t),
                                ncomp = rep(num.factors, length(omics)),
                                scheme = "centroid", scale = TRUE,
                                init = "svd", bias = TRUE, tol = 1e-08,
                                verbose = FALSE)
  factors_rgcca <- as.matrix(factorizations_RGCCA$Y[[1]])
  metagenes_rgcca <- vector("list", length(omics))
  for (j in omic_ids) {
    metagenes_rgcca[[j]] <- as.matrix(factorizations_RGCCA$a[[j]])
    rownames(metagenes_rgcca[[j]]) <- rownames(omics[[j]])
    colnames(metagenes_rgcca[[j]]) <- factor_ids
  }

  ## MCIA
  # Shift each omic to be non-negative and rescale it so its maximum is 1
  # before calling mcia().
  omics_pos <- lapply(omics, function(omic) {
    lowest <- min(omic)
    shifted <- if (lowest < 0) omic + abs(lowest) else omic
    shifted / max(shifted)
  })
  factorizations_mcia <- mcia(omics_pos, cia.nf = num.factors)
  factors_mcia <- as.matrix(factorizations_mcia$mcoa$SynVar)
  mcia_axis <- factorizations_mcia$mcoa$axis
  metagenes_mcia <- vector("list", length(omics))
  for (j in omic_ids) {
    # `axis` stacks the loadings of all omics, so each block has to be taken
    # at its own row offset (previously every omic read the first rows).
    block_rows <- block_start[j]:block_end[j]
    metagenes_mcia[[j]] <- as.matrix(mcia_axis[block_rows, , drop = FALSE])
    rownames(metagenes_mcia[[j]]) <- rownames(omics[[j]])
    colnames(metagenes_mcia[[j]]) <- factor_ids
  }

  ## JIVE
  factorizations_jive <- jive(omics, rankJ = num.factors,
                              rankA = rep(num.factors, length(omics)),
                              method = "given", conv = "default",
                              maxiter = 100, showProgress = FALSE)
  rankJV <- factorizations_jive$rankJ
  joint_ids <- seq_len(rankJV)
  joint_blocks <- factorizations_jive$joint[omic_ids]
  # Stack the joint structures of all omics and decompose them together.
  J <- do.call(rbind, joint_blocks)
  joint_rows <- vapply(joint_blocks, nrow, integer(1))
  joint_end <- cumsum(joint_rows)
  joint_start <- joint_end - joint_rows + 1L
  svd.o <- svd(J)
  # `nrow` keeps diag() from building an identity matrix when only one
  # singular value is returned.
  jV <- svd.o$v %*% diag(svd.o$d, nrow = length(svd.o$d))
  metagenes_jive <- vector("list", length(omics))
  for (j in omic_ids) {
    # drop = FALSE keeps a matrix even when a single factor is requested.
    metagenes_jive[[j]] <- svd.o$u[joint_start[j]:joint_end[j], joint_ids,
                                   drop = FALSE]
    rownames(metagenes_jive[[j]]) <- rownames(omics[[j]])
    colnames(metagenes_jive[[j]]) <- factor_ids
  }
  factors_jive <- jV[, joint_ids, drop = FALSE]
  rownames(factors_jive) <- colnames(omics[[1]])
  colnames(factors_jive) <- factor_ids

  out <- list(
    factorizations = list(
      list(factors_rgcca, metagenes_rgcca),
      list(factors_mcia, metagenes_mcia),
      list(factors_jive, metagenes_jive)
    ),
    method = c("RGCCA", "MCIA", "JIVE")
  )

  return(out)
}

`BIC` = breast cancer dataset

`SKCM` = melanoma dataset

`OV` = ovarian cancer dataset

In [None]:
# Input folders, one per cancer dataset analysed below
cancers <- c(
  "./data/cancer/breast",
  "./data/cancer/melanoma",
  "./data/cancer/ovarian"
)

In [None]:
# Timestamp (year, month, day, hour, minute, second) labelling this run
tag <- strftime(Sys.time(), format = "%Y%m%d%H%M%S")
# Output location for this run: "./results" followed by the tag and a slash
results_folder <- paste0("./results", tag, "/")
# Create the output folder; no warning is shown if it cannot be created
# (for instance because it already exists)
dir.create(path = results_folder, showWarnings = FALSE)

In [None]:
## Support function to apply log2(+1) to a matrix
#
# Reads the space-separated table `file.name` from `folder` (header in the
# first line, row names in the first column), applies log2(x + 1) to every
# value and writes the result to "log_<file.name>" in the same folder.
#
# Args:
#   folder:    directory containing the input file; the output is written
#              to the same directory.
#   file.name: name of the input file inside `folder`.
#
# Returns:
#   Invisibly, 0L (the success status the previous shell-based version
#   returned); called for its side effect of writing the output file.
log2matrix <- function(folder, file.name) {
  # Read table
  data <- as.matrix(read.table(file.path(folder, file.name), sep = " ",
                               row.names = 1, header = TRUE))
  # Apply transformation
  data <- log2(data + 1)
  # Output file name
  output <- file.path(folder, paste0("log_", file.name))
  # Export transformed data. write.table() emits no header entry for the
  # row-name column, so the header line is started with a "probe" label
  # first. This is done in R rather than through `sed -i`, which is not
  # portable and broke on paths containing spaces.
  # NOTE(review): the label is followed by a tab although the table itself
  # is space-separated; kept byte-for-byte as before -- confirm intended.
  con <- file(output, open = "w")
  on.exit(close(con), add = TRUE)
  cat("probe\t", file = con)
  write.table(data, con, sep = " ", col.names = TRUE, row.names = TRUE)
  invisible(0L)
}

In [None]:
# Number of factors requested from each factorisation method
num.factors <- 2

In [None]:
# Run the factorisation for every cancer folder and store one result file
# per cancer in the results folder.
for (i in cancers) {
  print(paste0("Now analysing ", i))

  # Name of the current cancer (last component of its folder path)
  current_cancer <- basename(i)

  # The expression and miRNA tables are log2(x + 1)-transformed first; this
  # writes "log_exp" and "log_mirna" next to the input files.
  for (omic_file in c("exp", "mirna")) {
    log2matrix(i, omic_file)
  }

  # Perform factorisation on the transformed tables plus methylation
  print("Running factorisation...")
  omic_files <- c("log_exp", "methy", "log_mirna")
  out <- runfactorization(i, omic_files, num.factors,
                          sep = " ", filtering = "sd")

  # NOTE(review): save() writes the format restored by load(), not the one
  # read by readRDS(), despite the ".rds" extension; the cancer name is also
  # joined to "results_out.rds" without a separator. Both are kept so that
  # existing readers of these files continue to work.
  save(out, file = paste0(results_folder, current_cancer, "results_out.rds"))
}