Skip to content

Commit

Permalink
modify ignore files
Browse files Browse the repository at this point in the history
  • Loading branch information
JEFworks committed Jun 9, 2018
1 parent 78cee3c commit 80c6d5a
Show file tree
Hide file tree
Showing 16 changed files with 104 additions and 55 deletions.
11 changes: 11 additions & 0 deletions .Rbuildignore
@@ -0,0 +1,11 @@
^.*\.Rproj$
^.*\.DS_Store$
\.tar\.gz$
^\.Rproj\.user$
^.travis\.yml$
^\.github
^vignettes/cache
^vignettes/figures
^cran_build_instructions.md$
^README.md
^images/*
4 changes: 2 additions & 2 deletions .gitignore
@@ -1,7 +1,6 @@
*~
R/*~
.DS_Store
.Rbuildignore
.Rhistory
.Rproj.user
.RData
Expand All @@ -12,4 +11,5 @@ src/*.so
*_bak
.*_bak
inst/doc
test/*
test/*
*.tar.gz
6 changes: 2 additions & 4 deletions DESCRIPTION
Expand Up @@ -2,17 +2,15 @@ Package: liger
Type: Package
Title: Lightweight Iterative Geneset Enrichment in R
Version: 0.1
Author: Jean Fan [aut, cre], Peter Kharchenko [aut]
Authors@R: c(person("Jean", "Fan", role = c("aut", "cre"), email = "jeanfan@fas.harvard.edu", comment = c(ORCID = "0000-0002-0212-5451")), person("Peter", "Kharchenko", role = "aut", email = "Peter_Kharchenko@hms.harvard.edu"))
Maintainer: Jean Fan <jeanfan@fas.harvard.edu>
Description: Gene Set Enrichment Analysis (GSEA) is a computational method that determines whether an a priori defined set of genes shows statistically significant, concordant differences between two biological states. The original algorithm is detailed in Subramanian, Tamayo, et al. with Java implementations available through the Broad Institute. The liger package provides a lightweight R implementation of this enrichment test on a list of values. Given a list of values, such as p-values or log-fold changes derived from differential expression analysis or other analyses comparing biological states, this package enables you to test a priori defined set of genes for enrichment to enable interpretability of highly significant or high fold-change genes.
License: GPL-3 | file LICENSE
LazyData: TRUE
Depends: R (>= 2.10)
Imports: graphics, stats, Rcpp, matrixStats, parallel
LinkingTo: Rcpp, RcppArmadillo
Suggests: knitr
Suggests: knitr, rmarkdown
VignetteBuilder: knitr
URL: https://github.com/JEFworks/liger
BugReports: https://github.com/JEFworks/liger/issues
RoxygenNote: 6.0.1
RoxygenNote: 6.0.1
5 changes: 3 additions & 2 deletions NAMESPACE
@@ -1,6 +1,7 @@
useDynLib(liger)
importFrom(Rcpp, evalCpp)
# Generated by roxygen2: do not edit by hand

export(bulk.gsea)
export(gsea)
export(iterative.bulk.gsea)
importFrom(Rcpp,evalCpp)
useDynLib(liger)
8 changes: 4 additions & 4 deletions R/RcppExports.R
@@ -1,11 +1,11 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

gseaRandCore <- function(sset, eso, nsamples, seed) {
.Call('_liger_gseaRandCore', PACKAGE = 'liger', sset, eso, nsamples, seed)
gseaRandCore <- function(sset, eso, nsamples) {
.Call('_liger_gseaRandCore', PACKAGE = 'liger', sset, eso, nsamples)
}

gseaBulkCore <- function(setm, eso, nsamples, seed) {
.Call('_liger_gseaBulkCore', PACKAGE = 'liger', setm, eso, nsamples, seed)
gseaBulkCore <- function(setm, eso, nsamples) {
.Call('_liger_gseaBulkCore', PACKAGE = 'liger', setm, eso, nsamples)
}

29 changes: 18 additions & 11 deletions R/functions.R
Expand Up @@ -10,7 +10,7 @@
#' @param return.details whether to return extended details (default: FALSE)
#' @param quantile.threshold threshold used (default: min(100/n.rand,0.1))
#' @param random.seed random seed (default: 1)
#' @param mc.cores number of cores for parallel processing (default: 2)
#' @param mc.cores number of cores for parallel processing (default: 1)
#'
#' @examples
#' data("org.Hs.GO2Symbol.list")
Expand All @@ -20,11 +20,12 @@
#' vals <- rnorm(length(universe), 0, 10)
#' names(vals) <- universe
#' vals[gs] <- rnorm(length(gs), 100, 10)
#' gsea(values=vals, geneset=gs, mc.cores=1) # test obviously enriched set
#' # test obviously enriched set, reduce n.rand for speed
#' gsea(values=vals, geneset=gs, mc.cores=1, n.rand=100)
#'
#' @export
#'
gsea <- function(values, geneset, power=1, rank=FALSE, weight=rep(1,length(values)), n.rand=1e4, plot=TRUE, return.details=FALSE, quantile.threshold=min(100/n.rand,0.1), random.seed=1, mc.cores=2) {
gsea <- function(values, geneset, power=1, rank=FALSE, weight=rep(1,length(values)), n.rand=1e4, plot=TRUE, return.details=FALSE, quantile.threshold=min(100/n.rand,0.1), random.seed=1, mc.cores=1) {

# Binary vector indicating presence in the gene set
set <- names(values) %in% geneset
Expand Down Expand Up @@ -68,14 +69,16 @@ gsea <- function(values, geneset, power=1, rank=FALSE, weight=rep(1,length(value
# Though here, we randomly permute set labels as opposed to recomputing full phenotype recalculation
if (mc.cores > 1) {
rvll <- parallel::mclapply(1:mc.cores, function(i) {
gseaRandCore(set, eso, nsamples = ceiling(n.rand / mc.cores), seed = random.seed + i)
set.seed(random.seed + i)
gseaRandCore(set, eso, nsamples = ceiling(n.rand / mc.cores))
}, mc.preschedule=TRUE, mc.cores = mc.cores)
rvl <- list(
p = unlist(lapply(rvll, function(x) x$p)),
n = unlist(lapply(rvll, function(x) x$n))
)
} else {
rvl <- gseaRandCore(set, eso, nsamples = n.rand, seed = random.seed)
set.seed(random.seed)
rvl <- gseaRandCore(set, eso, nsamples = n.rand)
}
# From original paper: "Estimate nominal P value for S from ESNULL by using the
# positive or negative portion of the distribution corresponding to
Expand Down Expand Up @@ -154,7 +157,7 @@ gsea <- function(values, geneset, power=1, rank=FALSE, weight=rep(1,length(value
#' @param n.rand number of random permutations used to assess significance (default: 1e4)
#' @param return.details whether to return extended details (default: FALSE)
#' @param quantile.threshold threshold used (default: min(100/n.rand,0.1))
#' @param mc.cores number of cores for parallel processing (default: 2)
#' @param mc.cores number of cores for parallel processing (default: 1)
#' @param skip.qval.estimation whether to skip q-value estimation for multiple testing (default: FALSE)
#'
#' @examples
Expand All @@ -165,11 +168,12 @@ gsea <- function(values, geneset, power=1, rank=FALSE, weight=rep(1,length(value
#' names(vals) <- universe
#' vals[gs] <- rnorm(length(gs), 100, 10)
#' gs.list <- org.Hs.GO2Symbol.list # get gene sets
#' bulk.gsea(values = vals, set.list = gs.list[1:5], mc.cores = 1)
#' # reduce n.rand for speed
#' bulk.gsea(values = vals, set.list = gs.list[1:3], mc.cores = 1, n.rand=100)
#'
#' @export
#'
bulk.gsea <- function(values, set.list, power=1, rank=FALSE, weight=rep(1,length(values)), n.rand=1e4, mc.cores=2, quantile.threshold=min(100/n.rand,0.1), return.details=FALSE, skip.qval.estimation=FALSE) {
bulk.gsea <- function(values, set.list, power=1, rank=FALSE, weight=rep(1,length(values)), n.rand=1e4, mc.cores=1, quantile.threshold=min(100/n.rand,0.1), return.details=FALSE, skip.qval.estimation=FALSE) {

# Determine set matrix
setm <- do.call(rbind, parallel::mclapply(set.list, function(set) names(values) %in% set, mc.cores=mc.cores))
Expand Down Expand Up @@ -208,13 +212,15 @@ bulk.gsea <- function(values, set.list, power=1, rank=FALSE, weight=rep(1,length
# Randomization
if(mc.cores>1) {
rvlp <- parallel::mclapply(1:mc.cores,function(i) {
gseaBulkCore(setm,eso,ceiling(n.rand/mc.cores),i)
set.seed(i)
gseaBulkCore(setm,eso,ceiling(n.rand/mc.cores))
}, mc.preschedule=TRUE, mc.cores=mc.cores)
rvl <- list(p=do.call(cbind,lapply(rvlp,function(x) x$p)),
n=do.call(cbind,lapply(rvlp,function(x) x$n)));
rm(rvlp); gc();
} else {
rvl <- gseaBulkCore(setm,eso,n.rand,1)
set.seed(1)
rvl <- gseaBulkCore(setm,eso,n.rand)
}

# Raw p-values
Expand Down Expand Up @@ -286,7 +292,8 @@ bulk.gsea <- function(values, set.list, power=1, rank=FALSE, weight=rep(1,length
#' names(vals) <- universe
#' vals[gs] <- rnorm(length(gs), 100, 10)
#' gs.list <- org.Hs.GO2Symbol.list # get gene sets
#' iterative.bulk.gsea(values = vals, set.list = gs.list[1:5], mc.cores = 1)
#' # reduce n.rand for speed
#' iterative.bulk.gsea(values = vals, set.list = gs.list[1:3], mc.cores = 1, n.rand=100)
#'
#' @export
#'
Expand Down
10 changes: 10 additions & 0 deletions R/liger-package.R
@@ -0,0 +1,10 @@
#' liger
#'
#' This package contains permutation-based gene set enrichment functionalities in R
#'
#' @name liger
#' @docType package
#'
#' @useDynLib liger
#' @importFrom Rcpp evalCpp
NULL
15 changes: 15 additions & 0 deletions cran_build_instructions.md
@@ -0,0 +1,15 @@
1. Build a source package:
```
R CMD build .
```

2. Look in the package to make sure it doesn't contain spurious files. Modify .Rbuildignore as needed.
```
tar tvf liger_*.tar.gz
```

3. Check the package:
```
R CMD check --as-cran liger_*.tar.gz
```

7 changes: 4 additions & 3 deletions man/bulk.gsea.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/gsea.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/iterative.bulk.gsea.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions man/liger.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 8 additions & 10 deletions src/RcppExports.cpp
Expand Up @@ -7,37 +7,35 @@
using namespace Rcpp;

// gseaRandCore
Rcpp::List gseaRandCore(arma::vec sset, arma::vec eso, int nsamples, int seed);
RcppExport SEXP _liger_gseaRandCore(SEXP ssetSEXP, SEXP esoSEXP, SEXP nsamplesSEXP, SEXP seedSEXP) {
Rcpp::List gseaRandCore(arma::vec sset, arma::vec eso, int nsamples);
RcppExport SEXP _liger_gseaRandCore(SEXP ssetSEXP, SEXP esoSEXP, SEXP nsamplesSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< arma::vec >::type sset(ssetSEXP);
Rcpp::traits::input_parameter< arma::vec >::type eso(esoSEXP);
Rcpp::traits::input_parameter< int >::type nsamples(nsamplesSEXP);
Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
rcpp_result_gen = Rcpp::wrap(gseaRandCore(sset, eso, nsamples, seed));
rcpp_result_gen = Rcpp::wrap(gseaRandCore(sset, eso, nsamples));
return rcpp_result_gen;
END_RCPP
}
// gseaBulkCore
Rcpp::List gseaBulkCore(arma::mat setm, arma::vec eso, int nsamples, int seed);
RcppExport SEXP _liger_gseaBulkCore(SEXP setmSEXP, SEXP esoSEXP, SEXP nsamplesSEXP, SEXP seedSEXP) {
Rcpp::List gseaBulkCore(arma::mat setm, arma::vec eso, int nsamples);
RcppExport SEXP _liger_gseaBulkCore(SEXP setmSEXP, SEXP esoSEXP, SEXP nsamplesSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< arma::mat >::type setm(setmSEXP);
Rcpp::traits::input_parameter< arma::vec >::type eso(esoSEXP);
Rcpp::traits::input_parameter< int >::type nsamples(nsamplesSEXP);
Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
rcpp_result_gen = Rcpp::wrap(gseaBulkCore(setm, eso, nsamples, seed));
rcpp_result_gen = Rcpp::wrap(gseaBulkCore(setm, eso, nsamples));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_liger_gseaRandCore", (DL_FUNC) &_liger_gseaRandCore, 4},
{"_liger_gseaBulkCore", (DL_FUNC) &_liger_gseaBulkCore, 4},
{"_liger_gseaRandCore", (DL_FUNC) &_liger_gseaRandCore, 3},
{"_liger_gseaBulkCore", (DL_FUNC) &_liger_gseaBulkCore, 3},
{NULL, NULL, 0}
};

Expand Down
10 changes: 3 additions & 7 deletions src/functions.cpp
Expand Up @@ -3,14 +3,12 @@ using namespace Rcpp;

// Rcpp implementation of the GSEA resampling procedure
// [[Rcpp::export]]
Rcpp::List gseaRandCore(arma::vec sset, arma::vec eso, int nsamples, int seed) {
Rcpp::List gseaRandCore(arma::vec sset, arma::vec eso, int nsamples) {

std::vector<double> pscores(nsamples);
std::vector<double> nscores(nsamples);
int nelem=sset.size();

srand(seed);

for(int i=0;i<nsamples;i++) {
// shuffle the set
std::random_shuffle(sset.begin(),sset.end());
Expand Down Expand Up @@ -46,10 +44,8 @@ Rcpp::List gseaRandCore(arma::vec sset, arma::vec eso, int nsamples, int seed) {

// Gsea randomization core method for multiple genesets
// [[Rcpp::export]]
Rcpp::List gseaBulkCore(arma::mat setm, arma::vec eso, int nsamples, int seed) {
Rcpp::List gseaBulkCore(arma::mat setm, arma::vec eso, int nsamples) {

srand(seed);

int nelem=setm.n_cols;
int nsets=setm.n_rows;

Expand Down Expand Up @@ -106,4 +102,4 @@ Rcpp::List gseaBulkCore(arma::mat setm, arma::vec eso, int nsamples, int seed) {
return Rcpp::List::create(Rcpp::Named( "p" ) = wrap(pscores),
Rcpp::Named( "n" ) = wrap(nscores));
}


10 changes: 5 additions & 5 deletions vignettes/gsea.Rmd
Expand Up @@ -42,7 +42,7 @@ To test for enrichment of a particular gene set:
```{r, echo=TRUE}
names(org.Hs.GO2Symbol.list)[[1]]
gs # look at gs
gsea(values=vals, geneset=gs, mc.cores=1, plot=TRUE)
gsea(values=vals, geneset=gs, mc.cores=1, plot=TRUE, n.rand=500)
```

In this simulation, we created `vals` such that `gs` was obviously enriched. And indeed, we see that this gene set exhibits significant enrichment.
Expand All @@ -53,7 +53,7 @@ Now to test for enrichment of another gene set:
gs.new <- org.Hs.GO2Symbol.list[[2]]
names(org.Hs.GO2Symbol.list)[[2]]
head(gs.new) # look at gs.new
gsea(values=vals, geneset=gs.new)
gsea(values=vals, geneset=gs.new, mc.cores=1, n.rand=500)
```

In this simulation, we created `vals` such that `gs.new` was obviously not enriched. And indeed, we see that this gene set does not exhibit significant enrichment.
Expand All @@ -64,7 +64,7 @@ If we simulate a more ambiguous case:
vals[sample(1:length(universe), 1000)] <- rnorm(1000, 100, 10)
# test previously perfectly enriched gene set again
gs <- org.Hs.GO2Symbol.list[[1]]
gsea(values=vals, geneset=gs)
gsea(values=vals, geneset=gs, mc.cores=1, n.rand=500)
```
The enrichment plots and p-values are affected as expected.

Expand All @@ -73,12 +73,12 @@ The enrichment plots and p-values are affected as expected.
We can also test a number of gene sets:

```{r, echo=TRUE}
bulk.gsea(values=vals, set.list=org.Hs.GO2Symbol.list[1:10])
bulk.gsea(values=vals, set.list=org.Hs.GO2Symbol.list[1:5], mc.cores=1, n.rand=500)
```

To save on computation time, we can also iterative assess significance:
```{r, echo=TRUE}
iterative.bulk.gsea(values=vals, set.list=org.Hs.GO2Symbol.list[1:10])
iterative.bulk.gsea(values=vals, set.list=org.Hs.GO2Symbol.list[1:5], mc.cores=1, n.rand=500)
```

## R Session Info
Expand Down

0 comments on commit 80c6d5a

Please sign in to comment.