yaGST

This is a collection of wrappers to the Wilcoxon test to run competitive gene set tests and comparison between imbalanced datasets.

Installation

library(devtools)
install_github("miccec/yaGST")

Running a gene-set enrichment analysis

library(yaGST)
data("rankedList")
# generate a random data set of dimension 100
geneSet <- sample(head(names(rankedList), 5000), 100)
ans <- mwwGST(rankedList, geneSet, moreDetails = TRUE)
ans
plot(ans)

# generate a second gene set
geneSet <- sample(tail(names(rankedList), 5000), 100)
ans <- mwwGST(rankedList, geneSet, moreDetails = TRUE)
plot(ans)

How to run an enrichment analysis on a collection of gene-sets.

You may need to download the collection of gene-sets, for example from the Molecular Signatures Database at the Broad Institute

h.all.v6.0.symbols.gmt

In case the output of the analysis has to be saved in an .xls file, you need to install the WriteXLS package.

load the GO collections

library(yaGST)
GO <- gmt2GO("~/pathToYourFiles/h.all.v6.0.symbols.gmt")
length(GO)

load the ranked list

data("rankedList")
head(rankedList)
tail(rankedList)

run the mwwGST

ans <- lapply(GO, function(x) mwwGST(rankedList, x, minLenGeneSet = 15, verbose = FALSE))

Now, arrange the results in a table

actualGeneSetSize <- unlist(sapply(ans, function(x) x$actualGeneSetCount))
originalGeneSetSize <- unlist(sapply(ans, function(x) x$originalGeneSetCount))
pValue <- unlist(sapply(ans, function(x) x$p.value))
qValue <- p.adjust(pValue)
nes <- unlist(sapply(ans, function(x) x$nes))
pu <- unlist(sapply(ans, function(x) x$pu))
log2.pu <- unlist(sapply(ans, function(x) x$log.pu))
result_table <- cbind(originalGeneSetSize, actualGeneSetSize,  
  round(nes, 4), round(pu, 4), round(log2.pu, 4), round(pValue, 6), round(qValue, 6))
colnames(result_table) <- c("originalGeneSetSize", "actualGeneSetSize", "NES", "pu", "log2_pu", "pValue", "qValue")
result_table <- result_table[order(result_table[, "NES"], decreasing = TRUE),]

If needed, the table can be stored in an .xls file as follows

library(WriteXLS)
gst_results <- as.data.frame(result_table)
WriteXLS("gst_results", ExcelFileName = "~/pathToYourFiles/aNameForTheAnalysis.xls", row.names = TRUE)

filter and plot the results

qValue_treshold <- 0.05
NES_treshold <- 0.6
selected <- which((result_table[, "NES"] > NES_treshold) & (result_table[, "qValue"] < qValue_treshold))
result_table[selected,]
first_geneSet <- names(selected)[1]
plot(ans[[first_geneSet]], rankedList = rankedList, main = first_geneSet)

Running an extended enrichment analysis on a regulon (with positive and negative targets)

data("rankedList")
positive_gs <- sample(head(names(rankedList), 10000), 200)
negative_gs <- sample(tail(names(rankedList), 10000), 200)
ans <- mwwExtGST(rankedList, positive_gs, negative_gs, moreDetails = TRUE)
ans
plot(ans)

Running ee-MWW

nr <- 100; nc <- 1000
# generate a data-matrix with nr samples, and nc features
exprData <- matrix(rpois(nc * nr, 100), nrow = nr, ncol = nc)
colnames(exprData) <- paste0("feat", 1:nc)
rownames(exprData) <- paste0("sam", 1:nr)

# increase the first 3 samples (minority set) of 10\% of the original intensity
# of the first 30 features (later the gene-set)
exprData[1, 1:30] <- exprData[1, 1:30]* runif(30, min = 1, max = 1.10)
exprData[2, 1:30] <- exprData[1, 1:30]* runif(30, min = 1, max = 1.10)
exprData[3, 1:30] <- exprData[1, 1:30]* runif(30, min = 1, max = 1.10)
samples_of_interest <- rownames(exprData)[1:3] # minority set

# running in parallel
library(doParallel)
# adjust the number of CPUs as needed
cl <- makePSOCKcluster(3)
clusterApply(cl, floor(runif(length(cl), max = 10000000)), set.seed)
registerDoParallel(cl)
ans_eeMWW <- eeMWW(exprData, samples_of_interest)
stopCluster(cl)

# set the gene-set and run the enrichment analysis
geneSet <- colnames(exprData)[1:30]
(tmp <- mwwGST(ans_eeMWW, geneSet))
plot(tmp, rankedList = ans_eeMWW)

Name		Name	Last commit message	Last commit date
Latest commit History 24 Commits
R		R
data		data
man		man
DESCRIPTION		DESCRIPTION
NAMESPACE		NAMESPACE
README.md		README.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

R

R

data

data

man

man

DESCRIPTION

DESCRIPTION

NAMESPACE

NAMESPACE

README.md

README.md

Repository files navigation

yaGST

Installation

Running a gene-set enrichment analysis

How to run an enrichment analysis on a collection of gene-sets.

Running an extended enrichment analysis on a regulon (with positive and negative targets)

Running ee-MWW

About

Releases

Packages

Languages

miccec/yaGST

Folders and files

Latest commit

History

Repository files navigation

yaGST

Installation

Running a gene-set enrichment analysis

How to run an enrichment analysis on a collection of gene-sets.

Running an extended enrichment analysis on a regulon (with positive and negative targets)

Running ee-MWW

About

Resources

Stars

Watchers

Forks

Languages