In [None]:
# Load MARVEL package
library(MARVEL)

# Load adjunct packages for selected MARVEL features
  # General data processing, plotting
  library(ggnewscale)
  library(ggrepel)
  library(parallel)
  library(reshape2)
  library(stringr)
  library(textclean)

  # Dimension reduction analysis
  library(factoextra)
  library(FactoMineR)

  # Modality analysis
  library(fitdistrplus)

  # Differential splicing analysis
  library(kSamples)
  library(twosamples)

  # Gene ontology analysis
  library(AnnotationDbi)
  library(clusterProfiler)
  library(org.Hs.eg.db)
  library(org.Mm.eg.db)

  # Nonsense-mediated decay (NMD) analysis
  library(Biostrings)
  library(BSgenome)
  library(BSgenome.Hsapiens.NCBI.GRCh38)

# Load adjunct packages for this tutorial
library(data.table)
library(ggplot2)
library(gridExtra)

载入需要的程序包：ggplot2

Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

载入需要的程序包：MASS

载入需要的程序包：survival

载入需要的程序包：SuppDists

载入需要的程序包：stats4

载入需要的程序包：BiocGenerics


载入程序包：‘BiocGenerics’




In [None]:
# Read the tab-delimited SJ table (first row is the header) with fread and
# convert the result to a plain data.frame. It is later passed to
# CreateMarvelObject() as SpliceJunction.
path <- "result/marvel/dataset1/"
file <- "SJ.txt"
sj <- as.data.frame(
  fread(
    paste0(path, file),
    sep = "\t",
    header = TRUE,
    stringsAsFactors = FALSE,
    na.strings = "NA"
  )
)

In [None]:
# Read the tab-delimited phenoData table that accompanies the SJ table
# (header row present, "NA" strings read as missing). It is later passed to
# CreateMarvelObject() as SplicePheno.
path <- "result/marvel/dataset1/"
file <- "SJ_phenoData.txt"
df.pheno <- read.table(
  paste0(path, file),
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = "NA"
)

In [None]:
# Read the GENCODE v31 GTF as a headerless, tab-delimited table with quote
# handling disabled (quote = ""), then convert it to a plain data.frame.
# NOTE(review): a later cell re-reads the same file name from
# "genome/gencode.v31/" with quote = "\"" and overwrites `gtf`; confirm which
# directory is the correct one.
path <- "genome/gencode.v31/gencode.v31/"
file <- "gencode.v31.annotation.gtf"
gtf <- as.data.frame(
  data.table::fread(
    paste0(path, file),
    sep = "\t",
    header = FALSE,
    stringsAsFactors = FALSE,
    quote = ""
  )
)

In [None]:
# Read one rMATS "fromGTF.<event_type>.txt" table and run it through
# Preprocess_rMATS().
#
# event_type: event label used both in the file name and as the EventType
#             argument ("SE", "MXE", "RI", "A5SS" or "A3SS" in this notebook).
# gtf:        GTF table forwarded to Preprocess_rMATS() as GTF.
# path:       directory prefix holding the rMATS output; it is pasted directly
#             in front of the file name, so it must end with "/".
# Returns whatever Preprocess_rMATS() returns for that event type.
load_rmats_features <- function(event_type, gtf, path = "./rMATS_out/") {
  events <- read.table(
    paste0(path, "fromGTF.", event_type, ".txt"),
    sep = "\t",
    header = TRUE,
    stringsAsFactors = FALSE
  )
  Preprocess_rMATS(file = events, GTF = gtf, EventType = event_type)
}

# One feature table per event type; the object names are kept because the
# next cell collects them into df.feature.list.
df.feature.se <- load_rmats_features("SE", gtf)
df.feature.mxe <- load_rmats_features("MXE", gtf)
df.feature.ri <- load_rmats_features("RI", gtf)
df.feature.a5ss <- load_rmats_features("A5SS", gtf)
df.feature.a3ss <- load_rmats_features("A3SS", gtf)

In [None]:
# Bundle the per-event feature tables into one list keyed by event type.
df.feature.list <- list(
  SE = df.feature.se,
  MXE = df.feature.mxe,
  RI = df.feature.ri,
  A5SS = df.feature.a5ss,
  A3SS = df.feature.a3ss
)

In [None]:
# Read the tab-delimited intron count table (header row present) and convert
# it to a plain data.frame. It is later passed to CreateMarvelObject() as
# IntronCounts.
path <- "./"
file <- "Counts_by_Region.txt"
df.intron.counts <- as.data.frame(
  fread(
    paste0(path, file),
    sep = "\t",
    header = TRUE,
    stringsAsFactors = FALSE,
    na.strings = "NA"
  )
)

# Early sanity check: the CheckAlignment(level = "SJ") output recorded further
# down in this notebook reports 0 overlapping samples between the intron count
# matrix and the SJ phenoData. Warn here (without stopping) when none of the
# column names matches a phenoData sample ID, so the mismatch is visible
# before the Marvel object is built.
if (exists("df.pheno") && "sample.id" %in% colnames(df.pheno)) {
  shared_ids <- intersect(colnames(df.intron.counts), df.pheno$sample.id)
  if (length(shared_ids) == 0) {
    warning(
      "No column name in ", file, " matches df.pheno$sample.id; ",
      "the intron count columns probably need renaming to the sample IDs.",
      call. = FALSE
    )
  }
}

# Preview the top-left corner of the table.
df.intron.counts[1:5, 1:5]

In [13]:
# Read the tab-delimited TPM matrix (header row present). It is later passed
# to CreateMarvelObject() as Exp.
path <- "/disk1/humanData/131_GSE81252_253GB/StarSolo_mapping/test_StarSolo_mapping/Gene/"
file <- "tpm_matrix.tab"
df.tpm <- read.table(
  paste0(path, file),
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

In [15]:
# Read the tab-delimited feature table that accompanies the TPM matrix
# (header row present). It is later passed to CreateMarvelObject() as
# GeneFeature.
path <- "/disk1/humanData/131_GSE81252_253GB/StarSolo_mapping/test_StarSolo_mapping/Gene/"
file <- "TPM_featureData.txt"
df.tpm.feature <- read.table(
  paste0(path, file),
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

In [17]:
# Re-read the GENCODE v31 GTF (headerless, tab-delimited, double quote as the
# quote character) and convert it to a plain data.frame. This overwrites any
# `gtf` object created earlier in the session.
# NOTE(review): an earlier cell reads the same file name from
# "genome/gencode.v31/gencode.v31/" with quote = ""; confirm which directory
# and quote setting are intended.
path <- "genome/gencode.v31/"
file <- "gencode.v31.annotation.gtf"
gtf <- as.data.frame(
  fread(
    paste0(path, file),
    sep = "\t",
    header = FALSE,
    stringsAsFactors = FALSE,
    na.strings = "NA",
    quote = "\""
  )
)

In [18]:
# Assemble the Marvel object from the tables loaded in the cells above:
# SJ counts and phenoData, the per-event feature list, intron counts, the TPM
# matrix with its feature table, and the GTF.
marvel <- CreateMarvelObject(
  SpliceJunction = sj,
  SplicePheno = df.pheno,
  SpliceFeature = df.feature.list,
  IntronCounts = df.intron.counts,
  GeneFeature = df.tpm.feature,
  Exp = df.tpm,
  GTF = gtf
)

In [17]:
# Detect AFE events and store the result back on the Marvel object.
# NOTE(review): min.cells is 10 here but 50 in the ALE cell below; confirm
# the different thresholds are intentional.
marvel <- DetectEvents(
  MarvelObject = marvel,
  min.cells = 10,
  min.expr = 1,
  track.progress = FALSE,
  EventType = "AFE"
)

Parsing GTF...

Analysing +ve strand...

Retrieving expressed genes...

12359 expressed genes identified

Retrieving final exon-exon junctions from 50572 multi-exon transcripts

Collapsing redundant coordinates/exons...

2080 AFE identified

Analysing -ve strand...

Retrieving gene metadata from GTF...

Retrieving expressed genes...

12099 expressed genes identified

Retrieving final exon-exon junctions from 49028 multi-exon transcripts

Collapsing redundant coordinates/exons...

2079 AFE identified



In [18]:
# Detect ALE events and store the result back on the Marvel object.
marvel <- DetectEvents(
  MarvelObject = marvel,
  min.cells = 50,
  min.expr = 1,
  track.progress = FALSE,
  EventType = "ALE"
)

Parsing GTF...

Analysing +ve strand...

Retrieving gene metadata from GTF...

Retrieving expressed genes...

8250 expressed genes identified

Retrieving final exon-exon junctions from 44658 multi-exon transcripts

Collapsing redundant coordinates/exons...

813 ALE identified

Analysing -ve strand...

Retrieving expressed genes...

8050 expressed genes identified

Retrieving final exon-exon junctions from 43005 multi-exon transcripts

Collapsing redundant coordinates/exons...

826 ALE identified



In [19]:
# Check, at the "SJ" level, that the sample IDs in the metadata line up with
# the columns of the count matrices; the reported counts are printed below.
marvel <- CheckAlignment(
  MarvelObject = marvel,
  level = "SJ"
)

765 samples (cells) identified in SJ phenoData

765 samples (cells) identified in SJ count matrix

765 overlapping samples (cells) identified

sample IDs in sample metadata and SJ count matrix column names MATCHED

Additional checks for intron count matrix...

765 samples (cells) identified in SJ phenoData

770 samples (cells) identified in intron count matrix

0 overlapping samples (cells) identified

sample IDs in sample metadata and SJ count matrix and intron count matrix column names NOT MATCHED



In [19]:
# Validate, filter, compute SE splicing events
marvel <- ComputePSI(
  MarvelObject = marvel,
  CoverageThreshold = 20,
  UnevenCoverageMultiplier = 10,
  EventType = "SE"
)

97655 splicing events found

25361 splicing events validated and quantified



In [1]:
# Validate, filter, compute RI splicing events (thread = 10 is passed through
# to ComputePSI).
# NOTE(review): the CheckAlignment(level = "SJ") output recorded above reports
# 0 overlapping samples between the intron count matrix and the SJ phenoData;
# presumably RI quantification relies on those intron counts, so confirm the
# sample IDs are reconciled before trusting this step.
marvel <- ComputePSI(
  MarvelObject = marvel,
  CoverageThreshold = 20,
  EventType = "RI",
  thread = 10
)

In [None]:
# Validate, filter, compute MXE splicing events (the only call in this cell
# that also sets UnevenCoverageMultiplier).
marvel <- ComputePSI(
  MarvelObject = marvel,
  CoverageThreshold = 20,
  UnevenCoverageMultiplier = 10,
  EventType = "MXE"
)

# Validate, filter, compute A5SS, A3SS, AFE and ALE splicing events, in that
# order; each call updates the Marvel object returned by the previous one.
for (event_type in c("A5SS", "A3SS", "AFE", "ALE")) {
  marvel <- ComputePSI(
    MarvelObject = marvel,
    CoverageThreshold = 20,
    EventType = event_type
  )
}

In [20]:
# Re-check alignment at the "splicing" level now that PSI has been computed;
# the sample and transcript counts it reports are printed below.
marvel <- CheckAlignment(
  MarvelObject = marvel,
  level = "splicing"
)


765 samples (cells) identified in sample metadata

765 samples (cells) identified in sample metadata

765 samples (cells) identified in matrix(s) 

765 overlapping samples (cells) retained

Checking alignment...

sample IDs in sample metadata and matrix column names MATCHED for SE

Checking for SE...

25361 transcripts identified in transcript metadata

25361 transcripts identified in matrix

25361 overlapping transcripts retained

Checking alignment...

Transcript IDs in transcript metadata and matrix row names MATCHED for SE



In [21]:
# Refresh df.pheno from the phenoData stored on the Marvel object, so the
# sample IDs used below are the ones the object currently holds.
df.pheno <- marvel[["SplicePheno"]]

In [2]:
# Run CountEvents over every sample listed in df.pheno, with min.cells = 10.
marvel <- CountEvents(
  MarvelObject = marvel,
  sample.ids = df.pheno$sample.id,
  min.cells = 10
)

In [37]:
# Export the transposed SE PSI table as tab-separated text: row names are
# written, the column-name header is not, and nothing is quoted.
psi_se_transposed <- t(marvel[["PSI"]][["SE"]])
write.table(
  psi_se_transposed,
  file = "../result/marvel/dataset1/mat.txt",
  sep = "\t",
  row.names = TRUE,
  col.names = FALSE,
  quote = FALSE
)

In [23]:
# Persist the Marvel object under its own name so load() restores `marvel`.
save(
  list = "marvel",
  file = "../result/marvel/dataset1/SE.RData"
)