In [1]:
#Load library
library(tximport)
library(tidyverse)
library(biomaRt)
library(DESeq2)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects

In [2]:
#### Step1_Tximport RHAM1 ####
### Tximport ###
# load study design file to load Kallisto
targets <- read_tsv("Sequencing_data/kallisto/studydesign.txt")
path <- file.path("Sequencing_data/kallisto", targets$sample, "abundance.tsv") 
all(file.exists(path))

[1mRows: [22m[34m9[39m [1mColumns: [22m[34m3[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (3): sample, sra_accession, group

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [3]:
# load reference from ensembl
listMarts(host="plants.ensembl.org")
myMart <- useMart(biomart="plants_mart", host="plants.ensembl.org")
available.datasets <- listDatasets(myMart)
At.anno <- useMart(biomart="plants_mart", host="plants.ensembl.org",
                   dataset = "athaliana_eg_gene")
At.attributes <- listAttributes(At.anno)
Tx.At <- getBM(attributes=c('ensembl_gene_id',
                            'external_gene_name'),
               mart = At.anno)

# make reference tx2gene table
Tx.At <- as_tibble(Tx.At)
Tx.At <- dplyr::rename(Tx.At, target_id = ensembl_gene_id, 
                       gene_name = external_gene_name)
Tx2Gene.AT <- Tx.At
Tx2Gene.AT$gene_name <- Tx.At$target_id 

# tximport from RG-I kallisto results
txi.kallisto.tsv <- tximport(path, 
                             type = "kallisto", 
                             tx2gene = Tx2Gene.AT, 
                             txOut = FALSE, #Raw name TRUE: target_id, FALSE: gene_id
                             countsFromAbundance = "no", 
                             #if your RNAs are not from 3'end sequencing method, use "lengthScaledTPM"
                             #countsFromAbundance = "lengthScaledTPM",
                             ignoreTxVersion = TRUE)



“Ensembl will soon enforce the use of https.
Ensure the 'host' argument includes "https://"”


biomart,version
<chr>,<chr>
plants_mart,Ensembl Plants Genes 60
plants_variations,Ensembl Plants Variations 60


“Ensembl will soon enforce the use of https.
Ensure the 'host' argument includes "https://"”
“Ensembl will soon enforce the use of https.
Ensure the 'host' argument includes "https://"”
Note: importing `abundance.h5` is typically faster than `abundance.tsv`

reading in files with read_tsv

1 
2 
3 
4 
5 
6 
7 
8 
9 


summarizing abundance

summarizing counts

summarizing length



In [4]:
#### Step2_DESeq RHAM1 ####
# Set the experiment design
samples.rh <- targets
samples.rh$condition <- factor(rep(c("A", "B", "C"),each=3))
rownames(samples.rh) <- samples.rh$sample

# DEseq using Tximport with RG-I tsv file
ddsrh <- DESeqDataSetFromTximport(txi.kallisto.tsv,
                                  colData = samples.rh,
                                  design = ~ condition)
ddsrh.dds <- DESeq(ddsrh)
colnames(ddsrh.dds) <- samples.rh$sample


“Setting row names on a tibble is deprecated.”
using counts and average transcript lengths from tximport

estimating size factors

using 'avgTxLength' from assays(dds), correcting for library size

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



In [5]:
# Get results comparing with RG-I 0min treatment
res.30min.rh <- results(ddsrh.dds, contrast=c('condition', 'B', 'A'))
res.90min.rh <- results(ddsrh.dds, contrast=c('condition', 'C', 'A'))
res.30min.rh.df <- as.data.frame(res.30min.rh)
res.90min.rh.df <- as.data.frame(res.90min.rh)
res.30min.rh.full <- merge(res.30min.rh.df, as.data.frame(counts(ddsrh.dds, normalized=TRUE)), by="row.names", sort=FALSE)
res.90min.rh.full <- merge(res.90min.rh.df, as.data.frame(counts(ddsrh.dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(res.30min.rh.full)[1] <- "Gene"
names(res.90min.rh.full)[1] <- "Gene"

# Generate RG-I full DESeq results file and export
res.30min.rh.full <- res.30min.rh.full %>%
  dplyr::arrange(Gene)
res.90min.rh.full <- res.90min.rh.full %>%
  dplyr::arrange(Gene)

write_csv(res.30min.rh.full, "01.DESeq_results/rh_30min_gene.csv")
print("01.DESeq_results/rh_30min_gene.csv")
write_csv(res.90min.rh.full, "01.DESeq_results/rh_90min_gene.csv")
print("01.DESeq_results/rh_90min_gene.csv")

[1] "01.DESeq_results/rh_30min_gene.csv"
[1] "01.DESeq_results/rh_90min_gene.csv"
