# VSOX
QC

## 0. Setting up the work environment<a id="0"></a>

In [None]:
suppressPackageStartupMessages({
    library(DropletUtils)
    library(SingleCellExperiment)
    library(scuttle)
    library(Seurat)
    library(SeuratWrappers)
    library(stringr)
    library(dplyr)
    library(data.table)
    library(Matrix)
    library(patchwork)
    library(ggplot2)
})

# Default size of the plots rendered by the notebook (width 16, height 8).
options(repr.plot.width = 16, repr.plot.height = 8)

## KO

## 1. Importing data<a id="1"></a>


In [None]:
# Read the 10x count matrix of the KO P13 sample and wrap it in a Seurat
# object; the raw matrix is only needed for this step, so it is dropped again.
ko.counts <- Read10X(data.dir = "./KO/P13/")
VSOX <- CreateSeuratObject(counts = ko.counts)
rm(ko.counts)

## 2. Preprocessing: quality control with scater<a id="4"></a>

### 2.1. RD3_1

#### 2.1.1. Converting a Seurat object to SingleCellExperiment 

In [None]:
# Convert the Seurat object to a SingleCellExperiment; the QC steps that
# follow (perCellQCMetrics, addPerCellQC, isOutlier) operate on this class.
VSOX <- as.SingleCellExperiment(VSOX)

#### 2.1.2. Calculating QC metrics

In [None]:
# Mitochondrial genes are recognised by the "mt-" prefix of the gene name.
# is.mito is a logical mask over all rows; mito.list holds the matching names.
is.mito <- grepl("^mt-", rownames(VSOX))
mito.list <- rownames(VSOX)[is.mito]

In [None]:
# Spike-in features are recognised by the "gSpikein" prefix of the row name.
# is.spike is a logical mask over all rows; spike.list holds the matching names.
is.spike <- grepl("^gSpikein", rownames(VSOX))
spike.list <- rownames(VSOX)[is.spike]

In [None]:
# Per-cell QC metrics as a separate table, with the mitochondrial and spike-in
# masks passed as feature subsets (named "Mt" and "ERCC" here).
celldata <- perCellQCMetrics(VSOX, subsets = list(Mt = is.mito, ERCC = is.spike))

In [None]:
# Add the per-cell QC metrics to the object itself. The subset names used here
# ("Mito", "ERCC") determine the column names read later on, e.g.
# VSOX$subsets_Mito_percent.
VSOX <- addPerCellQC(VSOX, subsets=list(Mito = is.mito, ERCC = is.spike))
# List the available per-cell metadata columns.
colnames(colData(VSOX))

The expression of all ERCC spike-ins is 0 because spike-ins were not used in this study.

##### **2.1.2.1. QC of the cells by feature number and library size**


In [None]:
# Flag cells whose library size or number of detected genes lies more than
# 3 MADs away from the median (either direction), tested on the log scale.
libsize.drop <- isOutlier(VSOX$sum, nmads = 3, type = "both", log = TRUE)
feature.drop <- isOutlier(VSOX$detected, nmads = 3, type = "both", log = TRUE)

# Read the cutoffs from the "thresholds" attribute that isOutlier() attaches
# to its result, so the plotted lines are the limits actually used for
# filtering. Recomputing median +/- 3 * MAD on the raw scale does not match
# the log-scale test above and can yield a negative lower bound.
libsize.limits <- attr(libsize.drop, "thresholds")
feature.limits <- attr(feature.drop, "thresholds")

# libsize.drop cutoff values (in thousands, to match the histogram axis)
min.libsize <- libsize.limits[["lower"]] / 1e3
max.libsize <- libsize.limits[["higher"]] / 1e3

# feature.drop cutoff values
min.features <- feature.limits[["lower"]]
max.features <- feature.limits[["higher"]]

# Two histograms side by side: library size (left), detected genes (right).
par(mfrow = c(1, 2))

hist(VSOX$sum/1e3, xlab = "Library sizes (thousands)", main = "", breaks = 20,
     col = "grey", ylab = "Number of cells")

abline(v = min.libsize, col = "blue", lwd = 2, lty = 2)
abline(v = max.libsize, col = "blue", lwd = 2, lty = 2)

hist(VSOX$detected, xlab = "Number of expressed genes", main = "", breaks = 20,
     col = "grey", ylab = "Number of cells")

abline(v = min.features, col = "blue", lwd = 2, lty = 2)
abline(v = max.features, col = "blue", lwd = 2, lty = 2)

##### **2.1.2.2. Proportion of mitochondrial reads**


In [None]:
# Flag cells with an unusually HIGH mitochondrial percentage (more than
# 3 MADs above the median). Only the upper tail is tested: the default
# type = "both" would also discard cells with a low mitochondrial percentage,
# which does not indicate poor quality and is not the cutoff drawn below.
# NOTE(review): this can retain cells the two-sided test removed - confirm
# downstream results if they were produced with the two-sided filter.
mito.drop <- isOutlier(VSOX$subsets_Mito_percent, nmads = 3, type = "higher")

# Upper cutoff exactly as applied by isOutlier().
max.mito <- attr(mito.drop, "thresholds")[["higher"]]

hist(VSOX$subsets_Mito_percent, xlab = "Mitochondrial proportion (%)",
     ylab = "Number of cells",
     breaks = 40, main = "", col = "grey")

abline(v = max.mito, col = "blue", lwd = 2, lty = 2)

Subset by column to retain only high-quality cells that pass all filters.

In [None]:
# Keep only the cells that none of the three QC filters flagged, then report
# how many cells each filter flagged and how many cells remain.
VSOX_QC <- VSOX[, !libsize.drop & !feature.drop & !mito.drop]
data.frame(
    ByLibrarySize = sum(libsize.drop),
    ByFeature = sum(feature.drop),
    ByMito = sum(mito.drop),
    Remaining = ncol(VSOX_QC)
)

#### 2.1.2. Filtering out low-abundance genes


In [None]:
# Average count per gene across the QC-passing cells, taken from the "counts"
# assay over all rows (subset_row = NULL).
average.counts <- calculateAverage(VSOX_QC, exprs_values = "counts",
                                   subset_row = NULL)
# Genes with an average count of at least 0.001 are kept.
genes.to.keep <- average.counts >= 0.001
# Number of genes passing the abundance filter.
sum(genes.to.keep)

In [None]:
# Distribution of the log10 average counts per gene; the dashed line marks the
# 0.001 abundance cutoff used for gene filtering.
hist(
    log10(average.counts),
    xlab = expression(Log[10]~"average count"),
    ylab = "Gene counts",
    main = "",
    breaks = 100,
    col = "grey"
)
abline(v = log10(0.001), lty = 2, lwd = 2, col = "blue")

# The averages are no longer needed once plotted.
rm(average.counts)

Apply the mean-based filter to the data by subsetting the `SingleCellExperiment` object.

In [None]:
# Keep only the genes that passed the abundance filter; the result replaces
# VSOX and the intermediate cell-filtered object is removed.
VSOX <- VSOX_QC[genes.to.keep,]

rm(VSOX_QC)

# Dimensions (genes x cells) after cell and gene filtering.
dim(VSOX)

#### 2.1.3. Duplicate removal

In [None]:
# Drop rows whose gene name already occurred earlier (the first occurrence of
# each name is kept).
VSOX <- VSOX[!duplicated(rownames(VSOX)),]
# Dimensions after duplicate removal.
dim(VSOX)

In [None]:
# Convert the filtered SingleCellExperiment back to a Seurat object.
VSOX <- as.Seurat(VSOX)

In [None]:
# Show a summary of the resulting Seurat object.
VSOX

In [None]:
# Write the QC-filtered KO P13 object to the working directory.
saveRDS(VSOX, file = "VSOX_KOp13_QC.Rds")

## WT

In [None]:
# Read the 10x count matrix of the WT P13 sample and wrap it in a Seurat
# object; the raw matrix is only needed for this step, so it is dropped again.
# Note that this overwrites the VSOX object of the KO section.
wt.counts <- Read10X(data.dir = "./WT/P13/")
VSOX <- CreateSeuratObject(counts = wt.counts)
rm(wt.counts)

## 2. Preprocessing: quality control with scater<a id="4"></a>

### 2.1. RD3_1

#### 2.1.1. Converting a Seurat object to SingleCellExperiment 

In [None]:
# Convert the Seurat object to a SingleCellExperiment; the QC steps that
# follow (perCellQCMetrics, addPerCellQC, isOutlier) operate on this class.
VSOX <- as.SingleCellExperiment(VSOX)

#### 2.1.2. Calculating QC metrics

In [None]:
# Mitochondrial genes are recognised by the "mt-" prefix of the gene name.
# is.mito is a logical mask over all rows; mito.list holds the matching names.
is.mito <- grepl("^mt-", rownames(VSOX))
mito.list <- rownames(VSOX)[is.mito]

In [None]:
# Spike-in features are recognised by the "gSpikein" prefix of the row name.
# is.spike is a logical mask over all rows; spike.list holds the matching names.
is.spike <- grepl("^gSpikein", rownames(VSOX))
spike.list <- rownames(VSOX)[is.spike]

In [None]:
# Per-cell QC metrics as a separate table, with the mitochondrial and spike-in
# masks passed as feature subsets (named "Mt" and "ERCC" here).
celldata <- perCellQCMetrics(VSOX, subsets = list(Mt = is.mito, ERCC = is.spike))

In [None]:
# Add the per-cell QC metrics to the object itself. The subset names used here
# ("Mito", "ERCC") determine the column names read later on, e.g.
# VSOX$subsets_Mito_percent.
VSOX <- addPerCellQC(VSOX, subsets=list(Mito = is.mito, ERCC = is.spike))
# List the available per-cell metadata columns.
colnames(colData(VSOX))

The expression of all ERCC spike-ins is 0 because spike-ins were not used in this study.

##### **2.1.2.1. QC of the cells by feature number and library size**


In [None]:
# Flag cells whose library size or number of detected genes lies more than
# 3 MADs away from the median (either direction), tested on the log scale.
libsize.drop <- isOutlier(VSOX$sum, nmads = 3, type = "both", log = TRUE)
feature.drop <- isOutlier(VSOX$detected, nmads = 3, type = "both", log = TRUE)

# Read the cutoffs from the "thresholds" attribute that isOutlier() attaches
# to its result, so the plotted lines are the limits actually used for
# filtering. Recomputing median +/- 3 * MAD on the raw scale does not match
# the log-scale test above and can yield a negative lower bound.
libsize.limits <- attr(libsize.drop, "thresholds")
feature.limits <- attr(feature.drop, "thresholds")

# libsize.drop cutoff values (in thousands, to match the histogram axis)
min.libsize <- libsize.limits[["lower"]] / 1e3
max.libsize <- libsize.limits[["higher"]] / 1e3

# feature.drop cutoff values
min.features <- feature.limits[["lower"]]
max.features <- feature.limits[["higher"]]

# Two histograms side by side: library size (left), detected genes (right).
par(mfrow = c(1, 2))

hist(VSOX$sum/1e3, xlab = "Library sizes (thousands)", main = "", breaks = 20,
     col = "grey", ylab = "Number of cells")

abline(v = min.libsize, col = "blue", lwd = 2, lty = 2)
abline(v = max.libsize, col = "blue", lwd = 2, lty = 2)

hist(VSOX$detected, xlab = "Number of expressed genes", main = "", breaks = 20,
     col = "grey", ylab = "Number of cells")

abline(v = min.features, col = "blue", lwd = 2, lty = 2)
abline(v = max.features, col = "blue", lwd = 2, lty = 2)

##### **2.1.2.2. Proportion of mitochondrial reads**


In [None]:
# Flag cells with an unusually HIGH mitochondrial percentage (more than
# 3 MADs above the median). Only the upper tail is tested: the default
# type = "both" would also discard cells with a low mitochondrial percentage,
# which does not indicate poor quality and is not the cutoff drawn below.
# NOTE(review): this can retain cells the two-sided test removed - confirm
# downstream results if they were produced with the two-sided filter.
mito.drop <- isOutlier(VSOX$subsets_Mito_percent, nmads = 3, type = "higher")

# Upper cutoff exactly as applied by isOutlier().
max.mito <- attr(mito.drop, "thresholds")[["higher"]]

hist(VSOX$subsets_Mito_percent, xlab = "Mitochondrial proportion (%)",
     ylab = "Number of cells",
     breaks = 40, main = "", col = "grey")

abline(v = max.mito, col = "blue", lwd = 2, lty = 2)

Subset by column to retain only high-quality cells that pass all filters.

In [None]:
# Keep only the cells that none of the three QC filters flagged, then report
# how many cells each filter flagged and how many cells remain.
VSOX_QC <- VSOX[, !libsize.drop & !feature.drop & !mito.drop]
data.frame(
    ByLibrarySize = sum(libsize.drop),
    ByFeature = sum(feature.drop),
    ByMito = sum(mito.drop),
    Remaining = ncol(VSOX_QC)
)

#### 2.1.2. Filtering out low-abundance genes


In [None]:
# Average count per gene across the QC-passing cells, taken from the "counts"
# assay over all rows (subset_row = NULL).
average.counts <- calculateAverage(VSOX_QC, exprs_values = "counts",
                                   subset_row = NULL)
# Genes with an average count of at least 0.001 are kept.
genes.to.keep <- average.counts >= 0.001
# Number of genes passing the abundance filter.
sum(genes.to.keep)

In [None]:
# Distribution of the log10 average counts per gene; the dashed line marks the
# 0.001 abundance cutoff used for gene filtering.
hist(
    log10(average.counts),
    xlab = expression(Log[10]~"average count"),
    ylab = "Gene counts",
    main = "",
    breaks = 100,
    col = "grey"
)
abline(v = log10(0.001), lty = 2, lwd = 2, col = "blue")

# The averages are no longer needed once plotted.
rm(average.counts)

Apply the mean-based filter to the data by subsetting the `SingleCellExperiment` object.

In [None]:
# Keep only the genes that passed the abundance filter; the result replaces
# VSOX and the intermediate cell-filtered object is removed.
VSOX <- VSOX_QC[genes.to.keep,]

rm(VSOX_QC)

# Dimensions (genes x cells) after cell and gene filtering.
dim(VSOX)

#### 2.1.3. Duplicate removal

In [None]:
# Drop rows whose gene name already occurred earlier (the first occurrence of
# each name is kept).
VSOX <- VSOX[!duplicated(rownames(VSOX)),]
# Dimensions after duplicate removal.
dim(VSOX)

In [None]:
# Convert the filtered SingleCellExperiment back to a Seurat object.
VSOX <- as.Seurat(VSOX)

In [None]:
# Show a summary of the resulting Seurat object.
VSOX

In [None]:
# Write the QC-filtered WT P13 object to the working directory.
saveRDS(VSOX, file = "VSOX_WTp13_QC.Rds")