In [None]:
library("Matrix")
library(rlang)
library(htmltools)
library(fastmap)
library(pillar)
library(celldex);
library(reshape);
library(cowplot);
library(biomaRt);
library(vctrs, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0")
library(Seurat, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library(ggplot2);
library(tidyverse, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library("org.Mm.eg.db", lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library(readxl, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library(STACAS, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library(Azimuth, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library(SingleR, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
library(scCustomize, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0");
# Restore the objects saved in ref_wherry_new.RData into the workspace
# (load() recreates them under the names they were saved with).
load(file = "/storage/brno2/home/vercanie/CD4_CTL/ref_wherry_new.RData")
# Fetch the Monaco immune reference through celldex.
mid.se <- celldex::MonacoImmuneData()

### Preprocess data

In [None]:
# Collect the full paths of every file in the temporary HPAP data directory.
hpap_files <- list.files(path = "hpapdata/temp_data/", full.names = TRUE)

In [None]:
# Show the discovered paths.
hpap_files

In [None]:
# Read each path with readRDS(); the result is a list with one element per file.
full_dataset <- purrr::map(hpap_files, readRDS)

In [None]:
# Collapse the list of objects into a single merged object.
full_dataset <- scCustomize::Merge_Seurat_List(full_dataset)

In [None]:
# Print a summary of the merged object.
full_dataset

In [None]:
# Persist the merged object at the path the following cell reads it back from.
# It used to be written to the working directory, while the reload (and the
# later re-save) use "hpap_datasets/", so a fresh run could not find the file.
dir.create("hpap_datasets", showWarnings = FALSE, recursive = TRUE)
saveRDS(full_dataset, file = "hpap_datasets/hpap_5p_full.rds")

In [None]:
# Reload the merged dataset from disk.
full_dataset <- readRDS(file = "hpap_datasets/hpap_5p_full.rds")

In [None]:
# Default dimensional-reduction plot of the reloaded object.
DimPlot(object = full_dataset)

In [None]:
# plan() is exported by the future package, which is not attached by any
# library() call visible in this notebook, so it is namespace-qualified here.
future::plan("multisession", workers = 2)

In [None]:
# Switch back to sequential (non-parallel) evaluation.
future::plan("sequential")

In [None]:
# Allow globals of up to 10000 MiB to be exported to future workers.
options(future.globals.maxSize = 10000 * 1024^2)

# Normalise, then pick the 2000 most variable features ("vst" method).
full_dataset <- NormalizeData(full_dataset)
full_dataset <- FindVariableFeatures(
  full_dataset,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale only after the variable features are known: ScaleData() defaults to the
# object's current variable features, so calling it first can leave newly
# selected features unscaled, and RunPCA() then runs without them.
full_dataset <- ScaleData(full_dataset)

# Dimensionality reduction and graph-based clustering.
full_dataset <- RunPCA(full_dataset)
full_dataset <- RunUMAP(full_dataset, dims = 1:15)
# NOTE(review): FindNeighbors() is called with its default arguments while the
# UMAP uses dims 1:15 - confirm that the difference is intended.
full_dataset <- FindNeighbors(full_dataset)
full_dataset <- FindClusters(full_dataset, resolution = 0.2)

In [None]:
# Rasterised cluster plot with cluster labels drawn on top.
DimPlot(full_dataset, label = TRUE, raster = TRUE)

In [None]:
# Overwrite the stored dataset with the processed object.
saveRDS(full_dataset, file = "hpap_datasets/hpap_5p_full.rds")

In [None]:
# Expression of individual marker genes on the full dataset (no rasterisation).
FeaturePlot(full_dataset, features = "CD3D", raster = FALSE)

In [None]:
FeaturePlot(full_dataset, features = "AIRE", raster = FALSE)

In [None]:
FeaturePlot(full_dataset, features = "CD14", raster = FALSE)

In [None]:
# Extract the cells assigned to cluster 8 of the full dataset.
cl8 <- subset(full_dataset, subset = seurat_clusters == 8)

In [None]:
# Plot the extracted cells.
DimPlot(cl8)

In [None]:
# Re-process the cluster-8 subset on its own.
cl8 <- NormalizeData(cl8)
cl8 <- FindVariableFeatures(
  cl8,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale after variable-feature selection: ScaleData() defaults to the object's
# current variable features, so running it first would scale whatever features
# the object already carried and RunPCA() would run without any newly selected
# feature that was not scaled.
cl8 <- ScaleData(cl8)

cl8 <- RunPCA(cl8)
cl8 <- RunUMAP(cl8, dims = 1:15)
# NOTE(review): FindNeighbors() uses its default arguments while the UMAP uses
# dims 1:15 - confirm that the difference is intended.
cl8 <- FindNeighbors(cl8)
cl8 <- FindClusters(cl8, resolution = 0.2)

In [None]:
# Clusters of the re-processed cluster-8 subset.
DimPlot(object = cl8)

In [None]:
# Store the re-processed subset.
saveRDS(cl8, file = "hpap_datasets/hpap_5p_cluster8.rds")

In [None]:
# CCR7 expression within the subset.
FeaturePlot(cl8, features = "CCR7", raster = FALSE)

In [None]:
# Further marker genes inspected on the full dataset.
FeaturePlot(full_dataset, features = "CD19", raster = FALSE)

In [None]:
FeaturePlot(full_dataset, features = "MKI67", raster = FALSE)

In [None]:
FeaturePlot(full_dataset, features = "NCR1", raster = FALSE)

In [None]:
FeaturePlot(full_dataset, features = "TRGV2", raster = FALSE)

## T cells


In [None]:
# Keep the cells from clusters 3 to 9 of the full dataset.
t_nk <- subset(full_dataset, subset = seurat_clusters %in% 3:9)

In [None]:
# Labelled, rasterised plot of the selected cells.
DimPlot(t_nk, label = TRUE, raster = TRUE)

In [None]:
# Re-process the selected clusters on their own.
t_nk <- NormalizeData(t_nk)
t_nk <- FindVariableFeatures(
  t_nk,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale after variable-feature selection: ScaleData() defaults to the object's
# current variable features, so running it first would scale whatever features
# the object already carried and RunPCA() would run without any newly selected
# feature that was not scaled.
# NOTE(review): the resulting cluster numbering can differ from earlier runs;
# re-check the cluster ids hard-coded in later subset() calls.
t_nk <- ScaleData(t_nk)

t_nk <- RunPCA(t_nk)
t_nk <- RunUMAP(t_nk, dims = 1:15)
# NOTE(review): FindNeighbors() uses its default arguments while the UMAP uses
# dims 1:15 - confirm that the difference is intended.
t_nk <- FindNeighbors(t_nk)
t_nk <- FindClusters(t_nk, resolution = 0.2)

In [None]:
# Clusters of the re-processed subset, labelled and rasterised.
DimPlot(t_nk, label = TRUE, raster = TRUE)

In [None]:
# Marker genes inspected one at a time (no rasterisation).
FeaturePlot(t_nk, features = "CD3D", raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "CD8A", raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "CD4", raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "MKI67", raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "NCR1", raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "CCL5", raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "CCR7", raster = FALSE)

In [None]:
# Make "ADT" the default assay for the following calls.
DefaultAssay(t_nk) <- "ADT"

In [None]:
# List the feature names of the current default assay.
rownames(t_nk)

In [None]:
# Feature names matching the pattern "Hu.CD8" (regular expression).
grep("Hu.CD8", x = rownames(t_nk), value = TRUE)

In [None]:
# Plot two ADT features, capping the colour scale at 50.
FeaturePlot(t_nk, features = "Hu.CD4-RPA.T4", max.cutoff = 50, raster = FALSE)

In [None]:
FeaturePlot(t_nk, features = "Hu.CD8", max.cutoff = 50, raster = FALSE)

In [None]:
# Store the object (its default assay is "ADT" at this point).
saveRDS(t_nk, file = "hpap_5p_t_nk.rds")

In [None]:
## Filter

In [None]:
# Cluster overview used to decide which clusters to keep.
DimPlot(t_nk, label = TRUE, raster = TRUE)

In [None]:
# Wide canvas for four violin panels side by side.
options(repr.plot.width = 14, repr.plot.height = 5)

# QC metrics per cluster, drawn without individual points.
VlnPlot(
  t_nk,
  features = c("percent.mt", "percent.rp", "nCount_RNA", "nFeature_RNA"),
  ncol = 4,
  pt.size = 0,
  raster = FALSE
)

In [None]:
# Switch the default assay back to "RNA".
DefaultAssay(t_nk) <- "RNA"

In [None]:
# MS4A1 expression on a smaller canvas.
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(t_nk, features = "MS4A1", raster = FALSE)

In [None]:
# CD14 expression on the same canvas size.
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(t_nk, features = "CD14", raster = FALSE)

## Remove contaminating and low-quality cells

In [None]:
# Keep clusters 0, 1, 2, 3 and 5 of the T/NK object.
t_nk_filt <- subset(t_nk, subset = seurat_clusters %in% c(0:3, 5))

In [None]:
# Labelled, rasterised plot of the retained cells.
DimPlot(t_nk_filt, label = TRUE, raster = TRUE)

In [None]:
# Re-process the filtered T/NK object on its own.
t_nk_filt <- NormalizeData(t_nk_filt)
t_nk_filt <- FindVariableFeatures(
  t_nk_filt,
  selection.method = "vst",
  nfeatures = 2000
)
# Scale after variable-feature selection: ScaleData() defaults to the object's
# current variable features, so running it first would scale whatever features
# the object already carried and RunPCA() would run without any newly selected
# feature that was not scaled.
t_nk_filt <- ScaleData(t_nk_filt)

t_nk_filt <- RunPCA(t_nk_filt)
t_nk_filt <- RunUMAP(t_nk_filt, dims = 1:15)
# NOTE(review): FindNeighbors() uses its default arguments while the UMAP uses
# dims 1:15 - confirm that the difference is intended.
t_nk_filt <- FindNeighbors(t_nk_filt)
t_nk_filt <- FindClusters(t_nk_filt, resolution = 0.2)

In [None]:
# Clusters of the re-processed filtered object.
DimPlot(t_nk_filt, label = TRUE, raster = TRUE)

In [None]:
# Marker genes inspected one at a time (no rasterisation).
FeaturePlot(t_nk_filt, features = "CD3D", raster = FALSE)

In [None]:
FeaturePlot(t_nk_filt, features = "CD8A", raster = FALSE)

In [None]:
FeaturePlot(t_nk_filt, features = "CD4", raster = FALSE)

In [None]:
# Store the filtered object in the working directory.
saveRDS(t_nk_filt, file = "hpap_5p_t_nk_filt.rds")

In [None]:
# Run the external pipeline script in the current session.
# NOTE(review): presumably this defines annotate_tcell_data(), which is called
# further down - confirm against the script.
source(file = "../CD4_CTL/cd4_t_cell_meta_pipeline_v02.R")

In [None]:
library(Azimuth)

In [None]:
# Reference datasets fetched through celldex.
hpca.se <- celldex::HumanPrimaryCellAtlasData()
mid.se <- celldex::MonacoImmuneData()

In [None]:
library(SeuratData, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0")

In [None]:
library(pbmcref.SeuratData, lib = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0")

In [None]:
# Install SeuratData only when it is not already available, so re-running the
# notebook does not attempt a reinstall every time.
# NOTE(review): SeuratData may not be served by the default CRAN repository -
# confirm the install source if this branch is ever reached.
if (!requireNamespace("SeuratData", quietly = TRUE)) {
  install.packages("SeuratData")
}

In [None]:
# Install the "pbmcref" dataset through SeuratData.
SeuratData::InstallData("pbmcref")

In [None]:
# Attach Azimuth from the user library.
library(Azimuth, lib.loc = "/storage/brno2/home/vercanie/R/x86_64-pc-linux-gnu-library/4.0")


In [None]:
# List the datasets SeuratData knows about.
SeuratData::AvailableData()

In [None]:
# Print the AzimuthReference function itself (it is not called here).
Azimuth::AzimuthReference

In [None]:
# Annotate the filtered T/NK object with annotate_tcell_data(), which is not
# defined in this notebook.
# NOTE(review): presumably it comes from the sourced pipeline script and adds
# the annotation columns plotted below - confirm.
t_nk_filt <- annotate_tcell_data(t_nk_filt)

In [None]:
# Store the annotated object in the working directory.
saveRDS(t_nk_filt, file = "hpap_5p_t_nk_filt_annot.rds")

In [None]:
# Plot each annotation column with repelled labels; the canvas width is
# adjusted before every plot.
options(repr.plot.width = 14)

DimPlot(t_nk_filt, group.by = "Monaco_single", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 10)
DimPlot(t_nk_filt, group.by = "Wherry_main", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 12)
DimPlot(t_nk_filt, group.by = "predicted.celltype.l3", label = TRUE, repel = TRUE)

## QC - removal of dead and contaminating cells

In [None]:
# Cluster overview plus QC metrics per cluster (violins without points).
DimPlot(full_dataset, label = TRUE)

VlnPlot(
  full_dataset,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rp"),
  ncol = 2,
  pt.size = 0
)

In [None]:
# The same two views, grouped by the "source" metadata column.
DimPlot(full_dataset, group.by = "source")

VlnPlot(
  full_dataset,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rp"),
  ncol = 2,
  pt.size = 0,
  group.by = "source"
)

In [None]:
# QC thresholds shown in the cutoff plots below.
cutoff_percent_mt <- 5
cutoff_nFeature_RNA <- 300
# Nothing is excluded by cluster or by sample at this stage (c() is NULL).
sample_exclude <- NULL
cluster_exclude <- NULL

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# Per-cell QC table shared by both panels; `exclude` flags cells that belong to
# a cluster listed in `cluster_exclude`.
qc_cells <- data.frame(
  nCount_RNA = full_dataset$nCount_RNA,
  nFeature_RNA = full_dataset$nFeature_RNA,
  percent_mt = full_dataset$percent.mt,
  seurat_clusters = full_dataset$seurat_clusters,
  exclude = full_dataset$seurat_clusters %in% cluster_exclude
)

# Horizontal extent of the shaded band: one position beyond the lowest and
# highest numeric cluster code.
cluster_codes <- as.numeric(full_dataset$seurat_clusters)
band_xmin <- min(cluster_codes) - 1
band_xmax <- max(cluster_codes) + 1

# Named fill values keep FALSE white and TRUE red even when only one of the two
# values occurs in the data (an unnamed palette is assigned by position).
exclude_fill <- c("FALSE" = "white", "TRUE" = "red")

# Panel 1: percent.mt per cluster; the band above the cutoff is shaded.
p1 <- ggplot(qc_cells, aes(x = seurat_clusters, y = percent_mt)) +
  geom_violin(scale = "width", aes(fill = exclude)) +
  # `geom` and `width` are not geom_hline() parameters (ggplot2 ignores them
  # with a warning), so only the supported arguments are passed.
  geom_hline(yintercept = cutoff_percent_mt, colour = "red") +
  ggtitle("Percent mt. cutoff") +
  theme_classic() +
  scale_fill_manual(values = exclude_fill) +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1)
  ) +
  annotate(
    geom = "rect",
    xmin = band_xmin,
    xmax = band_xmax,
    ymin = cutoff_percent_mt,
    # na.rm keeps the band finite if some cells have a missing percent.mt.
    ymax = 1.1 * max(full_dataset$percent.mt, na.rm = TRUE),
    fill = "red",
    alpha = 0.1
  )

# Panel 2: nFeature_RNA per cluster; the band below the cutoff is shaded.
p2 <- ggplot(qc_cells, aes(x = seurat_clusters, y = nFeature_RNA)) +
  geom_violin(scale = "width", aes(fill = exclude)) +
  geom_hline(yintercept = cutoff_nFeature_RNA, colour = "red") +
  ggtitle("nFeature RNA cutoff") +
  theme_classic() +
  scale_fill_manual(values = exclude_fill) +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1)
  ) +
  annotate(
    geom = "rect",
    xmin = band_xmin,
    xmax = band_xmax,
    ymin = 0,
    ymax = cutoff_nFeature_RNA,
    fill = "red",
    alpha = 0.1
  )

# Show both panels together.
# NOTE(review): combining two ggplots with `+` presumably relies on patchwork
# being loaded by one of the attached packages - confirm.
p1 + p2

Create filtered dataset. 

In [None]:
# Keep cells outside the excluded clusters that pass both QC thresholds.
# The thresholds come from cutoff_nFeature_RNA / cutoff_percent_mt so the
# filter always matches the cutoffs drawn in the QC plots; they were
# previously repeated here as the literals 300 and 5.
# The cluster condition keeps its `(... %in% ...) == FALSE` form on purpose,
# so the expression handed to subset() stays structurally unchanged.
full_dataset_filt <- subset(full_dataset, ((seurat_clusters %in% cluster_exclude) == FALSE) &
                       nFeature_RNA > cutoff_nFeature_RNA &
                         percent.mt < cutoff_percent_mt)

In [None]:
# Allow 10000 MiB of exported globals and use a 6 x 5 plotting canvas.
options(
  future.globals.maxSize = 10000 * 1024^2,
  repr.plot.width = 6,
  repr.plot.height = 5
)

full_dataset_filt <- SCTransform(full_dataset_filt)

# Drop variable features whose names match the exclusion pattern before PCA
# (HLA, immunoglobulin, RNA, MT and RP genes based on HUGO gene names).
# NOTE(review): prefixes such as ^MT, ^RP and ^RNA match every symbol starting
# with those letters (e.g. MTOR, RPA1), not only mitochondrial/ribosomal
# genes - confirm this breadth is intended.
var_remove <- '^HLA-|^IG[HJKL]|^RNA|^MT|^RP|^MTRNR'
var_features <- VariableFeatures(full_dataset_filt)
var_features <- var_features[!grepl(var_remove, var_features)]

# PCA on the retained features, then UMAP on 12 dimensions and clustering.
full_dataset_filt <- RunPCA(full_dataset_filt, features = var_features)
full_dataset_filt <- RunUMAP(full_dataset_filt, dims = 1:12)
full_dataset_filt <- FindNeighbors(full_dataset_filt)
full_dataset_filt <- FindClusters(full_dataset_filt, resolution = 0.2)

# Overview by source and by cluster.
DimPlot(full_dataset_filt, group.by = "source")
DimPlot(full_dataset_filt, label = TRUE)

# Sequencing depth and selected marker genes.
FeaturePlot(full_dataset_filt, features = "nCount_RNA")
FeaturePlot(full_dataset_filt, features = "MKI67")
FeaturePlot(full_dataset_filt, features = "CD4")
FeaturePlot(full_dataset_filt, features = "CD8A")
FeaturePlot(full_dataset_filt, features = "CD3D")

In [None]:
# Annotation columns plotted with repelled labels on a 10-wide canvas.
options(repr.plot.width = 10)

DimPlot(full_dataset_filt, group.by = "Monaco_single", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 10)
DimPlot(full_dataset_filt, group.by = "Wherry_main", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 10)
DimPlot(full_dataset_filt, group.by = "predicted.celltype.l3", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 10)
DimPlot(full_dataset_filt, group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE)

In [None]:
# Re-cluster at a finer resolution.
full_dataset_filt <- FindClusters(full_dataset_filt, resolution = 0.4)


In [None]:
# Labelled plot of the finer clusters.
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(full_dataset_filt, label = TRUE)

Second filtering, based on clustering, markers and predicted cell types.

In [None]:
# Retain clusters 0-3, 7, 8 and 10 of the current clustering.
full_dataset_filt <- subset(
  full_dataset_filt,
  subset = seurat_clusters %in% c(0:3, 7, 8, 10)
)

In [None]:
# Allow 10000 MiB of exported globals for future workers.
options(future.globals.maxSize = 10000 * 1024^2)

full_dataset_filt <- SCTransform(full_dataset_filt)

# Drop variable features whose names match the exclusion pattern before PCA
# (HLA, immunoglobulin, RNA, MT, RP and TCR genes based on HUGO gene names).
# NOTE(review): prefixes such as ^MT, ^RP and ^RNA match every symbol starting
# with those letters (e.g. MTOR, RPA1), not only mitochondrial/ribosomal
# genes - confirm this breadth is intended.
var_remove <- '^HLA-|^IG[HJKL]|^RNA|^MT|^RP|^MTRNR|^TR[AB]V'
var_features <- VariableFeatures(full_dataset_filt)
var_features <- var_features[!grepl(var_remove, var_features)]

# PCA on the retained features, then UMAP on 12 dimensions and clustering.
full_dataset_filt <- RunPCA(full_dataset_filt, features = var_features)
full_dataset_filt <- RunUMAP(full_dataset_filt, dims = 1:12)
full_dataset_filt <- FindNeighbors(full_dataset_filt)
full_dataset_filt <- FindClusters(full_dataset_filt, resolution = 0.2)

# Overview by source and by cluster.
DimPlot(full_dataset_filt, group.by = "source")
DimPlot(full_dataset_filt, label = TRUE)

# Sequencing depth and selected marker genes.
FeaturePlot(full_dataset_filt, features = "nCount_RNA")
FeaturePlot(full_dataset_filt, features = "MKI67")
FeaturePlot(full_dataset_filt, features = "CD4")
FeaturePlot(full_dataset_filt, features = "CD8A")
FeaturePlot(full_dataset_filt, features = "CD3D")

In [None]:
# Labelled cluster plot on a narrow canvas.
options(repr.plot.width = 6)
DimPlot(full_dataset_filt, label = TRUE)

In [None]:
# Large square canvas for a four-column grid of marker genes.
options(repr.plot.width = 16, repr.plot.height = 16)

FeaturePlot(
  full_dataset_filt,
  features = c(
    "SELL", "CCL5", "FOXP3", "IL2RA", "IFNG",
    "CD160", "ZBTB16", "IL21R",
    "TBX21", "RORC", "BCL6", "IL26",
    "GNLY", "CD8A", "CSF2", "CD27", "HLA-DRB1",
    "IL13", "IL4", "MKI67"
  ),
  ncol = 4
)

In [None]:
# Annotation columns plotted with repelled labels on a 14 x 10 canvas.
options(repr.plot.width = 14, repr.plot.height = 10)

DimPlot(full_dataset_filt, group.by = "Monaco_single", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 10)
DimPlot(full_dataset_filt, group.by = "Wherry_main", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 10)
DimPlot(full_dataset_filt, group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 10)
DimPlot(full_dataset_filt, group.by = "predicted.celltype.l3", label = TRUE, repel = TRUE)

In [None]:
# Cells coloured by the "source" metadata column.
DimPlot(full_dataset_filt, group.by = "source")
