In [None]:
source("diabetes_analysis_v07.R")

# CD8 preliminary data

In [None]:
# List every raw-data file with its relative path; the positional indices
# used below refer to the order of this listing.
paths <- list.files("../data/rawdata/", full.names = TRUE)

In [None]:
paths

In [None]:
# Read the first three listed files.
# NOTE(review): positional selection silently picks other files if the folder
# content changes - check against the printed `paths`.
seu_list <- future_map(paths[c(1, 2, 3)], readRDS)

In [None]:
# Count cells per hashtag in the second loaded object.
seu_list[[2]]$hashtags %>% table

In [None]:
# Keep only the cells carrying hashtag "H5".
exp10_cd8 <- subset(seu_list[[2]], hashtags == "H5")

In [None]:
# Put the subsetted object back in place of the original list element.
seu_list[[2]] <- exp10_cd8

In [None]:
seu_list[[2]]$Condition %>% table

In [None]:
# Cells per patient in each of the three objects.
seu_list[[1]]$Patient_ID %>% table

In [None]:
seu_list[[2]]$Patient_ID %>% table

In [None]:
seu_list[[3]]$Patient_ID %>% table

In [None]:
# Merge the three preliminary objects into one.
cd8_prelim <- merge(
  seu_list[[1]],
  c(seu_list[[2]], seu_list[[3]])
)

In [None]:
cd8_prelim

In [None]:
cd8_prelim$Condition %>% table

In [None]:
# Raise the size limit for globals exported to futures (10000 MiB) and
# evaluate futures sequentially.
options(future.globals.maxSize = 10000 * 1024^2)
plan("sequential")

In [None]:
# Standard processing on the RNA assay: normalise, scale, pick 1000 variable
# features, then PCA (12 components) and UMAP on those components.
# Note: ScaleData() runs before FindVariableFeatures() here.
DefaultAssay(cd8_prelim) <- "RNA"
cd8_prelim <- NormalizeData(cd8_prelim, verbose = FALSE)
cd8_prelim <- ScaleData(cd8_prelim, verbose = FALSE)
cd8_prelim <- FindVariableFeatures(
  cd8_prelim,
  nfeatures = 1000,
  verbose = FALSE
)
cd8_prelim <- RunPCA(cd8_prelim, npcs = 12, verbose = FALSE)
cd8_prelim <- RunUMAP(cd8_prelim, reduction = "pca", dims = 1:12)

In [None]:
# Neighbour graph on the same 12 dimensions, clustering at resolution 1.
cd8_prelim <- FindNeighbors(cd8_prelim, dims = 1:12)
cd8_prelim <- FindClusters(cd8_prelim, resolution = 1)

In [None]:
# Embedding with cluster labels drawn on top.
DimPlot(cd8_prelim, label = TRUE)

In [None]:
DimPlot(cd8_prelim, label = TRUE)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)
# Marker panel for inspecting the clusters. "MKI67" was listed twice in the
# original vector; the duplicate entry is dropped.
FeaturePlot(
  cd8_prelim,
  features = c("MKI67", "LCK", "CD3G", "TYROBP", "CD14", "CD3D", "CD8A"),
  ncol = 4
)

In [None]:
FeaturePlot(
  cd8_prelim,
  features = c("SELL", "CCR7", "IL7R", "ITGA4", "CCL5", "IFNG"),
  ncol = 4
)

In [None]:
# Keep clusters 0-12 and 14 (cluster 13 and anything above 14 are dropped),
# then redo the whole processing on the retained cells.
cd8_prelim_filt <- subset(cd8_prelim, seurat_clusters %in% c(0:12, 14))
cd8_prelim_filt <- NormalizeData(cd8_prelim_filt, verbose = FALSE)
cd8_prelim_filt <- ScaleData(cd8_prelim_filt, verbose = FALSE)
cd8_prelim_filt <- FindVariableFeatures(cd8_prelim_filt, nfeatures = 1000, verbose = FALSE)
cd8_prelim_filt <- RunPCA(cd8_prelim_filt, npcs = 12, verbose = FALSE)
cd8_prelim_filt <- RunUMAP(cd8_prelim_filt, reduction = "pca", dims = 1:12)
cd8_prelim_filt <- FindNeighbors(cd8_prelim_filt, dims = 1:12)
cd8_prelim_filt <- FindClusters(cd8_prelim_filt, resolution = 0.5)

In [None]:
DimPlot(cd8_prelim_filt, label = TRUE, label.size = 12)

In [None]:
# Label a cell "Naive" when it belongs to one of the listed clusters,
# otherwise "NonNaive".
cd8_prelim_filt@meta.data <- cd8_prelim_filt@meta.data %>%
  mutate(naive_or_eff = if_else(seurat_clusters %in% c(0, 1, 7, 8, 3), "Naive", "NonNaive"))

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)
DimPlot(
  cd8_prelim_filt,
  label = FALSE, label.size = 12, group.by = "naive_or_eff",
  cols = c("dodgerblue1", "indianred2"),
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 5
) + ggtheme()
# No `plot =` is given, so ggsave() writes the last displayed plot.
ggsave("../figures/prelim/cd8_dimplot.svg", width = 16, height = 12, units = "cm", create.dir = TRUE)

In [None]:
# Constant column; used as the single x-axis category of the bar plot below.
cd8_prelim_filt$prelim <- "Prelim"

In [None]:
# Stacked bar (proportions) of Naive vs NonNaive cells.
cd8_prelim_filt@meta.data %>%
  ggplot(aes(x = prelim, fill = naive_or_eff)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("dodgerblue1", "indianred2")) +
  theme_classic() +
  ggtheme() +
  ggtitle("CD8")
ggsave("../figures/prelim/cd8_barplot.svg", width = 10, height = 12, units = "cm", create.dir = TRUE)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# "PTPRC-RA" signal, colour scale capped at 3.
FeaturePlot(
  cd8_prelim_filt,
  features = c("PTPRC-RA"), max.cutoff = 3,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 4
) + ggtheme()
ggsave("../figures/prelim/cd8_ptprc_ra.svg", width = 13, height = 12, units = "cm", create.dir = TRUE)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# "PTPRC-RO" signal, colour scale capped at 2.
FeaturePlot(
  cd8_prelim_filt,
  features = c("PTPRC-RO"), max.cutoff = 2,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 4
) + ggtheme()
ggsave("../figures/prelim/cd8_ptprc_ro.svg", width = 13, height = 12, units = "cm", create.dir = TRUE)

# CD8 Final data

In [None]:
# List every raw-data file; the indices below refer to this listing order.
paths <- list.files("../data/rawdata/", full.names = TRUE)

In [None]:
paths

In [None]:
# Read the files selected by position.
# NOTE(review): indices are tied to the current folder content - check
# against the printed `paths`.
seu_list <- future_map(paths[c(7, 8, 9, 11, 13, 15, 17, 19)], readRDS)

In [None]:
# Merge all loaded objects into a single one.
cd8_final <- scCustomize::Merge_Seurat_List(seu_list)

In [None]:
cd8_final

In [None]:
cd8_final$Condition %>% table

In [None]:
# Raise the size limit for globals exported to futures (10000 MiB) and
# evaluate futures sequentially.
options(future.globals.maxSize = 10000 * 1024^2)
plan("sequential")

In [None]:
# Same processing as for the preliminary data: normalise, scale, 1000
# variable features, PCA (12 components), UMAP.
DefaultAssay(cd8_final) <- "RNA"
cd8_final <- NormalizeData(cd8_final, verbose = FALSE)
cd8_final <- ScaleData(cd8_final, verbose = FALSE)
cd8_final <- FindVariableFeatures(
  cd8_final,
  nfeatures = 1000,
  verbose = FALSE
)
cd8_final <- RunPCA(cd8_final, npcs = 12, verbose = FALSE)
cd8_final <- RunUMAP(cd8_final, reduction = "pca", dims = 1:12)

In [None]:
# Neighbour graph on the same 12 dimensions, clustering at resolution 1.
cd8_final <- FindNeighbors(cd8_final, dims = 1:12)
cd8_final <- FindClusters(cd8_final, resolution = 1)

In [None]:
# Embedding with cluster labels drawn on top.
DimPlot(cd8_final, label = TRUE)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)
# Marker panel for inspecting the clusters. "MKI67" was listed twice in the
# original vector; the duplicate entry is dropped.
FeaturePlot(
  cd8_final,
  features = c("MKI67", "LCK", "CD3G", "TYROBP", "CD14", "CD3D", "CD8A"),
  ncol = 4
)

In [None]:
FeaturePlot(
  cd8_final,
  features = c("SELL", "CCR7", "IL7R", "ITGA4", "CCL5", "IFNG"),
  ncol = 4
)

In [None]:
# Keep clusters 0-21, then redo the processing on the retained cells
# (15 principal components this time).
cd8_final_filt <- subset(cd8_final, seurat_clusters %in% c(0:21))
cd8_final_filt <- NormalizeData(cd8_final_filt, verbose = FALSE)
cd8_final_filt <- ScaleData(cd8_final_filt, verbose = FALSE)
cd8_final_filt <- FindVariableFeatures(cd8_final_filt, nfeatures = 1000, verbose = FALSE)
cd8_final_filt <- RunPCA(cd8_final_filt, npcs = 15, verbose = FALSE)
cd8_final_filt <- RunUMAP(cd8_final_filt, reduction = "pca", dims = 1:15)
cd8_final_filt <- FindNeighbors(cd8_final_filt, dims = 1:15)
cd8_final_filt <- FindClusters(cd8_final_filt, resolution = 0.5)

In [None]:
DimPlot(cd8_final_filt, label = TRUE, label.size = 12)

In [None]:
FeaturePlot(
  cd8_final_filt,
  features = c("SELL", "CCR7", "IL7R", "ITGA4", "CCL5", "IFNG"),
  ncol = 4
)

In [None]:
# Label a cell "Naive" when it belongs to cluster 0, 2 or 8, otherwise
# "NonNaive".
cd8_final_filt@meta.data <- cd8_final_filt@meta.data %>%
  mutate(naive_or_eff = if_else(seurat_clusters %in% c(0, 2, 8), "Naive", "NonNaive"))

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)
DimPlot(
  cd8_final_filt,
  label = FALSE, label.size = 12, group.by = "naive_or_eff",
  cols = c("dodgerblue1", "indianred2"),
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 5
) + ggtheme()
# No `plot =` is given, so ggsave() writes the last displayed plot.
ggsave("../figures/final/cd8_dimplot.svg", width = 16, height = 12, units = "cm", create.dir = TRUE)

In [None]:
# Constant column; used as the single x-axis category of the bar plot below.
cd8_final_filt$final <- "final"

In [None]:
# Stacked bar (proportions) of Naive vs NonNaive cells.
cd8_final_filt@meta.data %>%
  ggplot(aes(x = final, fill = naive_or_eff)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("dodgerblue1", "indianred2")) +
  theme_classic() +
  ggtheme() +
  ggtitle("CD8")
ggsave("../figures/final/cd8_barplot.svg", width = 10, height = 12, units = "cm", create.dir = TRUE)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# "PTPRC-RA" signal, colour scale capped at 5.
FeaturePlot(
  cd8_final_filt,
  features = c("PTPRC-RA"), max.cutoff = 5,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 3
) + ggtheme()
ggsave("../figures/final/cd8_ptprc_ra.svg", width = 13, height = 12, units = "cm", create.dir = TRUE)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# "PTPRC-RO" signal, colour scale capped at 3.
FeaturePlot(
  cd8_final_filt,
  features = c("PTPRC-RO"), max.cutoff = 3,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 3
) + ggtheme()
ggsave("../figures/final/cd8_ptprc_ro.svg", width = 13, height = 12, units = "cm", create.dir = TRUE)

# CD8 preliminary and final

The initial data files are stored in the folder `data/rawdata`. To reproduce the analysis, 
download the files and copy them into that folder. The initial files were generated using the following code: `XXX`

The data were obtained in six 10x runs with the following experimental design and chemistry:

`table`

In [None]:
# List every raw-data file; the indices below refer to this listing order.
paths <- list.files("../data/rawdata/", full.names = TRUE)

In [None]:
# Read the preliminary (1-3) and final (7-19) files selected by position.
# NOTE(review): indices are tied to the current folder content - confirm.
seu_list <- future_map(paths[c(1, 2, 3, 7, 8, 9, 11, 13, 15, 17, 19)], .f = readRDS)

Experiment 10, which is now the second item in the `seu_list`, contained both CD8 and CD4 T cell samples. 
We will subset the seurat object to keep only hashtag "H5", which is the one CD8 T cell sample in this experiment.

In [None]:
# Count cells per hashtag in the second loaded object.
seu_list[[2]]$hashtags %>% table

In [None]:
# Keep only the cells carrying hashtag "H5".
exp10_cd8 <- subset(seu_list[[2]], hashtags == "H5")

In [None]:
# Put the subsetted object back in place of the original list element.
seu_list[[2]] <- exp10_cd8

We will now merge all CD8 T cell samples.

In [None]:
cd8_full <- scCustomize::Merge_Seurat_List(seu_list)

# Add metadata

In [None]:
# Read the external metadata sheet.
md_dia <- read_xlsx("../data/metadata_v05.xlsx")

In [None]:
md_dia %>% colnames

In [None]:
# Split Condition on the space into Disease and Time (Condition is kept),
# and build a combined patient/time key.
cd8_full@meta.data <- cd8_full@meta.data %>%
  separate(Condition, into = c("Disease", "Time"), remove = FALSE, sep = " ") %>%
  mutate(Patient_Time = paste(Patient_ID, Time))

# Cells whose Time is missing after the split are assigned "T0".
# NOTE(review): Patient_Time above is built before this fill, so for those
# cells it reads "<id> NA" until Patient_Time is recomputed further down -
# confirm this is intended.
cd8_full$Time <- if_else(is.na(cd8_full$Time), "T0", cd8_full$Time)

# Space-separated sample descriptor; it is split on spaces again later, so
# the individual fields must not contain spaces themselves.
cd8_full$Sample_char <- paste(
  cd8_full$Patient_ID,
  cd8_full$Disease,
  cd8_full$Time,
  cd8_full$Age_group,
  cd8_full$Sex,
  cd8_full$Experiment_ID
)

In [None]:
md_seurat <- cd8_full@meta.data

In [None]:
colnames(md_seurat)

In [None]:
# Natural join on all columns shared by both tables.
md_joined <- left_join(md_seurat, md_dia)
# The join must keep exactly one row per cell: the row names are re-attached
# by position below, so any fan-out would misalign or break the metadata.
stopifnot(nrow(md_joined) == nrow(md_seurat))

In [None]:
# Restore the cell barcodes as row names after the join.
cd8_full@meta.data <- md_joined
rownames(cd8_full@meta.data) <- colnames(cd8_full)

### Remove problematic Patient 206

Patient 206 was removed because of the low quality of the data at sorting, which suggests that the frozen sample itself was of low quality.

In [None]:
# Drop every cell of patient "206".
cd8_full <- subset(cd8_full, Patient_ID != "206")

### Remove Pre-Dia

In [None]:
cd8_full$Disease %>% table

In [None]:
# Keep only cells whose Disease is "Dia" or "Ctrl".
cd8_full <- subset(cd8_full, Disease %in% c("Dia", "Ctrl"))

In [None]:
cd8_full$Disease %>% table

### Exchange Ctrl T0 for Ctrl T1 in case of 201, 202, 204

In [None]:
cd8_full$Condition %>% table

In [None]:
# Flag the T0 samples of patients 201, 202 and 204.
cd8_full$is_old_control <- ifelse(
  cd8_full$Patient_Time %in% c("201 T0", "202 T0", "204 T0"),
  TRUE,
  FALSE
)

In [None]:
cd8_full$is_old_control %>% table

In [None]:
# Drop the flagged cells.
cd8_full <- subset(cd8_full, is_old_control == FALSE)

In [None]:
cd8_full$Condition %>% table

In [None]:
# Relabel the T1 samples of the same three patients as T0, then rebuild the
# derived Patient_Time and Condition columns from the updated Time.
cd8_full@meta.data <- cd8_full@meta.data %>%
  mutate(Time = ifelse(Patient_Time %in% c("201 T1", "202 T1", "204 T1"), "T0", Time)) %>%
  mutate(
    Patient_Time = paste(Patient_ID, Time),
    Condition = paste(Disease, Time)
  )


In [None]:
cd8_full$Condition %>% table

In [None]:
# Evaluate futures sequentially.
plan("sequential")

In [None]:
DefaultAssay(cd8_full) <- "RNA"

In [None]:
cd8_full <- NormalizeData(cd8_full, verbose = FALSE)

In [None]:
cd8_full <- ScaleData(cd8_full, verbose = FALSE)

In [None]:
# 1000 variable features, PCA with 12 components, UMAP on those components.
cd8_full <- FindVariableFeatures(cd8_full, nfeatures = 1000, verbose = FALSE)
cd8_full <- RunPCA(cd8_full, npcs = 12, verbose = FALSE)
cd8_full <- RunUMAP(cd8_full, reduction = "pca", dims = 1:12)

In [None]:
# Neighbour graph on the same 12 dimensions, clustering at resolution 1.
cd8_full <- FindNeighbors(cd8_full, dims = 1:12)
cd8_full <- FindClusters(cd8_full, resolution = 1)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

DimPlot(cd8_full, label = TRUE, raster = FALSE)

We will check the canonical markers of T cells and other immune populations to see clusters of contaminating cell types.

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)
# Marker panel for spotting contaminating populations. "MKI67" was listed
# twice in the original vector; the duplicate entry is dropped.
FeaturePlot(
  cd8_full,
  features = c("MKI67", "LCK", "CD3G", "TYROBP", "CD14", "CD3D", "CD8A"),
  ncol = 4, raster = FALSE
)

### Automated annotation of cell types

We will perform automated annotation of cell types using the packages [SingleR](https://bioconductor.org/packages/release/bioc/vignettes/SingleR/inst/doc/SingleR.html) and [Azimuth](https://azimuth.hubmapconsortium.org/). We used four sources of annotation: two built-in reference datasets from the package celldex (Monaco Immune Dataset and Human Primary Cell Atlas Data), a custom reference of human T-cell types profiled by bulk RNA-seq from the paper by Giles et al. ([Immunity, 2022](https://www.sciencedirect.com/science/article/pii/S107476132200084X)), and the three-level Azimuth annotations.

In [None]:
# Reference datasets shipped with celldex.
mid.se <- celldex::MonacoImmuneData()
hpca.se <- celldex::HumanPrimaryCellAtlasData()

# Restore the objects stored in this .RData file into the workspace.
load("../data/ref_wherry_new.RData")

In [None]:
# annotate_tcell_data() is not defined in this notebook (presumably in the
# sourced script); it is given the object only, so it may rely on the
# references created above - TODO confirm.
cd8_full <- annotate_tcell_data(cd8_full)

In [None]:
# Create the output folders. `recursive = TRUE` also creates missing parents
# and `showWarnings = FALSE` keeps re-runs quiet when the folders exist.
dir.create("../data/processed/", recursive = TRUE, showWarnings = FALSE)
dir.create("../data/processed/L1/", recursive = TRUE, showWarnings = FALSE)
dir.create("../data/processed/L2/", recursive = TRUE, showWarnings = FALSE)
dir.create("../data/processed/L3/", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Checkpoint of the annotated object (clusters at resolution 1).
saveRDS(cd8_full, "../data/processed/L1/cd8_full.rds")

In [None]:
#cd8_full  <- readRDS("../data/processed/L1/cd8_full.rds")

In [None]:
# Embedding coloured by the "Monaco_single" metadata column.
DimPlot(cd8_full, raster = FALSE, group.by = "Monaco_single", label = FALSE)

In [None]:
DimPlot(cd8_full, label = TRUE, raster = FALSE)

In [None]:
DefaultAssay(cd8_full) <- "RNA"

In [None]:
# Recluster at resolution 0.8. This happens after the checkpoint was written,
# so the saved file does not contain this clustering.
cd8_full <- FindNeighbors(cd8_full, dims = 1:12)
cd8_full <- FindClusters(cd8_full, resolution = 0.8)

In [None]:
DimPlot(cd8_full, label = TRUE, raster = FALSE)

In [None]:
# Per-cluster distribution of the QC metrics.
VlnPlot(
  cd8_full,
  features = c("percent.mt", "percent.rp", "nCount_RNA", "nFeature_RNA"),
  ncol = 4, raster = FALSE
)

### Remove dead and contaminating cells

In [None]:
# Reload the checkpoint written by the saveRDS() call above. That file was
# saved before the resolution-0.8 reclustering, so the cluster ids used from
# here on are the ones stored in the file.
cd8_full  <- readRDS("../data/processed/L1/cd8_full.rds")

We will filter out low quality and contaminating cells:

- cells with more than 10% of reads mapping to mitochondrial genes
- cells with less than 500 detected genes
- cluster 8, which contains dead and dying cells
- cluster 19, which contains contaminating cells, e.g. monocytes, B cells

In [None]:
# QC thresholds and the clusters that are removed as a whole.
cutoff_nFeature_RNA <- 500
cutoff_percent_mt <- 10
cluster_exclude <- c(8, 19)

In [None]:
# Per-cell QC table shared by both panels; `exclude` is TRUE for cells that
# sit in one of the clusters listed in `cluster_exclude`.
qc_df <- data.frame(
  nCount_RNA = cd8_full$nCount_RNA,
  nFeature_RNA = cd8_full$nFeature_RNA,
  percent_mt = cd8_full$percent.mt,
  seurat_clusters = cd8_full$seurat_clusters,
  exclude = cd8_full$seurat_clusters %in% cluster_exclude
)

# Integer codes of the first and last cluster level; the shaded rectangles
# extend one unit beyond them on the discrete x axis.
cluster_pos <- range(as.numeric(cd8_full$seurat_clusters))

# Mitochondrial fraction per cluster; the region above the cutoff is shaded.
# geom_hline() has no `geom`/`width` parameters (they were ignored with a
# warning), so the line thickness is set with `linewidth`.
p1 <- ggplot(qc_df, aes(x = seurat_clusters, y = percent_mt)) +
  geom_violin(scale = "width", aes(fill = exclude)) +
  geom_hline(yintercept = cutoff_percent_mt, linewidth = 0.5, colour = "red") +
  ggtitle("Percent mt. cutoff") +
  theme_classic() +
  scale_fill_manual(values = c("white", "red")) +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1)
  ) +
  annotate(
    geom = "rect",
    xmin = cluster_pos[1] - 1, xmax = cluster_pos[2] + 1,
    ymin = cutoff_percent_mt, ymax = 1.1 * (max(cd8_full$percent.mt)),
    fill = "red", alpha = 0.1
  )

# Detected genes per cluster; the region below the cutoff is shaded.
p2 <- ggplot(qc_df, aes(x = seurat_clusters, y = nFeature_RNA)) +
  geom_violin(scale = "width", aes(fill = exclude)) +
  geom_hline(yintercept = cutoff_nFeature_RNA, linewidth = 0.5, colour = "red") +
  ggtitle("nFeature RNA cutoff") +
  theme_classic() +
  scale_fill_manual(values = c("white", "red")) +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1)
  ) +
  annotate(
    geom = "rect",
    xmin = cluster_pos[1] - 1, xmax = cluster_pos[2] + 1,
    ymin = 0, ymax = cutoff_nFeature_RNA,
    fill = "red", alpha = 0.1
  )

p1 + p2

### QC dead and contaminating removal for figure

In [None]:
options(repr.plot.width = 14, repr.plot.height = 5)

# Per-cell QC table shared by both panels; `exclude` is TRUE for cells that
# sit in one of the clusters listed in `cluster_exclude`.
qc_df <- data.frame(
  nCount_RNA = cd8_full$nCount_RNA,
  nFeature_RNA = cd8_full$nFeature_RNA,
  percent_mt = cd8_full$percent.mt,
  seurat_clusters = cd8_full$seurat_clusters,
  exclude = cd8_full$seurat_clusters %in% cluster_exclude
)

# Integer codes of the first and last cluster level; the shaded rectangles
# extend one unit beyond them on the discrete x axis.
cluster_pos <- range(as.numeric(cd8_full$seurat_clusters))

# Figure version of the mitochondrial panel (rotated labels, no legend).
# geom_hline() has no `geom`/`width` parameters (they were ignored with a
# warning), so the line thickness is set with `linewidth`.
p1 <- ggplot(qc_df, aes(x = seurat_clusters, y = percent_mt)) +
  geom_violin(scale = "width", aes(fill = exclude)) +
  geom_hline(yintercept = cutoff_percent_mt, linewidth = 0.5, colour = "red") +
  ggtitle("Percent mt. cutoff") +
  theme_classic() +
  scale_fill_manual(values = c("white", "red")) +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  annotate(
    geom = "rect",
    xmin = cluster_pos[1] - 1, xmax = cluster_pos[2] + 1,
    ymin = cutoff_percent_mt, ymax = 1.1 * (max(cd8_full$percent.mt)),
    fill = "red", alpha = 0.1
  ) +
  ggtheme() +
  NoLegend()

# Figure version of the detected-genes panel.
p2 <- ggplot(qc_df, aes(x = seurat_clusters, y = nFeature_RNA)) +
  geom_violin(scale = "width", aes(fill = exclude)) +
  geom_hline(yintercept = cutoff_nFeature_RNA, linewidth = 0.5, colour = "red") +
  ggtitle("nFeature RNA cutoff") +
  theme_classic() +
  scale_fill_manual(values = c("white", "red")) +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  annotate(
    geom = "rect",
    xmin = cluster_pos[1] - 1, xmax = cluster_pos[2] + 1,
    ymin = 0, ymax = cutoff_nFeature_RNA,
    fill = "red", alpha = 0.1
  ) +
  ggtheme()

# Keep the combined figure in a variable and pass it to ggsave() explicitly,
# so the saved file does not depend on which plot was displayed last.
qc_plot1 <- p1 + p2
qc_plot1

ggsave("../figures/QC/cd8_QC_plot1.png", plot = qc_plot1, width = 9, height = 4, create.dir = TRUE)
ggsave("../figures/QC/cd8_QC_plot1.svg", plot = qc_plot1, width = 9, height = 4, create.dir = TRUE)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 6)

# Dot plot of lineage markers per cluster, genes on the vertical axis after
# the flip. `create.dir = TRUE` matches the other ggsave() calls of this
# notebook and creates ../figures/QC/ when it is missing.
DotPlot(cd8_full, features = rev(c("CD3D", "CD8A", "CD8B", "CD4", "LCK", "TRAC", "CD14", "MS4A1"))) +
  ggtheme() +
  theme(
    panel.background = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  coord_flip() +
  scale_size_continuous(range = c(0.2, 3))
ggsave("../figures/QC/cd8_QC_plot2.png", width = 6.8, height = 3.7, create.dir = TRUE)
ggsave("../figures/QC/cd8_QC_plot2.svg", width = 6.8, height = 3.7, create.dir = TRUE)



In [None]:
# Flag the cells that the filtering step removes. The filter further down
# keeps a cell only when percent.mt is strictly below and nFeature_RNA
# strictly above its cutoff, so cells lying exactly on a cutoff are removed
# as well; `>=` / `<=` make this flag agree with that filter.
cd8_full$remove <- ifelse(
  (cd8_full$seurat_clusters %in% cluster_exclude) |
    cd8_full$percent.mt >= cutoff_percent_mt |
    cd8_full$nFeature_RNA <= cutoff_nFeature_RNA,
  "Remove", "Keep"
)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)
DimPlot(cd8_full, raster = TRUE, group.by = "remove", cols = c("grey88", "red")) + ggtheme()
ggsave("../figures/QC/cd8_QC_plot3.png", width = 5, height = 4, create.dir = TRUE)
ggsave("../figures/QC/cd8_QC_plot3.svg", width = 5, height = 4, create.dir = TRUE)

In [None]:
DimPlot(cd8_full, raster = TRUE, label = TRUE, label.size = 7) + ggtheme()
ggsave("../figures/QC/cd8_QC_plot4.png", width = 5, height = 4, create.dir = TRUE)
ggsave("../figures/QC/cd8_QC_plot4.svg", width = 5, height = 4, create.dir = TRUE)

In [None]:
options(repr.plot.width = 60, repr.plot.height = 12)
# NOTE(review): `cd8_l1_full_filt` is created only in the "Processing after
# QC" cells below, so in a top-to-bottom run this cell fails - confirm whether
# it belongs after that section or should plot `cd8_full`.
VlnPlot(
  cd8_l1_full_filt,
  features = c("nFeature_RNA"),
  ncol = 4, group.by = "Sample_ID", raster = FALSE, pt.size = 0
) + NoLegend()

### Processing after QC

In [None]:
# Keep cells that are outside the excluded clusters AND strictly below the
# mitochondrial cutoff AND strictly above the detected-genes cutoff.
cd8_l1_full_filt <- subset(
  cd8_full,
  ((seurat_clusters %in% cluster_exclude) == FALSE) &
    percent.mt < cutoff_percent_mt &
    nFeature_RNA > cutoff_nFeature_RNA
)

In [None]:
# Reprocess the filtered cells (800 variable features, 12 components).
cd8_l1_full_filt <- NormalizeData(cd8_l1_full_filt, verbose = FALSE)
cd8_l1_full_filt <- ScaleData(cd8_l1_full_filt, verbose = FALSE)
cd8_l1_full_filt <- FindVariableFeatures(cd8_l1_full_filt, nfeatures = 800, verbose = FALSE)
cd8_l1_full_filt <- RunPCA(cd8_l1_full_filt, npcs = 12, verbose = FALSE)
cd8_l1_full_filt <- RunUMAP(cd8_l1_full_filt, reduction = "pca", dims = 1:12)
cd8_l1_full_filt <- FindNeighbors(cd8_l1_full_filt, dims = 1:12)
cd8_l1_full_filt <- FindClusters(cd8_l1_full_filt, resolution = 1)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l1_full_filt, label = TRUE, raster = FALSE)

In [None]:
options(repr.plot.width = 20)
# Per-cluster distribution of the QC metrics after filtering.
VlnPlot(
  cd8_l1_full_filt,
  features = c("percent.mt", "percent.rp", "nCount_RNA", "nFeature_RNA"),
  ncol = 4, raster = FALSE
)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

FeaturePlot(
  cd8_l1_full_filt,
  features = c("CD8A", "SELL", "CD3D", "CD19", "MS4A1", "TRGV2", "TRDC"),
  ncol = 4, raster = FALSE
)

In [None]:
# Recluster at a lower resolution (0.6) on the existing neighbour graph.
cd8_l1_full_filt <- FindClusters(cd8_l1_full_filt, resolution = 0.6)

options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l1_full_filt, label = TRUE, raster = FALSE)

### PCA on samples

In [None]:
# Re-derive Disease and Time from Condition (Condition itself is kept).
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  separate(Condition, into = c("Disease", "Time"), remove = FALSE, sep = " ")

In [None]:
# Space-separated sample descriptor; it is split on spaces again below, so
# the individual fields must not contain spaces themselves.
cd8_l1_full_filt$Sample_char <- paste(
  cd8_l1_full_filt$Patient_ID,
  cd8_l1_full_filt$Disease,
  cd8_l1_full_filt$Time,
  cd8_l1_full_filt$Age_group,
  cd8_l1_full_filt$Sex,
  cd8_l1_full_filt$Experiment_ID
)

In [None]:
# One averaged expression profile per Sample_char value, returned as a
# Seurat object.
cd8_samples <- AverageExpression(cd8_l1_full_filt, group.by = "Sample_char", return.seurat = TRUE)

In [None]:
cd8_samples <- FindVariableFeatures(cd8_samples)

In [None]:
cd8_samples <- RunPCA(cd8_samples)

In [None]:
DimPlot(cd8_samples)

In [None]:
# The column names of the averaged object are used as the sample descriptor.
cd8_samples$Sample_char <- colnames(cd8_samples)

In [None]:
cd8_samples$Sample_char %>% table

In [None]:
# Split the descriptor back into its six space-separated fields.
cd8_samples@meta.data <- cd8_samples@meta.data %>%
  separate(
    Sample_char,
    into = c("Patient_ID", "Disease", "Time", "Age_group", "Sex", "Exp"),
    sep = " ",
    remove = FALSE
  )

In [None]:
options(repr.plot.width = 12, repr.plot.height = 7)
# 2 x 3 grid of the sample-level PCA, one panel per grouping variable.
(
  DimPlot(cd8_samples, group.by = "Exp") +
    DimPlot(cd8_samples, group.by = "Disease") +
    DimPlot(cd8_samples, group.by = "Time")
) / (
  DimPlot(cd8_samples, group.by = "Sex") +
    DimPlot(cd8_samples, group.by = "Age_group") +
    (DimPlot(cd8_samples, group.by = "Patient_ID") + NoLegend())
)

In [None]:
# Same grid, stored for the combined figure built later.
p1 <- (
  DimPlot(cd8_samples, group.by = "Exp") +
    DimPlot(cd8_samples, group.by = "Disease") +
    DimPlot(cd8_samples, group.by = "Time")
) / (
  DimPlot(cd8_samples, group.by = "Sex") +
    DimPlot(cd8_samples, group.by = "Age_group") +
    (DimPlot(cd8_samples, group.by = "Patient_ID") + NoLegend())
)

## STACAS Integration over Experiment

In [None]:
# Split by experiment. From here until Run.STACAS() the name
# `cd8_l1_full_filt` holds a list of Seurat objects, not a single object.
cd8_l1_full_filt <- SplitObject(cd8_l1_full_filt, split.by = "Experiment_ID")

In [None]:
# normalize and identify variable features for each dataset independently
merged.list <- lapply(X = cd8_l1_full_filt, FUN = function(x) {
    DefaultAssay(x) <- "RNA"
    # Store the cell names in a metadata column.
    x$barcode <- colnames(x)
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 800)
    # Return the processed object explicitly.
    x
})

# Integrate the per-experiment objects and recompute the UMAP.
cd8_l1_full_filt <- Run.STACAS(merged.list, dims = 1:12)
cd8_l1_full_filt <- RunUMAP(cd8_l1_full_filt, dims = 1:12)

In [None]:
# Visualize
DimPlot(cd8_l1_full_filt, group.by = c("Experiment_ID"), raster = FALSE)

### PCA on samples

In [None]:
# Sample-level averages again, this time from the integrated object.
cd8_samples2 <- AverageExpression(cd8_l1_full_filt, group.by = "Sample_char", return.seurat = TRUE)

In [None]:
cd8_samples2 <- FindVariableFeatures(cd8_samples2)

In [None]:
cd8_samples2 <- RunPCA(cd8_samples2)

In [None]:
DimPlot(cd8_samples2)

In [None]:
# The column names of the averaged object are used as the sample descriptor.
cd8_samples2$Sample_char <- colnames(cd8_samples2)

In [None]:
# Split the descriptor back into its six space-separated fields.
cd8_samples2@meta.data <- cd8_samples2@meta.data %>%
  separate(
    Sample_char,
    into = c("Patient_ID", "Disease", "Time", "Age_group", "Sex", "Exp"),
    sep = " ",
    remove = FALSE
  )

In [None]:
options(repr.plot.width = 12, repr.plot.height = 7)
# 2 x 3 grid of the sample-level PCA, one panel per grouping variable.
(
  DimPlot(cd8_samples2, group.by = "Exp") +
    DimPlot(cd8_samples2, group.by = "Disease") +
    DimPlot(cd8_samples2, group.by = "Time")
) / (
  DimPlot(cd8_samples2, group.by = "Sex") +
    DimPlot(cd8_samples2, group.by = "Age_group") +
    (DimPlot(cd8_samples2, group.by = "Patient_ID") + NoLegend())
)

In [None]:
# Same grid, stored for the combined figure built below.
p2 <- (
  DimPlot(cd8_samples2, group.by = "Exp") +
    DimPlot(cd8_samples2, group.by = "Disease") +
    DimPlot(cd8_samples2, group.by = "Time")
) / (
  DimPlot(cd8_samples2, group.by = "Sex") +
    DimPlot(cd8_samples2, group.by = "Age_group") +
    (DimPlot(cd8_samples2, group.by = "Patient_ID") + NoLegend())
)

In [None]:
# Title the un-integrated grid and apply ggtheme() to every panel (`&`).
p3 <- p1 +
  plot_annotation(
    title = "CD8",
    subtitle = "PCA no integration",
    theme = theme(plot.title = element_text(size = 12))
  ) &
  ggtheme()

In [None]:
# Same for the STACAS-integrated grid.
p4 <- p2 +
  plot_annotation(
    title = "CD8",
    subtitle = "PCA integration STACAS",
    theme = theme(plot.title = element_text(size = 10))
  ) &
  ggtheme()

In [None]:
# Stack both grids in one column.
plot2 <- cowplot::plot_grid(p3, p4, ncol = 1)

In [None]:
plot2

In [None]:
#saveRDS(cd8_l1_full_filt, "../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# C-peptide group per cell: the T0 value is used for T0 cells and the T1
# value for T1 cells, split at 200. A value of exactly 200, a missing value
# or any other Time gives NA.
cd8_l1_full_filt$group_cpept <- case_when(
  cd8_l1_full_filt$fasting_cpept_1 > 200 & cd8_l1_full_filt$Time == "T0" ~ "Cpept_HI_T0",
  cd8_l1_full_filt$fasting_cpept_1 < 200 & cd8_l1_full_filt$Time == "T0" ~ "Cpept_LO_T0",
  cd8_l1_full_filt$fasting_cpept_T1 < 200 & cd8_l1_full_filt$Time == "T1" ~ "Cpept_LO_T1",
  cd8_l1_full_filt$fasting_cpept_T1 > 200 & cd8_l1_full_filt$Time == "T1" ~ "Cpept_HI_T1",
  TRUE ~ NA_character_
)

In [None]:
# Condition2: "Part_remission_<value>" where part_remission_y_n is known;
# otherwise "Ctrl" for "Ctrl T0" cells and NA for everything else.
cd8_l1_full_filt$Condition2 <- ifelse(
  is.na(cd8_l1_full_filt$part_remission_y_n),
  ifelse(cd8_l1_full_filt$Condition == "Ctrl T0", "Ctrl", NA_character_),
  paste0("Part_remission_", cd8_l1_full_filt$part_remission_y_n)
)

In [None]:
cd8_l1_full_filt$Condition2 %>% table

# Analysis CD8 Level 1

In [None]:
# Reload the Level-1 object from disk.
# NOTE(review): the saveRDS() call just above this section is commented out;
# the file is written by the save cells further down - confirm the intended
# run order.
cd8_l1_full_filt <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
cd8_l1_full_filt <- FindNeighbors(cd8_l1_full_filt, dims = 1:12)

In [None]:
cd8_l1_full_filt <- FindClusters(cd8_l1_full_filt, resolution = 0.6)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

DimPlot(cd8_l1_full_filt, label = TRUE, raster = FALSE)

### Markers

In [None]:
# Use the manual annotation as the active identity.
# NOTE(review): `annotations_manual` is assigned only in the "Cluster
# annotations" cells below; this cell relies on the reloaded object already
# carrying that column - confirm the intended run order.
Idents(cd8_l1_full_filt) <- cd8_l1_full_filt$annotations_manual

In [None]:
mrk <- FindAllMarkers(cd8_l1_full_filt)

In [None]:
# rank_score_func() is not defined in this notebook (presumably in the
# sourced script).
mrk <- rank_score_func(mrk)

In [None]:
# `recursive = TRUE` also creates ../tables/ and ../tables/cd8/, which are
# otherwise only created later in the notebook; without it this call fails
# on a fresh checkout and the write.csv() below errors.
dir.create("../tables/cd8/markers_annotations/", recursive = TRUE, showWarnings = FALSE)

In [None]:
write.csv(mrk, "../tables/cd8/markers_annotations/mrk_cd8_l1_full_filt.csv", row.names = FALSE)

### Cluster annotations

In [None]:
# Collapse the 15 clusters (0-14) into three broad populations. The factor
# levels follow the order in which the labels first appear below:
# NK cells, CD8 T cells, Unconventional T cells.
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    annotations_manual = recode_factor(
      seurat_clusters,
      "0" = "NK cells",
      "1" = "CD8 T cells",
      "2" = "CD8 T cells",
      "3" = "CD8 T cells",
      "4" = "CD8 T cells",
      "5" = "CD8 T cells",
      "6" = "Unconventional T cells",
      "7" = "Unconventional T cells",
      "8" = "CD8 T cells",
      "9" = "CD8 T cells",
      "10" = "NK cells",
      "11" = "NK cells",
      "12" = "NK cells",
      "13" = "Unconventional T cells",
      "14" = "CD8 T cells"
    )
  )

In [None]:
# One colour per annotation level, stored with the object.
cd8_l1_full_filt@misc$cols_annotations <- c("#1f77b4ff", "#66b559ff", "#9d6ba9ff")

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6.5)

# Embedding coloured by the manual annotation with the stored palette.
DimPlot(
  cd8_l1_full_filt,
  raster = TRUE, group.by = "annotations_manual",
  cols = cd8_l1_full_filt@misc$cols_annotations
)

In [None]:
options(repr.plot.width = 8.2, repr.plot.height = 8)

FeaturePlot(cd8_l1_full_filt, features = c("TRDC"), raster = FALSE, min.cutoff = 0)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 8)
# Highlight the cells whose `cdr3_A1` value matches the given sequence.
print(
  DimPlot(
    cd8_l1_full_filt,
    raster = FALSE,
    cells.highlight = colnames(cd8_l1_full_filt)[
      grep(cd8_l1_full_filt$cdr3_A1, pattern = "CAVMDSSYKLIF")
    ]
  ) +
    NoLegend() +
    ggtitle("CAVMDSSYKLIF")
)


In [None]:
# Name stored with the object; presumably read by the plotting helpers
# called below - TODO confirm.
cd8_l1_full_filt@misc$dataset_name <- "cd8_l1_full_filt"

In [None]:
# Unique combinations of the sample-level metadata columns.
cd8_l1_full_filt@misc$all_md <- cd8_l1_full_filt@meta.data %>%
  dplyr::select(
    Sample_ID, Condition, Condition2,
    Disease,
    Sex, Age, Age_group, Patient_ID,
    Time, Experiment_ID
  ) %>%
  unique

In [None]:
# Output folders for the automatically generated figures; quiet on re-runs.
dir.create("../figures/auto/svg", recursive = TRUE, showWarnings = FALSE)
dir.create("../figures/auto/png", recursive = TRUE, showWarnings = FALSE)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6.5)
# save_dimplot_plot() is not defined in this notebook (presumably in the
# sourced script).
save_dimplot_plot(seurat_dataset = cd8_l1_full_filt)

In [None]:
saveRDS(cd8_l1_full_filt, "../data/processed/L1/cd8_l1_full_filt.rds")

### Cluster composition

In [None]:
# process_plots_from_dataset() and create_df4() are not defined in this
# notebook (presumably in the sourced script).
process_plots_from_dataset(seurat_dataset = cd8_l1_full_filt)

### Save frequencies

In [None]:
df4 <- create_df4(cd8_l1_full_filt)

In [None]:
df4

In [None]:
# Create the table folders with base R only (the original mixed
# `dir_create()` and `dir.create()`); `recursive = TRUE` makes every call
# independent of the others and `showWarnings = FALSE` keeps re-runs quiet.
dir.create("../tables/", recursive = TRUE, showWarnings = FALSE)
dir.create("../tables/cd8/", recursive = TRUE, showWarnings = FALSE)
dir.create("../tables/cd8/markers_annotations/", recursive = TRUE, showWarnings = FALSE)
dir.create("../tables/cd8/frequencies/", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Only the first three columns of df4 are exported.
freq <- df4 %>% dplyr::select(1:3)
write.csv(freq, "../tables/cd8/frequencies/freq_cd8_l1_full_filt.csv", row.names = FALSE)

## CD8 L1 Dorothea

In [None]:
DimPlot(cd8_l1_full_filt)

In [None]:
# Prior-knowledge networks used for the activity scores below:
# `net` from get_progeny() (top = 200), `net2` from CollecTRI.
net <- get_progeny(organism = "human", top = 200)
net2 <- decoupleR::get_collectri(organism = "human", split_complexes = FALSE)

In [None]:
# Raise the size limit for globals exported to futures (10000 MiB).
options(future.globals.maxSize = 10000 * 1024^2)

In [None]:
 # Work under the name `data`; the result is written over the Level-1 file
# at the end of this cell.
data <- cd8_l1_full_filt
# Extract the normalized log-transformed counts
mat <- as.matrix(data@assays$RNA@data)

######## Pathways Progeny #########

# Run wmean
acts <- run_wmean(
  mat = mat, net = net,
  .source = "source", .target = "target", .mor = "weight",
  times = 100, minsize = 5
)

# Add data to Seurat object: keep the "norm_wmean" statistic, reshape to a
# source x cell matrix and store it as a new assay.
data[["pathwayswmean"]] <- acts %>%
  filter(statistic == "norm_wmean") %>%
  pivot_wider(id_cols = "source", names_from = "condition", values_from = "score") %>%
  column_to_rownames("source") %>%
  Seurat::CreateAssayObject(.)

# Scale the data
DefaultAssay(object = data) <- "pathwayswmean"

data <- ScaleData(data)
# Overwrite the assay's data slot with the scaled scores.
data@assays$pathwayswmean@data <- data@assays$pathwayswmean@scale.data
rownames(data@assays$pathwayswmean@data)

######## CollecTRI #########

# Run ULM
acts <- run_ulm(
  mat = mat, net = net2,
  .source = "source", .target = "target", .mor = "mor",
  minsize = 5
)

# Add data to Seurat object: reshape to a source x cell matrix and store it
# as a new assay.
data[["CollecTRI"]] <- acts %>%
  pivot_wider(id_cols = "source", names_from = "condition", values_from = "score") %>%
  column_to_rownames("source") %>%
  Seurat::CreateAssayObject(.)

# Scale the data
DefaultAssay(object = data) <- "CollecTRI"

data <- ScaleData(data)
# Overwrite the assay's data slot with the scaled scores.
data@assays$CollecTRI@data <- data@assays$CollecTRI@scale.data
rownames(data@assays$CollecTRI@data)

# Switch back to the "integrated" assay before saving.
DefaultAssay(object = data) <- "integrated"
saveRDS(data, "../data/processed/L1/cd8_l1_full_filt.rds")


# Analysis CD8 Level 2

## Analysis CD8 Level 2: Conventional CD8+ T cells

In [None]:
# Evaluate futures sequentially.
plan("sequential")

In [None]:
cd8_l1_full_filt <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Keep the cells annotated as "CD8 T cells" and split them by experiment.
merged.list <- SplitObject(
  subset(cd8_l1_full_filt, annotations_manual == "CD8 T cells"),
  split.by = "Experiment_ID"
)

In [None]:
# Normalise each experiment separately and pick 2000 variable features.
merged.list <- lapply(X = merged.list, FUN = function(x) {
    DefaultAssay(x) <- "RNA"
    x$barcode <- colnames(x)
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

In [None]:
# Integrate the experiments and compute the UMAP on 14 dimensions.
new_dia_experiment2 <- Run.STACAS(merged.list, dims = 1:14)
new_dia_experiment2 <- RunUMAP(new_dia_experiment2, dims = 1:14)

In [None]:
# Continue under the name `cd8_subcluster`.
cd8_subcluster <- new_dia_experiment2

In [None]:
cd8_subcluster <- FindNeighbors(cd8_subcluster, reduction = "pca", dims = 1:14)

In [None]:
cd8_subcluster <- FindClusters(cd8_subcluster, resolution = 0.6)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_subcluster, label = TRUE)

In [None]:
FeaturePlot(cd8_subcluster, features = "CXCR3", min.cutoff = 0)

In [None]:
# Map the 12 clusters (0-11) to five subsets. The factor levels follow the
# order in which the labels first appear below:
# Tem, Temra, Naive, Tcm, Proliferating.
cd8_subcluster@meta.data <- cd8_subcluster@meta.data %>%
  mutate(
    annotations_manual = recode_factor(
      seurat_clusters,
      "0" = "Tem",
      "1" = "Temra",
      "2" = "Naive",
      "3" = "Naive",
      "4" = "Naive",
      "5" = "Tcm",
      "6" = "Temra",
      "7" = "Proliferating",
      "8" = "Tem",
      "9" = "Tem",
      "10" = "Naive",
      "11" = "Temra"
    )
  )

In [None]:
# Five colours, stored with the object and used for the annotation plot.
cd8_subcluster@misc$cols_annotations <- c(
  "#4c9c9cff", # Tem
  "#2e8a3fff", # Temra
  "#92c0dfff", # Naive
  "#74bc68ff", # Tcm
  "#88aa00ff"
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6.5)

DimPlot(
  cd8_subcluster,
  group.by = "annotations_manual",
  cols = cd8_subcluster@misc$cols_annotations,
  raster = TRUE
)

In [None]:
cd8_subcluster@misc$dataset_name <- "cd8_subcluster"

In [None]:
# Unique combinations of the sample-level metadata columns.
# NOTE(review): the table is taken from `cd8_l1_full_filt`, not from
# `cd8_subcluster` itself - confirm this is intended.
cd8_subcluster@misc$all_md <- cd8_l1_full_filt@meta.data %>%
  dplyr::select(
    Sample_ID, Condition, Condition2,
    Disease,
    Sex, Age, Age_group, Patient_ID,
    Time, Experiment_ID
  ) %>%
  unique

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6.5)
save_dimplot_plot(seurat_dataset = cd8_subcluster)

In [None]:
process_plots_from_dataset(cd8_subcluster)

In [None]:
save_dimplot_plot(cd8_subcluster)

In [None]:
saveRDS(cd8_subcluster, "../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
# Path matches the file written by saveRDS() above (the original commented
# line pointed to "cd8_subcluster.rds", which is never written here).
#cd8_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
cd8_subcluster

### Cluster tree

In [None]:
Idents(cd8_subcluster)  <- cd8_subcluster$seurat_clusters

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_subcluster, label = T)

In [None]:
# Build a cluster tree on the current identities (set to seurat_clusters
# above) using dimensions 1:14, without reordering the clusters.
cd8_subcluster <- BuildClusterTree(
  cd8_subcluster,
  dims = 1:14,
  reorder = FALSE,
  reorder.numeric = FALSE
)

In [None]:
# Pull the tree out of the object and prefix every tip label with "Cluster "
# so the plot labels read "Cluster 0", "Cluster 1", ...
tree <- cd8_subcluster@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

In [None]:
# Tip colours for the cluster tree, derived from the manual annotation rather
# than from a hand-maintained vector. The previous hard-coded list did not
# match the cluster -> annotation mapping (it held two Tcm and two Temra
# colours, while the mapping has one Tcm and three Temra clusters).
#
# One row per cluster, sorted by cluster level.
# NOTE(review): this assumes the tree tips follow the cluster level order,
# which is what BuildClusterTree(reorder = FALSE) is expected to give - confirm.
cluster_annotation <- cd8_subcluster@meta.data %>%
  dplyr::distinct(seurat_clusters, annotations_manual) %>%
  dplyr::arrange(seurat_clusters)

# cols_annotations is ordered like the levels of annotations_manual, so the
# integer level code of each cluster's label indexes the palette directly.
colors <- cd8_subcluster@misc$cols_annotations[
  as.integer(cluster_annotation$annotations_manual)
]

# Fail early if a cluster has no annotation or the palette is too short.
stopifnot(!anyNA(colors))

In [None]:
# Draw the cluster tree: labelled tips, one coloured point per tip (colours
# come from the `colors` vector defined above), clipping disabled and a wider
# right margin so the tip labels are not cut off.
p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors, shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

# Saving to disk is currently disabled; the plot is only shown in the notebook.
#ggsave('plots/cluster_tree.png', p, height = 4, width = 6)

In [None]:
options(repr.plot.width=3.5, repr.plot.height=3)
p

### Cluster composition

In [None]:
options(repr.plot.width=16, repr.plot.height=5)
process_plots_from_dataset(seurat_dataset = cd8_subcluster)

### Save frequencies

In [None]:
df4  <- create_df4(cd8_subcluster)

In [None]:
df4

In [None]:
freq  <- df4  %>% dplyr::select(1:3)
write.csv(freq, "../tables/cd8/frequencies/freq_cd8_l2_cd8_subcluster.csv", row.names = FALSE)

### Save markers

In [None]:
Idents(cd8_subcluster)  <- cd8_subcluster$annotations_manual

In [None]:
plan("multisession")

In [None]:
mrk  <- FindAllMarkers(cd8_subcluster)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
write.csv(mrk, "../tables/cd8/markers_annotations/mrk_cd8_l2_cd8_subcluster.csv", row.names = FALSE)

In [None]:
plan("sequential")

### Save markers CollecTRI

In [None]:
cd8_l1_full_filt

In [None]:
# Marker analysis on the "CollecTRI" assay instead of the default assay.
# NOTE(review): the default assay of cd8_subcluster stays "CollecTRI" after
# this cell; anything run on this in-memory object afterwards will use it.
DefaultAssay(cd8_subcluster)  <- "CollecTRI"
mrk  <- FindAllMarkers(cd8_subcluster)

# Add the ranking score used throughout this notebook (rank_score_func is
# defined outside this section).
mrk  <- rank_score_func(mrk)

write.csv(mrk, "../tables/cd8/markers_annotations/mrk_cd8_l2_cd8_subcluster_CollecTRI.csv", row.names = FALSE)

plan("sequential")

## Analysis CD8 Level 2: NK cells

In [None]:
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
cd8_l2_nk  <- subset(cd8_l1_full_filt, annotations_manual == "NK cells")

In [None]:
cd8_l2_nk

In [None]:
# Integration batch label: cells from "Exp10" are relabelled "Exp11", all
# other cells keep their Experiment_ID. The object is split by this column below.
# NOTE(review): if Experiment_ID is a factor, ifelse() returns its integer
# codes for the unchanged rows - confirm the column is character.
cd8_l2_nk@meta.data  <- cd8_l2_nk@meta.data  %>% mutate(Experiment_ID_2 = 
                                                               ifelse(Experiment_ID == "Exp10","Exp11",Experiment_ID))

In [None]:
cd8_l2_nk$Experiment_ID_2  %>% table

In [None]:
merged.list  <- SplitObject(cd8_l2_nk, split.by = "Experiment_ID_2")

In [None]:
cd8_l2_nk$Experiment_ID_2  %>% table

In [None]:
# Prepare every per-experiment object for integration: switch to the RNA
# assay, keep the cell names in a `barcode` column, normalise and pick 2000
# variable features. The function returns the value of its last assignment,
# i.e. the processed object.
merged.list <- lapply(X = merged.list, FUN = function(x) {
    DefaultAssay(x)  <- "RNA"
    x$barcode  <- colnames(x)
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})


# Find STACAS integration anchors with an explicit min.sample.size of 80, then
# build the sample tree that is passed to IntegrateData.STACAS() in the next cell.
stacas_anchors <- FindAnchors.STACAS(merged.list, 
                                     dims = 1:12, 
                                     min.sample.size = 80)
st1 <- SampleTree.STACAS(
  anchorset = stacas_anchors,
  obj.names = names(merged.list)
  )    

In [None]:
new_dia_experiment2 <- IntegrateData.STACAS(stacas_anchors,
                                          sample.tree = st1,
                                          dims=1:12) %>% ScaleData() %>%
  RunPCA(npcs=12) %>% RunUMAP(dims=1:12)

In [None]:
# First clustering pass on the integrated object (resolution 0.3).
# The neighbours and clusters are recomputed at resolution 0.4 further down
# in this block, so that later call decides the final seurat_clusters.
new_dia_experiment2 <- FindNeighbors(new_dia_experiment2, reduction = "pca", dims = 1:12)
new_dia_experiment2 <- FindClusters(new_dia_experiment2, resolution = 0.3)

In [None]:
# From here on the integrated object replaces the original NK subset.
cd8_l2_nk  <- new_dia_experiment2

In [None]:
cd8_l2_nk

In [None]:
# Same neighbour graph settings as above, now on cd8_l2_nk.
cd8_l2_nk <- FindNeighbors(cd8_l2_nk, reduction = "pca", dims = 1:12)

In [None]:
# Final clustering resolution used for the NK annotation.
cd8_l2_nk <- FindClusters(cd8_l2_nk, resolution = 0.4)

In [None]:
DimPlot(cd8_l2_nk, group.by = "seurat_clusters")

In [None]:
cd8_l2_nk

In [None]:
saveRDS(cd8_l2_nk, "../data/processed/L2/cd8_l2_nk.rds")

In [None]:
#cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")

### Cluster annotations

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

dimplot1  <- DimPlot(cd8_l2_nk, label = T, raster = T) 
dimplot1

In [None]:
mrk  <- FindAllMarkers(cd8_l2_nk, only.pos = TRUE)

mrk  <- rank_score_func(mrk)

markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

### Cluster tree

In [None]:
# Use the cluster ids as identities for the tree built below.
Idents(cd8_l2_nk)  <- cd8_l2_nk$seurat_clusters

In [None]:
# Repeats the resolution-0.4 clustering already run earlier in this section.
# NOTE(review): presumably a leftover; it should give the same clusters if
# the neighbour graph is unchanged - confirm and consider removing.
cd8_l2_nk <- FindClusters(cd8_l2_nk, resolution = 0.4)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l2_nk, label = T)

In [None]:
cd8_l2_nk <- BuildClusterTree(
  cd8_l2_nk,
  dims = 1:12,
  reorder = FALSE,
  reorder.numeric = FALSE
)

In [None]:
tree <- cd8_l2_nk@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

In [None]:
p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

#ggsave('plots/cluster_tree.png', p, height = 4, width = 6)

In [None]:
options(repr.plot.width=3.5, repr.plot.height=3)
p

In [None]:
cd8_l2_nk@meta.data  <- cd8_l2_nk@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, 
                                     "0" = "NK1",
                                     "1" = "NK1",
                                     "2" = "NK2",
                                     "3" = "NK3",
                                     "4" = "NK3",
                                     "5" = "NK4",
                                     "6" = "NK5",
                                     "7" = "NK6",
                                     "8" = "NK7"))

In [None]:
Idents(cd8_l2_nk)  <- cd8_l2_nk$annotations_manual

In [None]:
mrk  <- FindAllMarkers(cd8_l2_nk, only.pos = TRUE)

mrk  <- rank_score_func(mrk)

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 8)  %>% pull(gene)

In [None]:
mrk

In [None]:
options(repr.plot.width = 16, repr.plot.height = 40)
FeaturePlot(cd8_l2_nk, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
markers_cl6  <- mrk  %>% arrange(desc(score))  %>% filter(cluster == "NK6")  %>% slice_head(n = 40)  %>% pull(gene)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 40)
FeaturePlot(cd8_l2_nk, features = markers_cl6,
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l2_nk, label = T, group.by = "annotations_manual")

In [None]:
# Final NK annotation: each label is the NK subset name followed by three
# gene symbols. Clusters 0/1 and clusters 3/4 share a label.
nk_labels <- c(
  "0" = "NK1: CX3CR1 SPON2 FCGR3A",
  "1" = "NK1: CX3CR1 SPON2 FCGR3A",
  "2" = "NK2: CD52 SELL GZMH",
  "3" = "NK3: DUSP2 CXCR4 CD69",
  "4" = "NK3: DUSP2 CXCR4 CD69",
  "5" = "NK4: GZMK IL7R XCL1",
  "6" = "NK5: IFI6 MX1 ISG15",
  "7" = "NK6: CNN2 TRAT1 CD3E",
  "8" = "NK7: MCM7 STMN1 E2F2"
)

# This overwrites the short NK1..NK7 labels assigned earlier.
cd8_l2_nk@meta.data <- mutate(
  cd8_l2_nk@meta.data,
  annotations_manual = recode_factor(seurat_clusters, !!!nk_labels)
)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 4)
DimPlot(cd8_l2_nk, group.by = "annotations_manual")

saveRDS(cd8_l2_nk, "../data/processed/L2/cd8_l2_nk.rds")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
dimplot1

In [None]:
cd8_l2_nk@misc$dataset_name  <- "cd8_l2_nk"

In [None]:
# Sample-level metadata taken from the L1 object: keep only the listed
# columns and drop duplicated rows, then store the table on the NK object.
cd8_l2_nk@misc$all_md <- unique(
  dplyr::select(
    cd8_l1_full_filt@meta.data,
    Sample_ID, Condition, Condition2,
    Disease,
    Sex, Age, Age_group, Patient_ID,
    Time, Experiment_ID
  )
)

In [None]:
cd8_l2_nk@misc$cols_annotations  <- c(
     "#aaeeff66",
    "#4adbffff",
    "#87cddeff",
     "#009cc3ff", 
    "#5599ffff",
    "#216778ff" ,
   
   "#0044aaff"
    )

In [None]:
options(repr.plot.width = 10, repr.plot.height = 6.5)
save_dimplot_plot(seurat_dataset = cd8_l2_nk)

In [None]:
saveRDS(cd8_l2_nk, "../data/processed/L2/cd8_l2_nk.rds")

In [None]:
plan("multisession")

### Cluster tree

In [None]:
Idents(cd8_l2_nk)  <- cd8_l2_nk$seurat_clusters

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l2_nk, label = T)

In [None]:
cd8_l2_nk <- BuildClusterTree(
  cd8_l2_nk,
  dims = 1:12,
  reorder = FALSE,
  reorder.numeric = FALSE
)

In [None]:
tree <- cd8_l2_nk@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

In [None]:
# Tip colours for the NK cluster tree, one entry per cluster (9 entries).
# NOTE(review): entries 1-2 and 3-4 share a colour, but in annotations_manual
# clusters 0/1 share NK1 and clusters 3/4 share NK3 (cluster 2 is NK2 alone).
# These values also differ from cd8_l2_nk@misc$cols_annotations. The vector
# looks like it belongs to an earlier clustering - confirm before using the
# colours to read the tree.
colors  <- c(
    "#b9ddf1",
    "#b9ddf1",
    "#6a9bc3",
     "#6a9bc3",
    "#81c6eeff",
     "#214478ff" ,
     "#66a4e7ff" ,
    "dodgerblue",
     "#b3e0a6")

In [None]:
p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors, shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

#ggsave('plots/cluster_tree.png', p, height = 4, width = 6)

In [None]:
options(repr.plot.width=3.5, repr.plot.height=3)
p

### Cluster composition

In [None]:
options(repr.plot.width=16, repr.plot.height=5)
process_plots_from_dataset(seurat_dataset = cd8_l2_nk)

### Save frequencies

In [None]:
df4  <- create_df4(cd8_l2_nk)

In [None]:
df4

In [None]:
freq  <- df4  %>% dplyr::select(1:3)
write.csv(freq, "../tables/cd8/frequencies/freq_cd8_l2_cd8_l2_nk.csv", row.names = FALSE)

### Save markers

In [None]:
Idents(cd8_l2_nk)  <- cd8_l2_nk$annotations_manual

In [None]:
plan("multisession")

In [None]:
mrk  <- FindAllMarkers(cd8_l2_nk)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
write.csv(mrk, "../tables/cd8/markers_annotations/mrk_cd8_l2_cd8_l2_nk.csv", row.names = FALSE)

In [None]:
plan("sequential")

## Analysis CD8 Level 2: Unconventional cells

In [None]:
cd8_l2_unc  <- subset(cd8_l1_full_filt, annotations_manual == "Unconventional T cells")

In [None]:
cd8_l2_unc$Experiment_ID  %>% table

In [None]:
cd8_l2_unc@meta.data  <- cd8_l2_unc@meta.data  %>% mutate(Experiment_ID_2 = 
                                                               ifelse(Experiment_ID == "Exp10","Exp11",Experiment_ID))

In [None]:
cd8_l2_unc$Experiment_ID_2  %>% table

In [None]:
merged.list  <- SplitObject(cd8_l2_unc, split.by = "Experiment_ID_2")

In [None]:
# Per-experiment preparation before STACAS integration: work on the RNA
# assay, remember the cell names in `barcode`, normalise and select 2000
# variable features with the "vst" method.
prepare_for_integration <- function(obj) {
  DefaultAssay(obj) <- "RNA"
  obj$barcode <- colnames(obj)
  obj <- NormalizeData(obj)
  obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000)
  obj
}

merged.list <- lapply(merged.list, prepare_for_integration)

In [None]:
new_dia_experiment2 <- Run.STACAS(merged.list, dims = 1:12)
new_dia_experiment2 <- RunUMAP(new_dia_experiment2, dims = 1:12)

In [None]:
cd8_l2_unc  <- new_dia_experiment2

In [None]:
cd8_l2_unc <- FindNeighbors(cd8_l2_unc, reduction = "pca", dims = 1:12)

In [None]:
cd8_l2_unc <- FindClusters(cd8_l2_unc, resolution = 0.5)

In [None]:
options(repr.plot.width=3.5, repr.plot.height=3)

DimPlot(cd8_l2_unc, label = T)

In [None]:
# Markers per cluster for the unconventional T cell subset.
# NOTE(review): log(1.5) is a natural logarithm; confirm this is the scale
# the installed Seurat version expects for logfc.threshold.
mrk  <- FindAllMarkers(cd8_l2_unc, logfc.threshold = log(1.5))

# NOTE(review): this table goes to the current working directory (with row
# names), unlike the other marker tables, which go to ../tables/cd8/ - confirm.
write.csv(mrk, "markers_cd8_l2_unc.csv")

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
mrk

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd8_l2_unc, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
 mrk  %>% arrange(desc(score))  %>% filter(cluster == "3") 

In [None]:
markers_cl3  <- mrk  %>% arrange(desc(score))  %>% filter(cluster == "3")  %>% slice_head(n = 40)  %>% pull(gene)

options(repr.plot.width = 16, repr.plot.height = 40)
FeaturePlot(cd8_l2_unc, features = markers_cl3,
           min.cutoff = 0, ncol = 4)

In [None]:
markers_cl4  <- mrk  %>% arrange(desc(score))  %>% filter(cluster == "4")  %>% slice_head(n = 40)  %>% pull(gene)

options(repr.plot.width = 16, repr.plot.height = 40)
FeaturePlot(cd8_l2_unc, features = markers_cl4,
           min.cutoff = 0, ncol = 4)

In [None]:
saveRDS(cd8_l2_unc, "../data/processed/L2/cd8_l2_unc.rds")

In [None]:
#cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")

### Cluster annotations

In [None]:
options(repr.plot.width=3.5, repr.plot.height=3)

DimPlot(cd8_l2_unc, label = T)

In [None]:
cd8_l2_unc@meta.data  <- cd8_l2_unc@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, 
                                     "0" = "MAIT1: LTB CXCR6 CEBPD",
                                     "1" = "MAIT2: JUN NFKBIA DUSP1",
                                     "2" = "CD8Tgd1: FGFBP2 GZMH GZMB",
                                     "3" = "CD8Tgd2: TRDV2 SELL COTL1",
                                     "4" = "MAIT3: TENT5C PDE4D CREM",
                                     "5" = "MAIT4: IFI44L IFI6 MX1"))

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

DimPlot(cd8_l2_unc, label = T, group.by = "annotations_manual", repel = T)

In [None]:
cd8_l2_unc@misc$cols_annotations  <- c(
     "#D4A6C8",
   "#a359b5ff",
     "#4878a6" ,
    "#89b8da", 
     "#7137c8ff",
"#d400aaff")

In [None]:
cd8_l2_unc@misc$dataset_name  <- "cd8_l2_unc"

In [None]:
# Sample-level metadata taken from the L1 object: keep only the listed
# columns and drop duplicated rows, then store the table on this object.
cd8_l2_unc@misc$all_md <- unique(
  dplyr::select(
    cd8_l1_full_filt@meta.data,
    Sample_ID, Condition, Condition2,
    Disease,
    Sex, Age, Age_group, Patient_ID,
    Time, Experiment_ID
  )
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6.5)
save_dimplot_plot(seurat_dataset = cd8_l2_unc)

In [None]:
saveRDS(cd8_l2_unc, "../data/processed/L2/cd8_l2_unc.rds")

### Cluster tree

In [None]:
Idents(cd8_l2_unc)  <- cd8_l2_unc$seurat_clusters

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l2_unc, label = T)

In [None]:
cd8_l2_unc <- BuildClusterTree(
  cd8_l2_unc,
  dims = 1:12,
  reorder = FALSE,
  reorder.numeric = FALSE
)

In [None]:
tree <- cd8_l2_unc@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

In [None]:
# Tip colours for the cluster tree, derived from the manual annotation and the
# palette stored on the object. The previous hard-coded vector had the last two
# colours swapped relative to cols_annotations, so clusters 4 and 5 were drawn
# with each other's colour.
#
# One row per cluster, sorted by cluster level.
# NOTE(review): this assumes the tree tips follow the cluster level order,
# which is what BuildClusterTree(reorder = FALSE) is expected to give - confirm.
cluster_annotation <- cd8_l2_unc@meta.data %>%
  dplyr::distinct(seurat_clusters, annotations_manual) %>%
  dplyr::arrange(seurat_clusters)

# cols_annotations is ordered like the levels of annotations_manual, so the
# integer level code of each cluster's label indexes the palette directly.
colors <- cd8_l2_unc@misc$cols_annotations[
  as.integer(cluster_annotation$annotations_manual)
]

# Fail early if a cluster has no annotation or the palette is too short.
stopifnot(!anyNA(colors))

In [None]:
p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = colors, shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

#ggsave('plots/cluster_tree.png', p, height = 4, width = 6)

In [None]:
options(repr.plot.width=3.5, repr.plot.height=3)
p

### Cluster composition

In [None]:
options(repr.plot.width=16, repr.plot.height=5)
process_plots_from_dataset(seurat_dataset = cd8_l2_unc)

### Save markers

In [None]:
Idents(cd8_l2_unc)  <- cd8_l2_unc$annotations_manual

In [None]:
plan("multisession")

In [None]:
mrk  <- FindAllMarkers(cd8_l2_unc)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
write.csv(mrk, "../tables/cd8/markers_annotations/mrk_cd8_l2_cd8_l2_unc.csv", row.names = FALSE)

In [None]:
plan("sequential")

### Save frequencies

In [None]:
df4  <- create_df4(cd8_l2_unc)

In [None]:
df4

In [None]:
freq  <- df4  %>% dplyr::select(1:3)

In [None]:
write.csv(freq, "../tables/cd8/frequencies/freq_cd8_l2_cd8_l2_unc.csv", row.names = FALSE)

## Analysis CD8 Level 3

In [None]:
cd8_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
DimPlot(cd8_subcluster, group.by = 'annotations_manual')

In [None]:
cd8_l3_tem  <- subset(cd8_subcluster, annotations_manual == "Tem")
cd8_l3_naive  <- subset(cd8_subcluster, annotations_manual == "Naive")
cd8_l3_tcm  <- subset(cd8_subcluster, annotations_manual == "Tcm")
cd8_l3_temra  <- subset(cd8_subcluster, annotations_manual == "Temra")
cd8_l3_prolif  <- subset(cd8_subcluster, annotations_manual == "Proliferating")

In [None]:
cd8_l3_list  <- list(cd8_l3_tem, cd8_l3_naive, cd8_l3_tcm, cd8_l3_temra, cd8_l3_prolif)

In [None]:
names_list  <- c("cd8_l3_tem", "cd8_l3_naive", "cd8_l3_tcm", "cd8_l3_temra", "cd8_l3_prolif")

Ensure that we will split into datasets of size at least 100 cells

In [None]:
# Add the integration batch label to every L3 subset: experiments "Exp10" and
# "Exp11" are pooled into "Exp10_11", every other experiment keeps its id.
cd8_l3_list <- lapply(cd8_l3_list, function(obj) {
  obj@meta.data <- mutate(
    obj@meta.data,
    Experiment_ID_2 = ifelse(
      Experiment_ID %in% c("Exp10", "Exp11"),
      "Exp10_11",
      Experiment_ID
    )
  )
  obj
})

In [None]:
# Show the number of cells per pooled experiment for each L3 subset.
for (obj in cd8_l3_list) {
  print(obj$Experiment_ID_2 %>% table)
}

We will need to process proliferating cells separately, as there are only 85 cells from experiment 10/11 and STACAS would remove these cells.

In [None]:
# Integrate, embed and cluster the first four L3 subsets (Tem, Naive, Tcm,
# Temra) one by one and save each result under ../data/processed/L3/.
# Proliferating cells (element 5) are handled separately in the next cell.
for (i in 1:4) {
  seurat <- cd8_l3_list[[i]]

  # One object per pooled experiment, each normalised on the RNA assay with
  # 2000 variable features.
  merged.list <- SplitObject(seurat, split.by = "Experiment_ID_2")
  merged.list <- lapply(X = merged.list, FUN = function(x) {
    DefaultAssay(x) <- "RNA"
    x$barcode <- colnames(x)
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
    x
  })

  # STACAS integration followed by UMAP and clustering on 12 dimensions.
  new_dia_experiment2 <- Run.STACAS(merged.list, dims = 1:12)
  new_dia_experiment2 <- RunUMAP(new_dia_experiment2, dims = 1:12)
  new_dia_experiment2 <- FindNeighbors(new_dia_experiment2, reduction = "pca", dims = 1:12)
  new_dia_experiment2 <- FindClusters(new_dia_experiment2, resolution = 0.3)

  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(DimPlot(new_dia_experiment2, label = TRUE))

  # Placeholder palette and annotation ("Cluster <id>"); the manual
  # annotation is assigned later in the notebook.
  new_dia_experiment2@misc$cols_annotations <- scales::hue_pal(h.start = 20)(
    length(levels(factor(new_dia_experiment2$seurat_clusters)))
  )
  new_dia_experiment2$annotations_manual <- paste("Cluster", new_dia_experiment2$seurat_clusters)

  # cd8_l3_list is an unnamed list here, so names(cd8_l3_list)[i] is NULL and
  # assigning it would store nothing; take the name from names_list, the same
  # vector that is used for the file name below.
  new_dia_experiment2@misc$dataset_name <- names_list[i]

  # Sample-level metadata from the L1 object (duplicated rows removed).
  new_dia_experiment2@misc$all_md <- cd8_l1_full_filt@meta.data %>%
    dplyr::select(Sample_ID, Condition, Condition2,
                  Disease,
                  Sex, Age, Age_group, Patient_ID,
                  Time, Experiment_ID) %>%
    unique()

  saveRDS(new_dia_experiment2, paste0("../data/processed/L3/", names_list[i], ".rds"))
}

When processing proliferating cells, we will change the min.sample.size parameter of the FindAnchors.STACAS function.

In [None]:
# Same pipeline as the previous cell, for the proliferating subset only
# (element 5). The anchors are computed with an explicit min.sample.size of 80
# so that the small Exp10_11 batch is kept.
for (i in 5) {
  seurat <- cd8_l3_list[[i]]

  # One object per pooled experiment, each normalised on the RNA assay with
  # 2000 variable features.
  merged.list <- SplitObject(seurat, split.by = "Experiment_ID_2")
  merged.list <- lapply(X = merged.list, FUN = function(x) {
    DefaultAssay(x) <- "RNA"
    x$barcode <- colnames(x)
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
    x
  })

  # Step-by-step STACAS integration so that min.sample.size can be set.
  stacas_anchors <- FindAnchors.STACAS(merged.list,
                                       dims = 1:12,
                                       min.sample.size = 80)
  st1 <- SampleTree.STACAS(
    anchorset = stacas_anchors,
    obj.names = names(merged.list)
  )

  new_dia_experiment2 <- IntegrateData.STACAS(stacas_anchors,
                                              sample.tree = st1,
                                              dims = 1:12) %>%
    ScaleData() %>%
    RunPCA(npcs = 12) %>%
    RunUMAP(dims = 1:12)

  new_dia_experiment2 <- FindNeighbors(new_dia_experiment2, reduction = "pca", dims = 1:12)
  new_dia_experiment2 <- FindClusters(new_dia_experiment2, resolution = 0.3)

  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(DimPlot(new_dia_experiment2, label = TRUE))

  # Placeholder palette and annotation ("Cluster <id>"); the manual
  # annotation is assigned later in the notebook.
  new_dia_experiment2@misc$cols_annotations <- scales::hue_pal(h.start = 20)(
    length(levels(factor(new_dia_experiment2$seurat_clusters)))
  )
  new_dia_experiment2$annotations_manual <- paste("Cluster", new_dia_experiment2$seurat_clusters)

  # cd8_l3_list is an unnamed list here, so names(cd8_l3_list)[i] is NULL and
  # assigning it would store nothing; take the name from names_list, the same
  # vector that is used for the file name below.
  new_dia_experiment2@misc$dataset_name <- names_list[i]

  # Sample-level metadata from the L1 object (duplicated rows removed).
  new_dia_experiment2@misc$all_md <- cd8_l1_full_filt@meta.data %>%
    dplyr::select(Sample_ID, Condition, Condition2,
                  Disease,
                  Sex, Age, Age_group, Patient_ID,
                  Time, Experiment_ID) %>%
    unique()

  saveRDS(new_dia_experiment2, paste0("../data/processed/L3/", names_list[i], ".rds"))
}

In [None]:
cd8_l3_tem  <- readRDS("../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive  <- readRDS("../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm  <- readRDS("../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra  <- readRDS("../data/processed/L3/cd8_l3_temra.rds")
cd8_l3_prolif  <- readRDS("../data/processed/L3/cd8_l3_prolif.rds")

In [None]:
cd8_l3_list  <- list(cd8_l3_tem, cd8_l3_naive, cd8_l3_tcm, cd8_l3_temra, cd8_l3_prolif)
names(cd8_l3_list) <- c("cd8_l3_tem", "cd8_l3_naive", "cd8_l3_tcm", "cd8_l3_temra", "cd8_l3_prolif")

In [None]:
#cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

### Annotations of L3 datasets

#### Tem

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)
DimPlot(cd8_l3_tem)

In [None]:
cd8_l3_tem <- FindNeighbors(cd8_l3_tem, reduction = "pca", dims = 1:12)
cd8_l3_tem <- FindClusters(cd8_l3_tem, resolution = 0.2)

DimPlot(cd8_l3_tem, label = T)

In [None]:
mrk  <- FindAllMarkers(cd8_l3_tem, only.pos = TRUE, min.diff.pct = 0.1)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd8_l3_tem, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)
FeaturePlot(cd8_l3_tem, features = c("ZNF683","ITGAM","TBX21","GZMA","GZMB","GZMH","GZMK",
                                    "STAT3","SOCS3","IL2RA"),
           min.cutoff = 0, ncol = 4)

In [None]:
mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)
DimPlot(cd8_l3_tem, label = T)

In [None]:
cd8_l3_tem@meta.data  <- cd8_l3_tem@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, "0" = "Tem1: ZNF683 ITGAM TBX21",
                                     "1" = "Tem2: GZMK SELL GZMM",
                                     "2" = "Tem3: CCR7 MYC NELL2",
                                     "3" = "Tem4: ISG15 IFI6 IFI44L"))

In [None]:
DimPlot(cd8_l3_tem, label = F, group.by = "annotations_manual")

In [None]:
saveRDS(cd8_l3_tem, "../data/processed/L3/cd8_l3_tem.rds")

#### Naive

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)

cd8_l3_naive <- FindNeighbors(cd8_l3_naive, reduction = "pca", dims = 1:12)
cd8_l3_naive <- FindClusters(cd8_l3_naive, resolution = 0.2)

In [None]:
DimPlot(cd8_l3_naive, label = T)

In [None]:
mrk  <- FindAllMarkers(cd8_l3_naive, only.pos = TRUE)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
mrk

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd8_l3_naive, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)

DimPlot(cd8_l3_naive, label = T)

In [None]:
cd8_l3_naive@meta.data  <- cd8_l3_naive@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, "0" = "Naive1: GIMAP4 GIMAP7 DDX17",
                                     "1" = "Naive2: DUSP1 JUN FOS",
                                     "2" = "Naive3: SOX4 STMN1 CHI3L2",
                                     "3" = "Naive4: IFI44L ISG15 MX1"))

In [None]:
options(repr.plot.width = 10, repr.plot.height = 3)

DimPlot(cd8_l3_naive, label = F, group.by = "annotations_manual")

In [None]:
saveRDS(cd8_l3_naive, "../data/processed/L3/cd8_l3_naive.rds")

#### Tcm

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)
DimPlot(cd8_l3_tcm)

cd8_l3_tcm <- FindNeighbors(cd8_l3_tcm, reduction = "pca", dims = 1:12)

In [None]:
cd8_l3_tcm <- FindClusters(cd8_l3_tcm, resolution = 0.4)

DimPlot(cd8_l3_tcm, label = T)

In [None]:
mrk  <- FindAllMarkers(cd8_l3_tcm, only.pos = TRUE)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

In [None]:
mrk %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4) 

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd8_l3_tcm, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)

DimPlot(cd8_l3_tcm, label = T)

In [None]:
cd8_l3_tcm@meta.data  <- cd8_l3_tcm@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, "0" = "Tcm1: CCR7 LEF1 NELL2",
                                     "1" = "Tcm2: CST7 GZMA GZMK",
                                     "2" = "Tcm3: GNLY PRF1 GZMB",
                                     "3" = "Tcm4: CCR9 KLRC1 NT5E",
                                     "4" = "Tcm5: CD160 DUSP2 NKG7",
                                     "5" = "Tcm6: CCR4 ITGB1 GATA3",
                                     "6" = "Tcm7: KLRB1 LST1 NCR3"))


In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)

DimPlot(cd8_l3_tcm, label = T, group.by = "annotations_manual", repel = T)

In [None]:
saveRDS(cd8_l3_tcm, "../data/processed/L3/cd8_l3_tcm.rds")

#### Temra

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)
DimPlot(cd8_l3_temra)

cd8_l3_temra <- FindNeighbors(cd8_l3_temra, reduction = "pca", dims = 1:12)

In [None]:
cd8_l3_temra <- FindClusters(cd8_l3_temra, resolution = 0.3)

DimPlot(cd8_l3_temra, label = T)

In [None]:
mrk  <- FindAllMarkers(cd8_l3_temra, only.pos = TRUE)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
mrk  %>% filter(cluster == 5)

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd8_l3_temra, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)

DimPlot(cd8_l3_temra, label = T)

In [None]:
cd8_l3_temra@meta.data  <- cd8_l3_temra@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, "0" = "Temra1: KLRB1 FGFBP2 THEMIS",
                                     "1" = "Temra2: DUSP2 CD160 CMC1",
                                     "2" = "Temra3: TYROBP GNLY ZNF683",
                                     "3" = "Temra4: IL7R GZMK CD27",
                                     "4" = "Temra5: FOS JUN DUSP1",
                                     "5" = "Temra6: IFI6 ISG15 MX1"))

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)

DimPlot(cd8_l3_temra, label = T, group.by = "annotations_manual", repel = T)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)

DimPlot(cd8_l3_temra, label = T, group.by = "annotations_manual", repel = T)

In [None]:
saveRDS(cd8_l3_temra, "../data/processed/L3/cd8_l3_temra.rds")

#### Prolif

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)
DimPlot(cd8_l3_prolif)

cd8_l3_prolif <- FindNeighbors(cd8_l3_prolif, reduction = "pca", dims = 1:12)

In [None]:
cd8_l3_prolif <- FindClusters(cd8_l3_prolif, resolution = 0.4)

DimPlot(cd8_l3_prolif, label = T)

In [None]:
mrk  <- FindAllMarkers(cd8_l3_prolif, only.pos = TRUE)

In [None]:
mrk  <- rank_score_func(mrk)

In [None]:
mrk  %>% filter(cluster == 3)  %>% arrange(desc(score))

In [None]:
markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 4)  %>% pull(gene)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 24)
FeaturePlot(cd8_l3_prolif, features = markers,
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)
DefaultAssay(cd8_l3_prolif)  <- "RNA"
FeaturePlot(cd8_l3_prolif, features = c("CCR9","TCF7","KLRG1","CX3CR1", "EOMES", "TOX", "MKI67"),
           min.cutoff = 0, ncol = 4)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)

DimPlot(cd8_l3_prolif, label = T)

In [None]:
cd8_l3_prolif@meta.data  <- cd8_l3_prolif@meta.data  %>% 
mutate(annotations_manual = recode_factor(seurat_clusters, "0" = "Prolif1: GZMA GZMH PRF1",
                                     "1" = "Prolif2: CD160 HLA-DRA XCL2",
                                     "2" = "Prolif3: CCR7 LEF1 GPR183",
                                     "3" = "Prolif4: MKI67 MCM7 CCR9",
                                     "4" = "Prolif5: MKI67 CCNB1 TROAP",
                                     "5" = "Prolif6: KLRG1 MCM7 CX3CR1",
                                     "6" = "Prolif7: MKI67 H3C2 H2AC13"))

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)

DimPlot(cd8_l3_prolif, label = T, group.by = "annotations_manual", repel = T)

In [None]:
saveRDS(cd8_l3_prolif, "../data/processed/L3/cd8_l3_prolif.rds")

### Plotting all datasets

In [None]:
cd8_l3_list  <- list(cd8_l3_tem, cd8_l3_naive, cd8_l3_tcm, cd8_l3_temra, cd8_l3_prolif)
names(cd8_l3_list)  <- c("cd8_l3_tem", "cd8_l3_naive", "cd8_l3_tcm", "cd8_l3_temra", "cd8_l3_prolif")

In [None]:
# For every annotated L3 dataset: set the palette and dataset name, produce
# the standard plots, write the frequency table and save the object.
for (i in seq_along(cd8_l3_list)) {
  seurat_dataset <- cd8_l3_list[[i]]
  dataset_name <- names(cd8_l3_list)[i]

  # One hue per annotation level; the name is used by the plotting helpers.
  seurat_dataset@misc$cols_annotations <- scales::hue_pal(h.start = 20)(
    length(levels(factor(seurat_dataset$annotations_manual)))
  )
  seurat_dataset@misc$dataset_name <- dataset_name

  options(repr.plot.width = 8, repr.plot.height = 6.5)
  save_dimplot_plot(seurat_dataset = seurat_dataset)

  options(repr.plot.width = 16, repr.plot.height = 5)
  process_plots_from_dataset(seurat_dataset = seurat_dataset)

  # Frequency table: first three columns of the create_df4() output.
  df4 <- create_df4(seurat_dataset)
  freq <- df4 %>% dplyr::select(1:3)

  # These are CD8 datasets: write next to the other CD8 frequency tables
  # instead of the ../tables/cd4/ folder used before (copy-paste leftover).
  # NOTE(review): update any downstream reader that still looks in ../tables/cd4/.
  write.csv(
    freq,
    paste0("../tables/cd8/frequencies/freq_", dataset_name, ".csv"),
    row.names = FALSE
  )
  saveRDS(seurat_dataset, paste0("../data/processed/L3/", dataset_name, ".rds"))
}

# Population tree

In [None]:
cd8_l3_tem  <- readRDS("../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive  <- readRDS("../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm  <- readRDS("../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra  <- readRDS("../data/processed/L3/cd8_l3_temra.rds")
cd8_l3_prolif  <- readRDS("../data/processed/L3/cd8_l3_prolif.rds")

In [None]:
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")
cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

## Sankey plot populations

In [None]:
cd8_md  <- cd8_l1_full_filt@meta.data  %>% dplyr::select(barcode, annotations_l1 = annotations_manual)

In [None]:
# Level-2 label per barcode: the manual annotation for the CD8 subclusters,
# and a single fixed label for all cells of the NK and unconventional objects.
md_l2  <- rbind(cd8_l2_subcluster@meta.data %>% dplyr::select(barcode, annotations_manual), 
                data.frame(barcode = cd8_l2_nk@meta.data$barcode, annotations_manual = "NK cells"), 
                data.frame(barcode = cd8_l2_unc@meta.data$barcode, annotations_manual = "Unconventional"))

In [None]:
cd8_md  <- left_join(cd8_md, (md_l2  %>% dplyr::select(barcode, annotations_l2 = annotations_manual)))

In [None]:
cd8_md  %>% group_by(annotations_l1, annotations_l2)  %>% tally

In [None]:
# Level-3 label per barcode. The five L3 datasets contribute their manual
# annotation; the NK and unconventional L2 objects contribute theirs as well,
# since they have no separate L3 object.
l3_sources <- list(
  cd8_l3_naive,
  cd8_l3_prolif,
  cd8_l3_tcm,
  cd8_l3_tem,
  cd8_l3_temra,
  cd8_l2_nk,
  cd8_l2_unc
)

# Stack the (barcode, annotations_manual) tables in the order listed above.
md_l3 <- do.call(
  rbind,
  lapply(l3_sources, function(obj) {
    dplyr::select(obj@meta.data, barcode, annotations_manual)
  })
)

In [None]:
md_l3

In [None]:
cd8_md  <- left_join(cd8_md, (md_l3  %>% dplyr::select(barcode, annotations_l3 = annotations_manual)))

In [None]:
cd8_md$annotations  <- "CD8"

In [None]:
data  <- cd8_md  %>% dplyr::select(-barcode)  %>% 
    group_by(annotations, annotations_l1, annotations_l2, annotations_l3)  %>% 
tally()

In [None]:
data  

Split the unconventional T cells into gd T cells and MAIT cells at level 2 (L2), based on their L3 annotation.

In [None]:
# Rows whose L1 label is "Unconventional T cells" get their L2 label from the
# L3 annotation: "MAIT" -> "MAIT cells", "CD8Tgd" -> "gd T cells".
# Every other row keeps its existing L2 label.
data <- data %>%
  mutate(
    annotations_l2 = case_when(
      annotations_l1 == "Unconventional T cells" & grepl("MAIT", annotations_l3) ~ "MAIT cells",
      annotations_l1 == "Unconventional T cells" & grepl("CD8Tgd", annotations_l3) ~ "gd T cells",
      TRUE ~ annotations_l2
    )
  )

In [None]:
data

In [None]:
# Make sure the output directory exists. `recursive = TRUE` also creates a
# missing parent directory, and `showWarnings = FALSE` keeps re-runs quiet when
# the directory is already there.
dir.create("../tables/sankey/", showWarnings = FALSE, recursive = TRUE)

In [None]:
# Export the per-label cell counts used as input for the Sankey plot.
write.csv(data, "../tables/sankey/cd8_sankey.csv")

The Sankey plot is drawn from the exported `cd8_sankey.csv` using https://app.rawgraphs.io/

## Adding Level3 annotation metadata

In [None]:
# Build the hierarchical per-cell labels that are written back to the L1 object.
#   1. Split unconventional T cells into MAIT / gd T cells at L2 (from the L3 label).
#   2. Normalise the L1 label: anything containing "CD8" becomes "CD8 T cells",
#      everything else is prefixed with "CD8".
#   3. Make each level carry its full path, joined with "---":
#      L2 = "<L1>---<L2>", L3 = "<L1>---<L2>---<L3>".
#   4. Cells without an L3 label end up with a trailing "---NA" (paste() turns NA
#      into the string "NA"); strip it so they fall back to their L2 path.
#      The pattern must use the same "---" separator as paste(); the previous
#      "_NA" pattern never matched this suffix.
md_new <- cd8_md %>%
  mutate(
    annotations_l2 = case_when(
      annotations_l1 == "Unconventional T cells" & grepl(annotations_l3, pattern = "MAIT") ~ "MAIT cells",
      annotations_l1 == "Unconventional T cells" & grepl(annotations_l3, pattern = "CD8Tgd") ~ "gd T cells",
      TRUE ~ annotations_l2
    )
  ) %>%
  mutate(
    annotations_l1 = ifelse(
      grepl(annotations_l1, pattern = "CD8"),
      "CD8 T cells",
      paste("CD8", annotations_l1)
    )
  ) %>%
  mutate(
    annotations_l2 = paste(annotations_l1, annotations_l2, sep = "---"),
    annotations_l3 = paste(annotations_l2, annotations_l3, sep = "---")
  ) %>%
  mutate(annotations_l3 = sub(annotations_l3, pattern = "---NA$", replacement = ""))

In [None]:
md_new

In [None]:
# Remove any existing root-label column so it does not clash with md_new.
cd8_l1_full_filt$annotations  <- NULL

In [None]:
# Attach the hierarchical labels from md_new to the object's metadata.
# The object is later saved back to the file it was loaded from, so on a re-run
# the metadata may already contain annotations_l1/l2/l3. A natural join would
# then also match on those columns and silently keep the stale labels. Drop
# every column that md_new is about to provide and join on the barcode only.
md_old <- cd8_l1_full_filt@meta.data
stale_cols <- setdiff(intersect(colnames(md_old), colnames(md_new)), "barcode")
md_old <- md_old[, setdiff(colnames(md_old), stale_cols), drop = FALSE]
md_joined <- left_join(md_old, md_new, by = "barcode")

# Duplicated barcodes in md_new would multiply rows; fail before the object's
# metadata is overwritten with a table that no longer has one row per cell.
stopifnot(nrow(md_joined) == ncol(cd8_l1_full_filt))
cd8_l1_full_filt@meta.data <- md_joined

In [None]:
# left_join() does not keep row names; restore the cell names as row names.
rownames(cd8_l1_full_filt@meta.data)  <- colnames(cd8_l1_full_filt)

In [None]:
# Large labelled overview of the L3 annotation.
options(repr.plot.width = 25, repr.plot.height = 15)

DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l3",
  label = TRUE,
  raster = TRUE
)

In [None]:
# Small, legend-free versions of the L3, L2 and L1 annotations.
options(repr.plot.width = 4, repr.plot.height = 4)
DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l3",
  label = FALSE,
  raster = TRUE
) + NoLegend()

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)
DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l2",
  label = FALSE,
  raster = TRUE
) + NoLegend()

In [None]:
DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l1",
  label = FALSE,
  raster = TRUE
) + NoLegend()

In [None]:
# L2 annotation with a fixed palette: "peru", five colours taken from the
# palette stored in the subclustered object (in the order 3, 5, 4, 1, 2),
# and two extra hex colours.
options(repr.plot.width = 7.5, repr.plot.height = 4)
DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l2",
  label = FALSE,
  raster = TRUE,
  cols = c(
    "peru",
    cd8_l2_subcluster@misc$cols[c(3, 5, 4, 1, 2)],
    "#be87e7ff",
    "#fe60cbff"
  )
)

In [None]:
# Labelled L1 annotation (uses the plot size set in the previous cell).
DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l1",
  label = TRUE,
  raster = TRUE
)

In [None]:
# Persist the annotated L1 object under the L1 path (the same file that is
# read back below).
saveRDS(
  object = cd8_l1_full_filt,
  file = "../data/processed/L1/cd8_l1_full_filt.rds"
)

In [None]:
# Reload the saved objects (entry point when resuming from this section).
cd8_l1_full_filt <- readRDS(file = "../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
cd8_l2_subcluster <- readRDS(file = "../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
# Number of cells per L2 label.
cd8_l1_full_filt$annotations_l2 %>% table()

## Table for quantification and Bayes

In [None]:
# Start this section from the saved, annotated L1 object.
cd8_l1_full_filt <- readRDS(file = "../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Distinct combinations of the sample / patient descriptor columns found in
# the per-cell metadata.
cd8_patient_meta <- cd8_l1_full_filt@meta.data %>%
  dplyr::select(
    Sample_ID,
    Condition,
    Condition2,
    Disease,
    Sex,
    Age,
    Age_group,
    Patient_ID,
    Time,
    Experiment_ID
  ) %>%
  unique()

In [None]:
# List the metadata columns available on the object.
colnames(cd8_l1_full_filt@meta.data)

In [None]:
# Distinct rows of the metadata table stored in the object's misc slot; joined
# back onto the counts at the end because counting keeps only Sample_ID.
md_to_join <- cd8_l1_full_filt@misc$all_md %>%
  unique()

# Cells per sample and L3 population, spread to one column per population
# (populations absent from a sample are filled with 0).
df3 <- cd8_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l3) %>%
  summarise(n = n()) %>%
  unique() %>%
  ungroup() %>%
  pivot_wider(
    names_from = "annotations_l3",
    values_from = "n",
    values_fill = 0
  )

# Start from every Sample_ID listed in misc$all_md so that samples without any
# cell in this object are kept; their counts come back as NA and are set to 0.
df4 <- cd8_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df4[is.na(df4)] <- 0

# Back to long format (one row per sample and population), then re-attach the
# sample metadata.
df4 <- df4 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations")
df4 <- left_join(df4, md_to_join)

In [None]:
# Tag the rows with the annotation level they describe.
df4$Level <- "L3"

In [None]:
df_l3 <- df4

In [None]:
# Distinct rows of the metadata table stored in the object's misc slot; joined
# back onto the counts at the end because counting keeps only Sample_ID.
md_to_join <- cd8_l1_full_filt@misc$all_md %>%
  unique()

# Cells per sample and L2 population, spread to one column per population
# (populations absent from a sample are filled with 0).
df3 <- cd8_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l2) %>%
  summarise(n = n()) %>%
  unique() %>%
  ungroup() %>%
  pivot_wider(
    names_from = "annotations_l2",
    values_from = "n",
    values_fill = 0
  )

# Start from every Sample_ID listed in misc$all_md so that samples without any
# cell in this object are kept; their counts come back as NA and are set to 0.
df4 <- cd8_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df4[is.na(df4)] <- 0

# Back to long format (one row per sample and population), then re-attach the
# sample metadata and tag the rows with their annotation level.
df4 <- df4 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations")
df4 <- left_join(df4, md_to_join)
df4$Level <- "L2"

In [None]:
df_l2 <- df4

In [None]:
df4

In [None]:
# Distinct rows of the metadata table stored in the object's misc slot; joined
# back onto the counts at the end because counting keeps only Sample_ID.
md_to_join <- cd8_l1_full_filt@misc$all_md %>%
  unique()

# Cells per sample and L1 population, spread to one column per population
# (populations absent from a sample are filled with 0).
df3 <- cd8_l1_full_filt@meta.data %>%
  group_by(Sample_ID, annotations_l1) %>%
  summarise(n = n()) %>%
  unique() %>%
  ungroup() %>%
  pivot_wider(
    names_from = "annotations_l1",
    values_from = "n",
    values_fill = 0
  )

# Start from every Sample_ID listed in misc$all_md so that samples without any
# cell in this object are kept; their counts come back as NA and are set to 0.
df4 <- cd8_l1_full_filt@misc$all_md %>%
  dplyr::select(Sample_ID) %>%
  unique() %>%
  left_join(df3)
df4[is.na(df4)] <- 0

# Back to long format (one row per sample and population), then re-attach the
# sample metadata and tag the rows with their annotation level.
df4 <- df4 %>%
  pivot_longer(!Sample_ID, values_to = "n", names_to = "annotations")
df4 <- left_join(df4, md_to_join)
df4$Level <- "L1"

df_l1 <- df4

In [None]:
df_l3

In [None]:
# Stack the per-level count tables (L1, L2, L3) into one long table.
df_all_levels <- rbind(df_l1, df_l2, df_l3)

In [None]:
write.csv(
  x = df_all_levels,
  file = "../tables/populations_freq/all_levels_counts_with_preliminary_cd8.csv"
)

In [None]:
df_all_levels

In [None]:
# Keep only the rows from experiments Exp16, Exp18, Exp19 and Exp20.
# NOTE(review): judging by the variable and file names, all other experiments
# are the preliminary ones - confirm.
df_all_levels_without_preliminary <- dplyr::filter(
  df_all_levels,
  Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
)

In [None]:
write.csv(
  x = df_all_levels_without_preliminary,
  file = "../tables/populations_freq/all_levels_counts_cd8.csv"
)

## Population phylogenetic tree

In [None]:
# Number of cells per Patient_Time value.
cd8_l1_full_filt$Patient_Time %>% table()

In [None]:
# Use the L3 annotation as the active identity so the tree is built on it.
Idents(cd8_l1_full_filt) <- cd8_l1_full_filt$annotations_l3

In [None]:
# Build the cluster tree on dimensions 1-12 without reordering the identities.
cd8_l1_full_filt <- BuildClusterTree(
  object = cd8_l1_full_filt,
  dims = 1:12,
  reorder = FALSE,
  reorder.numeric = FALSE
)

In [None]:
# Pull the tree out of the object's tools slot. The tip labels are used as
# stored; the former self-assignment of tree$tip.label changed nothing.
tree <- cd8_l1_full_filt@tools$BuildClusterTree

In [None]:
tree$tip.label

In [None]:
as.character(tree$tip.label)

In [None]:
tree

In [None]:
# Draw the population tree, adding the layers in the same order as before.
p <- ggtree::ggtree(tree, aes(x, y))
p <- p + scale_y_reverse()
p <- p + ggtree::geom_tree() + ggtree::theme_tree()

# Tip labels (offset from the tips) and a filled point on every tip.
p <- p + ggtree::geom_tiplab(offset = 1)
p <- p + ggtree::geom_tippoint(shape = 16, size = 5)

# Clipping is switched off and an 18 cm plot margin is added.
# NOTE(review): presumably so the long tip labels are not cut off - confirm.
p <- p + coord_cartesian(clip = "off")
p <- p + theme(plot.margin = unit(c(0, 18, 0, 0), "cm"))

#ggsave('plots/cluster_tree.png', p, height = 4, width = 6)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 12)
p

In [None]:
cd8_l1_full_filt

# SessionInfo

In [None]:
# Record the R version and the loaded/attached packages for reproducibility.
sessionInfo()