# Part 18: Analysis of scRNAseq data from Zhong et al., 2025

In [None]:
source("diabetes_analysis_v07.R")

This is a reanalysis of data published in [Zhong et al., 2025](https://www.nature.com/articles/s41467-024-53264-8). Raw data was downloaded from the GEO database under accession code [GSE221297](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE221297).

# Loading and preprocessing data

This part uses the raw data, which can be downloaded from the GEO database: [GSE221297](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE221297). If you wish to reproduce the analysis, please download the data and save it to the folder `../data/published_data/Zhong_2025/Raw/`, with one sub-folder per sample.

In [None]:
# One sub-directory per sample under Raw/; the full paths are what gets read.
file_paths <- list.dirs("../data/published_data/Zhong_2025/Raw/",
                        recursive = FALSE)

# Short directory names serve as sample identifiers. Deriving them from
# `file_paths` guarantees both vectors have the same length and order.
file_paths2 <- basename(file_paths)

In [None]:
file_paths2

Preprocess all datasets. 

In [None]:
# Output folder for the per-sample objects; stay silent if it already exists
# so the cell can be re-run.
dir.create("../data/published_data/Zhong_2025/temp_data",
           showWarnings = FALSE, recursive = TRUE)

In [None]:
#' Preprocess one raw 10x sample into a clustered Seurat object
#'
#' Reads the 10x directory `file_paths[i]`, builds a Seurat object tagged with
#' the sample name `file_paths2[i]` (stored in the `source` metadata column),
#' adds `percent.mt` / `percent.rp`, runs normalization, scaling, variable
#' feature selection, PCA, UMAP and clustering, prints a QC violin plot and
#' writes the object to `<sample>_01_full.rds` in the `temp_data` folder.
#'
#' @param i Index into the global vectors `file_paths` and `file_paths2`.
#' @return The processed Seurat object.
process_dataset <- function(i) {
  # Same folder that the later cells list and read the .rds files from.
  out_dir <- "../data/published_data/Zhong_2025/temp_data"

  raw_counts <- Read10X(file_paths[i])
  print("10x file uploaded")
  seu_temp <- CreateSeuratObject(raw_counts, min.cells = 1, min.features = 200)
  seu_temp$source <- file_paths2[i]

  seu_temp[["percent.mt"]] <- PercentageFeatureSet(object = seu_temp, pattern = "^MT-")
  seu_temp[["percent.rp"]] <- PercentageFeatureSet(object = seu_temp, pattern = "^RP[LS]")

  # Keep only cells whose column sum in the default assay data is non-zero.
  seu_temp <- seu_temp[, unname(which(colSums(GetAssayData(seu_temp)) != 0))]
  print("Seurat object created")

  # NOTE(review): ScaleData() runs before FindVariableFeatures() here; the
  # order is kept as-is because downstream cluster numbers depend on it.
  seu_temp <- NormalizeData(seu_temp, verbose = FALSE)
  seu_temp <- ScaleData(seu_temp, verbose = FALSE)
  seu_temp <- FindVariableFeatures(seu_temp, nfeatures = 1000, verbose = FALSE)
  print("Seurat object normalized, scaled")

  # NOTE(review): `dims` is not a documented RunPCA() argument (the number of
  # components is `npcs`); presumably it is ignored here - confirm.
  seu_temp <- RunPCA(seu_temp, dims = 1:12)
  seu_temp <- RunUMAP(seu_temp, reduction = "pca", dims = 1:12)
  print("UMAP done!")

  seu_temp <- FindNeighbors(seu_temp, dims = 1:12)
  seu_temp <- FindClusters(seu_temp, resolution = 0.7)
  print("CLustering done!")

  print(VlnPlot(seu_temp,
                features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rp"),
                ncol = 2, pt.size = 0))

  # Create the real output folder (not a stray "temp_data" in the working
  # directory) and save next to the other Zhong_2025 files.
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  saveRDS(seu_temp, file.path(out_dir, paste0(file_paths2[i], "_01_full.rds")))

  seu_temp
}

In [None]:
# Allow large objects to be passed as globals by the future framework.
options(future.globals.maxSize = 20000 * 1024^2)

In [None]:
# Process every sample directory that was found instead of assuming exactly 12.
full_dataset2 <- map(seq_along(file_paths), process_dataset)

In [None]:
options(future.globals.maxSize = 90000 * 1024^2)

In [None]:
     # NOTE(review): `full_dataset` is not created anywhere in the visible part
     # of this notebook (the preprocessing cell assigns `full_dataset2`). These
     # cells look like a leftover draft of the merged workflow that is run on
     # `merged_ds` further below - confirm before executing.
     full_dataset <- NormalizeData(full_dataset, verbose = FALSE)
        full_dataset <- ScaleData(full_dataset, verbose = FALSE)
        full_dataset <- FindVariableFeatures(full_dataset, nfeatures = 1000, verbose = FALSE)

In [None]:
full_dataset <- RunPCA(full_dataset, dims = 1:12)
full_dataset <- RunUMAP(full_dataset, reduction = "pca", dims = 1:12)
  

In [None]:
# NOTE(review): `seu_temp` is only assigned inside process_dataset() in the
# visible code, so it is presumably undefined at top level. This cell also
# writes to the same file name that the merged object is saved under later -
# verify it should not be run.
seu_temp <- FindNeighbors(seu_temp, dims = 1:12)
seu_temp <- FindClusters(seu_temp, resolution = 0.7)
            print("CLustering done!")

saveRDS(seu_temp, paste0("../data/published_data/Zhong_2025/241019_NatComm_merged_full.rds"))

In [None]:
# Collect the per-sample objects saved as "<sample>_01_full.rds"; the pattern
# keeps any unrelated file in temp_data/ from being passed to readRDS().
file_paths <- list.files("../data/published_data/Zhong_2025/temp_data/",
                         pattern = "_01_full\\.rds$",
                         full.names = TRUE,
                         recursive = FALSE)

Load and merge preprocessed datasets. 

In [None]:
datasets <- map(file_paths, readRDS)

In [None]:
# Merge all loaded samples, however many there are, instead of enumerating
# exactly 12 list elements by hand.
stopifnot(length(datasets) > 0)
merged_ds <- if (length(datasets) > 1) {
  merge(datasets[[1]], y = datasets[-1])
} else {
  datasets[[1]]
}

In [None]:
# Free the per-sample objects once they are merged.
rm(datasets)
gc()

In [None]:
options(future.globals.maxSize = 90000 * 1024^2)

In [None]:
# Normalize, scale and select variable features on the merged object.
merged_ds <- merged_ds %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

In [None]:
# Dimensionality reduction.
merged_ds <- merged_ds %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

In [None]:
# Graph-based clustering, then store the full merged object.
merged_ds <- merged_ds %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")

saveRDS(merged_ds, "../data/published_data/Zhong_2025/241019_NatComm_merged_full.rds")

# Filtering T cells

Let's identify and subset T cells. 

In [None]:
# Marker genes used to locate the major lineages on the UMAP.
lineage_markers <- c("CD3D", "CD8A", "CD14", "MS4A1")
FeaturePlot(object = merged_ds, features = lineage_markers, ncol = 4)

In [None]:
DimPlot(object = merged_ds, label = TRUE, label.size = 12)

In [None]:
# Clusters retained as T cells.
t_cell_clusters <- c(0:4, 11, 13, 14, 16, 20, 24)
merged_ds <- subset(merged_ds, seurat_clusters %in% t_cell_clusters)

In [None]:
DimPlot(object = merged_ds, label = TRUE, label.size = 10)

Recluster T cells. 

In [None]:
     merged_ds <- merged_ds %>%
       NormalizeData(verbose = FALSE) %>%
       ScaleData(verbose = FALSE) %>%
       FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

# Recompute the embedding on the T-cell subset.
merged_ds <- merged_ds %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

# Recluster and save the filtered T-cell object.
merged_ds <- merged_ds %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")

saveRDS(merged_ds, "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_tcells.rds")

In [None]:
DimPlot(object = merged_ds, label = TRUE, label.size = 10)

In [None]:
FeaturePlot(object = merged_ds, features = c("CD3D", "CD8A", "CD4", "NCR1"), ncol = 4)

In [None]:
FeaturePlot(object = merged_ds, features = c("MKI67", "NCR1", "IL7R", "CD4"), ncol = 4)

In [None]:
options(future.globals.maxSize = 50e+09)

In [None]:
plan("sequential")

In [None]:
# Coarser clustering of the T-cell object.
merged_ds <- FindClusters(object = merged_ds, resolution = 0.2)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(object = merged_ds, label = TRUE, label.size = 10)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(object = merged_ds, label = TRUE, label.size = 10)

In [None]:
# Join the assay layers before computing markers.
merged_ds <- JoinLayers(object = merged_ds)

In [None]:
mrk <- FindAllMarkers(object = merged_ds)

In [None]:
# Markers of cluster 7.
dplyr::filter(mrk, cluster == 7)

In [None]:
# Keep the cell barcode as a metadata column.
merged_ds$barcode <- colnames(merged_ds)

Create a downsampled object. 

In [None]:
# Fix the RNG seed so the same cells are drawn on every run, and cap the
# sample size at the number of available cells (sample() errors when asked for
# more items than exist).
set.seed(42)
cells_10k <- sample(colnames(merged_ds), size = min(10000, ncol(merged_ds)))
merged_ds_10k <- subset(merged_ds, barcode %in% cells_10k)

## Automated annotation of cell types

In [None]:
# Reference datasets for SingleR: Monaco immune data and Human Primary Cell Atlas.
mid.se <- celldex::MonacoImmuneData()
hpca.se <- celldex::HumanPrimaryCellAtlasData()

# NOTE(review): nothing loaded from this file is referenced in the visible part
# of the notebook - confirm whether it is still needed.
load("../data/ref_wherry_new.RData")

In [None]:
DefaultAssay(merged_ds_10k) <- "RNA"

# Count matrix of the downsampled object, taken through the assay accessor so
# that gene (row) and cell (column) names come with it.
counts <- merged_ds_10k[["RNA"]]$counts

In [None]:
### Annotate the dataset with the Monaco Immune dataset
pred.singler <- SingleR(test = counts, ref = mid.se, assay.type.test = 1,
                        labels = mid.se$label.fine, fine.tune = FALSE)

### Annotate the dataset with the Human Primary Cell Atlas dataset
pred.singler3 <- SingleR(test = counts, ref = hpca.se, assay.type.test = 1,
                         labels = hpca.se$label.fine, fine.tune = FALSE)

# One row per cell with the predicted label from each reference.
all_labels <- data.frame(
  Monaco_single = pred.singler$labels,
  HPCA_single = pred.singler3$labels,
  barcode = colnames(merged_ds_10k)
)

md2 <- merged_ds_10k@meta.data
md2$barcode <- colnames(merged_ds_10k)

# Join explicitly on the barcode rather than on whatever columns happen to be
# shared between the two tables.
md3 <- left_join(md2, all_labels, by = "barcode")
# left_join() drops row names; the metadata must be indexed by cell name again.
rownames(md3) <- colnames(merged_ds_10k)
merged_ds_10k@meta.data <- md3
		

In [None]:
# Predicted labels from the Monaco reference.
options(repr.plot.width = 20, repr.plot.height = 7.5)
DimPlot(object = merged_ds_10k, group.by = "Monaco_single",
        label = TRUE, repel = TRUE, raster = FALSE)

In [None]:
# Predicted labels from the HPCA reference.
options(repr.plot.width = 20, repr.plot.height = 10)
DimPlot(object = merged_ds_10k, group.by = "HPCA_single",
        label = TRUE, raster = FALSE)

In [None]:
DimPlot(object = merged_ds, label = TRUE, label.size = 10)

Remove contaminating cell types. 

In [None]:
# Clusters kept after removing contaminating cell types.
clean_clusters <- c(0:6, 10)
merged_ds <- subset(merged_ds, seurat_clusters %in% clean_clusters)

DimPlot(object = merged_ds, label = TRUE, label.size = 10)

merged_ds <- merged_ds %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

merged_ds <- merged_ds %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

merged_ds <- merged_ds %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")

saveRDS(merged_ds, "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_tcells2.rds")

In [None]:
DimPlot(object = merged_ds, label = TRUE, label.size = 10)


In [None]:
DimPlot(object = merged_ds, label = TRUE, label.size = 10)


In [None]:
# Use the cluster assignment as the active identity.
Idents(merged_ds) <- merged_ds$seurat_clusters

In [None]:
# Per-cluster QC metrics.
options(repr.plot.width = 16, repr.plot.height = 4)
qc_metrics <- c("percent.mt", "percent.rp", "nCount_RNA", "nFeature_RNA")
VlnPlot(object = merged_ds, features = qc_metrics, ncol = 4,
        pt.size = 0, raster = FALSE)

Filter out low quality cells. 

In [None]:
# Keep cells with < 10 % mitochondrial reads and more than 750 detected genes.
merged_ds <- subset(merged_ds, percent.mt < 10 & nFeature_RNA > 750)

merged_ds <- merged_ds %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

merged_ds <- merged_ds %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

merged_ds <- merged_ds %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")

saveRDS(merged_ds, "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_tcells3.rds")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 12)
DimPlot(object = merged_ds, label = TRUE, label.size = 10)

In [None]:
# Marker panels on the QC-filtered object.
options(repr.plot.width = 16, repr.plot.height = 4)
FeaturePlot(object = merged_ds, features = c("CD3D", "CD8A", "CD4", "NCR1"), ncol = 4)

FeaturePlot(object = merged_ds, features = c("MKI67", "NCR1", "IL7R", "CD4"), ncol = 4)

FeaturePlot(object = merged_ds, features = c("FOXP3", "GATA3", "NCAM1", "RORC"), ncol = 4)


# First analysis in disease groups

In [None]:
merged_ds$source %>% table

In [None]:
# Rewrite "PR" as "RR" in the sample name before extracting the group letter.
merged_ds$Disease <- gsub("PR", "RR", merged_ds$source)

In [None]:
# The group is encoded by the 12th character of the (rewritten) sample name.
# NOTE(review): position 12 depends on the sample naming scheme - confirm.
merged_ds$Disease <- substr(merged_ds$Disease, start = 12, stop = 12)

In [None]:
merged_ds$Disease %>% table

In [None]:
Idents(merged_ds) <- merged_ds$Disease

In [None]:
# Average expression per sample.
avg_exp <- AverageExpression(object = merged_ds, group.by = "source",
                             return.seurat = FALSE)

In [None]:
# Long table: one row per gene and sample.
df <- as.data.frame(avg_exp$RNA) %>%
  rownames_to_column("gene") %>%
  pivot_longer(!gene, names_to = "source", values_to = "value")

# Same group extraction as for the cell metadata ("PR" -> "RR", 12th character).
df$disease <- substr(gsub("PR", "RR", df$source), 12, 12)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)

# Per-sample average expression of BACH2 and NELL2 in the "N" (shown as "Hea")
# and "P" (shown as "Dia") groups.
df %>%
  # Qualified so that stats::filter() can never be picked up by accident.
  dplyr::filter(disease %in% c("N", "P")) %>%
  mutate(Disease = ifelse(disease == "N", "Hea", "Dia")) %>%
  dplyr::filter(gene %in% c("BACH2", "NELL2")) %>%
  ggplot(aes(x = Disease, y = value)) +
  facet_wrap(~gene, ncol = 7, scales = "free") +
  geom_boxplot(outlier.shape = NA, aes(fill = Disease), alpha = 0.5) +
  geom_crossbar(stat = "summary", fun = "median", alpha = 0.6, width = 0.7) +
  # The former geom_dotplot(dotsize = 0) layer drew nothing and was removed;
  # `binaxis` / `stackdir` are dot-plot parameters that geom_jitter() does not
  # accept, so they are no longer passed to it.
  geom_jitter(position = position_jitter(width = 0.1, height = 0.05),
              size = 2.5, aes(color = Disease)) +
  theme_classic()

In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Genes shown in the panel.
panel_genes <- c(
  "IL7R", "LEF1", "TCF7", "CCR7", "SELL", "BACH2", "NELL2", "PRF1", "NKG7",
  "GZMB", "CST7", "GNLY", "CX3CR1", "CCL5", "TNF", "KLRG1", "TBX21"
)

# Named colours so the mapping does not depend on the order of the group levels.
group_colours <- c(D = "indianred2", H = "dodgerblue", PR = "darkgreen")

df %>%
  # "N" -> "H", "P" -> "D", everything else -> "PR".
  mutate(disease = ifelse(disease == "N", "H", ifelse(disease == "P", "D", "PR"))) %>%
  dplyr::filter(gene %in% panel_genes) %>%
  ggplot(aes(x = disease, y = value)) +
  facet_wrap(~gene, ncol = 7) +
  geom_boxplot(outlier.shape = NA, aes(fill = disease), alpha = 0.4) +
  geom_crossbar(stat = "summary", fun = "median", alpha = 0.6, width = 0.7) +
  # The former geom_dotplot(dotsize = 0) layer drew nothing and was removed;
  # `binaxis` / `stackdir` are not geom_jitter() parameters and are dropped.
  geom_jitter(position = position_jitter(width = 0.1, height = 0.05),
              size = 2.5, aes(color = disease)) +
  scale_fill_manual(values = group_colours) +
  scale_color_manual(values = group_colours) +
  theme_classic()

# Separating CD4 and CD8 T cells

In [None]:
# Switch the active identity back to the clusters.
Idents(merged_ds) <- merged_ds$seurat_clusters

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)
DimPlot(object = merged_ds, label = TRUE, label.size = 10)


In [None]:
options(repr.plot.width = 16, repr.plot.height = 4)
FeaturePlot(object = merged_ds, features = c("CD3D", "CD8A", "CD4", "NCR1"), ncol = 4)


In [None]:
merged_ds <- merged_ds %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)


In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)
DimPlot(object = merged_ds, label = TRUE, label.size = 10)


In [None]:
# CD8A expression, one panel per cluster.
options(repr.plot.width = 24, repr.plot.height = 16)

FeaturePlot(object = merged_ds, features = "CD8A",
            split.by = "seurat_clusters", max.cutoff = 2) +
  plot_layout(ncol = 7, nrow = 4)


In [None]:
# CD4 expression, one panel per cluster.
options(repr.plot.width = 24, repr.plot.height = 12)

FeaturePlot(object = merged_ds, features = "CD4",
            split.by = "seurat_clusters", max.cutoff = 2) +
  plot_layout(ncol = 7, nrow = 4)


In [None]:
merged_ds <- JoinLayers(merged_ds)

In [None]:
which(rownames(merged_ds@assays$RNA) == "CD4")

In [None]:
which(rownames(merged_ds@assays$RNA) == "CD8A")

In [None]:
# Classify each cell by whether it has any CD4 and/or CD8A counts. Rows are
# selected by gene name; the previous hard-coded row numbers (16762 and 3588,
# presumably the results of the two lookups above) silently break whenever the
# feature set or its order changes.
rna_counts <- merged_ds@assays$RNA$counts
stopifnot(all(c("CD4", "CD8A") %in% rownames(rna_counts)))
cd4_pos <- rna_counts["CD4", ] > 0
cd8_pos <- rna_counts["CD8A", ] > 0

merged_ds$cd4_or_cd8 <- ifelse(cd4_pos & cd8_pos, "Both",
                        ifelse(cd4_pos, "CD4",
                        ifelse(cd8_pos, "CD8",
                               "Unknown")))

In [None]:
merged_ds$cd4_or_cd8 %>% table

In [None]:
DimPlot(object = merged_ds, group.by = "cd4_or_cd8")

In [None]:
# Snapshot of the cell metadata used for the cluster-based assignment below.
md_merged_ds <- merged_ds@meta.data

In [None]:
options(repr.plot.width = 24, repr.plot.height = 12)

FeaturePlot(object = merged_ds, features = "CD8A",
            split.by = "seurat_clusters", max.cutoff = 2) +
  plot_layout(ncol = 7, nrow = 3)


In [None]:
options(repr.plot.width = 24, repr.plot.height = 12)

FeaturePlot(object = merged_ds, features = "CD4",
            split.by = "seurat_clusters", max.cutoff = 2) +
  plot_layout(ncol = 7, nrow = 3)


In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)
DimPlot(object = merged_ds, label = TRUE, label.size = 10)


In [None]:
# Cells with an unambiguous count-based call keep it; "Unknown" and "Both"
# cells take the lineage assigned to their cluster.
md_merged_ds <- md_merged_ds %>%
  mutate(cd4_or_cd8_2 = case_when(
    !(cd4_or_cd8 %in% c("Unknown", "Both")) ~ cd4_or_cd8,
    seurat_clusters %in% c(0, 1, 4, 9, 11) ~ "CD8",
    seurat_clusters %in% c(2, 3, 5, 7, 8, 10, 13) ~ "CD4",
    seurat_clusters %in% c(6, 12, 14, 15, 16, 17, 18) ~ "NK_DN"
  ))

In [None]:
md_merged_ds$cd4_or_cd8_2 %>% table

In [None]:
# Copy the refined call back onto the Seurat object.
merged_ds$cd4_or_cd8_2 <- md_merged_ds$cd4_or_cd8_2

In [None]:
DimPlot(object = merged_ds, group.by = "cd4_or_cd8_2")

In [None]:
saveRDS(merged_ds, "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_tcells3.rds")

In [None]:
# Reload the object that was just saved (..._tcells3.rds). The ..._tcells2.rds
# snapshot is written before the QC filter and before the `Disease` and
# `cd4_or_cd8_2` columns are added, so the subsets below would fail on it.
merged_ds <- readRDS("../data/published_data/Zhong_2025/241019_NatComm_merged_filt_tcells3.rds")

In [None]:
# Re-run normalization, scaling, variable feature selection, PCA, UMAP and
# clustering on a subset; returns the processed object. The same steps were
# previously pasted once per lineage.
recluster_subset <- function(obj, dims = 1:12, resolution = 0.7) {
  obj <- NormalizeData(obj, verbose = FALSE)
  obj <- ScaleData(obj, verbose = FALSE)
  obj <- FindVariableFeatures(obj, nfeatures = 1000, verbose = FALSE)

  obj <- RunPCA(obj, dims = dims)
  obj <- RunUMAP(obj, reduction = "pca", dims = dims)

  obj <- FindNeighbors(obj, dims = dims)
  obj <- FindClusters(obj, resolution = resolution)
  print("CLustering done!")
  obj
}

# Cells assigned to the "NK_DN" group.
nk_dn <- subset(merged_ds, cd4_or_cd8_2 == "NK_DN")
nk_dn <- recluster_subset(nk_dn)

In [None]:
# Cells assigned to the "CD4" group.
cd4 <- subset(merged_ds, cd4_or_cd8_2 == "CD4")
cd4 <- recluster_subset(cd4)

In [None]:
# Cells assigned to the "CD8" group.
cd8 <- subset(merged_ds, cd4_or_cd8_2 == "CD8")
cd8 <- recluster_subset(cd8)

In [None]:
# Embeddings coloured by sample.
DimPlot(object = cd4, group.by = "source")

In [None]:
DimPlot(object = cd8, group.by = "source")

In [None]:
DimPlot(object = nk_dn, group.by = "source")

In [None]:
# Embeddings coloured by disease group.
DimPlot(object = cd4, group.by = "Disease")

In [None]:
DimPlot(object = cd8, group.by = "Disease")

In [None]:
DimPlot(object = nk_dn, group.by = "Disease")

# Removing PR group

As there is a severe batch effect in the PR group, we will now analyze only the healthy and newly diagnosed samples.

## CD4 without PR

In [None]:
# Drop the cells whose Disease label is "R".
sub <- subset(cd4, Disease != "R")


In [None]:
sub <- sub %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

sub <- sub %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

sub <- sub %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")
cd4_without_pr <- sub

In [None]:
DimPlot(object = cd4_without_pr, label = TRUE, label.size = 10)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
# Marker panel, 4 x 4.
cd4_panel <- c(
  "CD3D", "CD8A", "RORC", "LGALS3",
  "GATA3", "MKI67", "ISG15", "NCAM1",
  "TRGC1", "TRDC", "FOXP3", "CTLA4",
  "IL4", "IL5", "NFKBIA", "CD4"
)
FeaturePlot(object = cd4_without_pr, features = cd4_panel, ncol = 4)


In [None]:
DimPlot(object = cd4_without_pr, group.by = "Disease")

## Treg

In [None]:
# Clusters 7 and 9 of the CD4 object are taken as Tregs.
treg <- subset(cd4_without_pr, seurat_clusters %in% c(7, 9))

In [None]:
treg <- treg %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

treg <- treg %>%
  RunPCA(dims = 1:10) %>%
  RunUMAP(reduction = "pca", dims = 1:10)

treg <- FindNeighbors(object = treg, dims = 1:10)

In [None]:
treg <- FindClusters(object = treg, resolution = 0.7)
print("CLustering done!")
DimPlot(object = treg)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

DimPlot(object = treg, group.by = "Disease")

In [None]:
DimPlot(object = treg)

In [None]:
FeaturePlot(object = treg, features = "GZMK")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
# Marker panel, 4 x 4.
treg_panel <- c(
  "FOXP3", "TIGIT", "CD226", "GZMK",
  "CCR4", "CTLA4", "IL7R", "SELL",
  "ISG15", "LAG3", "IL10", "TGFB1",
  "IL4", "IL5", "NFKBIA", "CD4"
)
FeaturePlot(object = treg, features = treg_panel, ncol = 4)


In [None]:
options(repr.plot.width = 12, repr.plot.height = 10)

seurat_meta_data <- treg@meta.data
seurat_meta_data$sample <- seurat_meta_data$source

# Count cells per sample and cluster, then turn the counts into within-sample
# frequencies. `.groups = "drop_last"` keeps the grouping by sample (so `freq`
# sums to 1 per sample) without the summarise() message; ungroup() prevents the
# grouping from leaking into later steps.
# NOTE(review): a cluster that has no cells in a sample gets no row here (it is
# not counted as frequency 0) - confirm this is intended for the statistics.
df4 <- seurat_meta_data %>%
  group_by(sample, seurat_clusters) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# As we've lost non-grouping variables, let's join them back
md_to_join <- seurat_meta_data %>%
  dplyr::select(sample, Disease) %>%
  unique()
df4 <- left_join(df4, md_to_join, by = "sample")

# The final plot
df4 %>%
  ggplot(aes(x = Disease, y = freq)) + # change x to any variable of interest
  geom_boxplot(outlier.shape = NA) +
  # The former geom_dotplot(dotsize = 0) layer drew nothing and was removed.
  geom_jitter(position = position_jitter(0.2), size = 2, aes(color = Disease)) +
  facet_wrap(~seurat_clusters, scales = "free") +
  ylab("Frequency") +
  xlab("Condition") +
  ggpubr::stat_compare_means() +
  ylim(c(0, NA)) + # makes the y axis start at zero; remove if not wanted
  theme_classic() +
  theme(strip.background = element_blank(), panel.grid = element_blank())


## CD8 without PR

In [None]:
# Drop the cells whose Disease label is "R".
sub <- subset(cd8, Disease != "R")


In [None]:
sub <- sub %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

sub <- sub %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

sub <- sub %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")
cd8_without_pr <- sub

In [None]:
DimPlot(object = cd8_without_pr)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
# Marker panel, 4 x 4.
cd8_panel <- c(
  "SELL", "EOMES", "TBX21", "LGALS3",
  "IFNG", "MKI67", "ISG15", "NCAM1",
  "TRGC1", "TRDC", "IKZF2", "CTLA4",
  "IL4", "IL5", "NFKBIA", "CD4"
)
FeaturePlot(object = cd8_without_pr, features = cd8_panel, ncol = 4)

## NK without PR

In [None]:
# Drop the cells whose Disease label is "R".
sub <- subset(nk_dn, Disease != "R")

In [None]:
sub <- sub %>%
  NormalizeData(verbose = FALSE) %>%
  ScaleData(verbose = FALSE) %>%
  FindVariableFeatures(nfeatures = 1000, verbose = FALSE)

sub <- sub %>%
  RunPCA(dims = 1:12) %>%
  RunUMAP(reduction = "pca", dims = 1:12)

sub <- sub %>%
  FindNeighbors(dims = 1:12) %>%
  FindClusters(resolution = 0.7)
print("CLustering done!")
nk_dn_without_pr <- sub

In [None]:
DimPlot(object = nk_dn_without_pr)

In [None]:
# Store the three objects that exclude the "R" group.
saveRDS(object = cd4_without_pr,
        file = "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_cd4_noPR.rds")
saveRDS(object = cd8_without_pr,
        file = "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_cd8_noPR.rds")
saveRDS(object = nk_dn_without_pr,
        file = "../data/published_data/Zhong_2025/241019_NatComm_merged_filt_nk_noPR.rds")