In [None]:
suppressMessages({
    library(Seurat)
    library(dplyr)
    library(ggplot2)
    library(patchwork)
    library(rlang)
    library(UpSetR)
    library(Seurat)
    library(UCell)
    library(ggExtra)
    library(fgsea)
    library(DOSE)
    library(org.Hs.eg.db)
    library(tidyverse)
    library(SeuratObject)
    library(DropletUtils)
    library(Ckmeans.1d.dp)
#   library("leiden")
    library(igraph)
    library(tidyverse)
    library(patchwork)
    library(viridis)
    library(Seurat)
    library(scCustomize)
    library(qs)
    library(ggpubr)
    library(clustree)
    library("ggraph")
    library("Nebulosa")
    library(ggalluvial)
    library("MAST")
    library("pheatmap")
    library("ape")
    library(EnhancedVolcano)
    library(wesanderson)
    library(ArchR)
    library(ComplexHeatmap)
})

In [None]:
# Output locations for figures, tables and serialized R objects.
IMG_OUT <- "Images"
DF_OUT <- "Tables"
RDS_OUT <- "RObjects"

# Create each output directory unless it already exists.
if (!dir.exists(IMG_OUT)) {
  dir.create(IMG_OUT)
}
if (!dir.exists(DF_OUT)) {
  dir.create(DF_OUT)
}
if (!dir.exists(RDS_OUT)) {
  dir.create(RDS_OUT)
}

In [None]:
# Run future-aware steps on 5 forked workers. plan() is exported by the
# 'future' package, which the library() block of this script does not attach,
# so the call is namespace-qualified.
future::plan("multicore", workers = 5)
# Size limit (in bytes) for globals exported to workers; set very high here.
options(future.globals.maxSize = 1000000 * 1024^4)

In [None]:
#read in QC-ed and demultiplexed object (refer to methods in manuscript)

In [None]:
# Load the QC-ed, demultiplexed object from the parent directory.
seu.obj <- readRDS("../obj.qc.rds")

In [None]:
#check dimensions of dataset
# Cells per Origin, followed by the number of cells with a missing Origin.
seu.obj@meta.data$Origin %>% table()
seu.obj@meta.data$Origin %>% is.na() %>% which() %>% length()

# Cells per Donor, followed by the number of cells with a missing Donor.
seu.obj@meta.data$Donor %>% table()
seu.obj@meta.data$Donor %>% is.na() %>% which() %>% length()

# Cells per DoubletStatus value, followed by the total over all values.
seu.obj@meta.data$DoubletStatus %>% table()
seu.obj@meta.data$DoubletStatus %>% table() %>% sum()

In [None]:
#sctransform object
# Warnings and messages are muffled for this call only. Toggling the global
# `warn` option instead would leave warnings disabled if SCTransform errored
# and would overwrite whatever value the user had set.
seu.obj <- suppressWarnings(suppressMessages(
  SCTransform(
    seu.obj,
    verbose = FALSE,
    vst.flavor = "v2",
    vars.to.regress = c("percent.mt", "percent.rb")
  )
))

In [None]:
#if integrating objects (Figure 6)

In [None]:
# Objects to integrate, keyed by name.
seu.obj1.list <- list(obj1 = obj1, obj2 = obj2, obj3 = obj3)

# Run SCTransform (v2 flavour, glmGamPoi) on every object independently,
# regressing out the listed metadata variables. lapply() keeps the list names
# and, unlike 1:length(x), does nothing on an empty list.
suppressMessages({
  seu.obj1.list <- lapply(seu.obj1.list, function(obj) {
    SCTransform(
      obj,
      verbose = FALSE,
      vst.flavor = "v2",
      vars.to.regress = c("percent.mt", "percent.rb", "S.Score", "G2M.Score"),
      method = "glmGamPoi"
    )
  })
})

In [None]:
# Limit on exported globals for future workers: 4500 MiB (4718592000 bytes).
options(future.globals.maxSize = 4500 * 1024^2)

# Pick 3000 shared features, then prepare each SCT-normalised object for
# anchor finding.
seu.obj1.features <- SelectIntegrationFeatures(
  object.list = seu.obj1.list,
  nfeatures = 3000
)
seu.obj1.list <- PrepSCTIntegration(
  object.list = seu.obj1.list,
  anchor.features = seu.obj1.features,
  verbose = FALSE
)

In [None]:
# Find anchors across the SCT-normalised objects and integrate them.
seu.obj1.anchors <- FindIntegrationAnchors(
  object.list = seu.obj1.list,
  normalization.method = "SCT",
  anchor.features = seu.obj1.features,
  verbose = FALSE
)
seu.obj1.integrated <- IntegrateData(
  anchorset = seu.obj1.anchors,
  normalization.method = "SCT",
  verbose = FALSE
)

In [None]:
#if merging objects (Figure 5)

In [None]:
# Make "RNA" the default assay of both objects that are about to be merged.
DefaultAssay(seu.obj) <- "RNA"
DefaultAssay(seu.obj2) <- "RNA"

In [None]:
#remove uncommon genes
# Keep only the features present in the SCT assay of both objects.
seu.obj_new <- seu.obj[(intersect(rownames(seu.obj[["SCT"]]), rownames(seu.obj2[["SCT"]]))), ]
seu.obj2_new <- seu.obj2[(intersect(rownames(seu.obj[["SCT"]]), rownames(seu.obj2[["SCT"]]))), ]

In [None]:
# Select 3000 features across the two gene-matched objects; they are used as
# the variable features of the merged object.
tt.features <- SelectIntegrationFeatures(object.list = list(seu.obj_new, seu.obj2_new), nfeatures = 3000)

In [None]:
# Merge the two gene-matched objects, carrying the normalised data along.
merged <- merge(
  seu.obj_new,
  y = seu.obj2_new,
  merge.data = TRUE
)

# Use the shared feature set as the variable features of the SCT assay.
VariableFeatures(merged[["SCT"]]) <- tt.features

In [None]:
#after this step treat objects similarly, whether integrated, merged or individual

In [None]:
# Drop TCR genes (run after SCTransform): keep only the features whose name
# does not start with TR followed by A/B/D/G and then V/J/C.
seu.obj <- seu.obj[
  grep("^TR[ABDG][VJC]", rownames(seu.obj), invert = TRUE, value = TRUE),
]

In [None]:
# PCA, with an elbow plot to judge how many components to keep.
seu.obj <- RunPCA(seu.obj, verbose = FALSE)
ElbowPlot(seu.obj, ndims = 50)

# Neighbour graph on the first 20 PCs, then clustering at five resolutions.
seu.obj <- FindNeighbors(seu.obj, dims = 1:20, reduction = "pca")
seu.obj <- FindClusters(
  seu.obj,
  algorithm = 3,
  resolution = c(0.1, 0.2, 0.3, 0.4, 0.5),
  verbose = FALSE
)

# UMAP embedding on the same 20 PCs.
set.seed(12345)
seu.obj <- RunUMAP(seu.obj, dims = 1:20, reduction = "pca", verbose = FALSE)

In [None]:
#refine dimensions used based on genes in PC
pdf(sprintf("%s/seu.obj.pcdims.pdf", IMG_OUT), width = 9, height = 10)
DimHeatmap(seu.obj, dims = 1:30, cells = 1000, balanced = TRUE)
dev.off()

#refine clustering resolution based on clustree
# The plot is printed explicitly so it is drawn on the open PDF device even
# when top-level values are not auto-printed there (e.g. under source() or in
# a notebook front end that renders values itself).
pdf(sprintf("%s/seu.obj.clustree.pdf", IMG_OUT), width = 6, height = 10)
print(clustree(seu.obj, prefix = "SCT_snn_res."))
dev.off()

In [None]:
#generate palette for clusters for UMAP
# Four Wes Anderson palettes at their default length ...
pal1 <- wes_palette("GrandBudapest1")
pal2 <- wes_palette("GrandBudapest2")
pal3 <- wes_palette("Cavalcanti1")
pal4 <- wes_palette("Darjeeling2")

# ... concatenated into one colour vector used for cluster colours.
# NOTE(review): the combined length may differ from the 16 implied by the
# name - confirm it covers the number of clusters being plotted.
pal16 <- c(pal1, pal2, pal3, pal4)

In [None]:
#generate umaps either together or split based on origin (Figures 2B, 4A, 5A, 6H, S5A and S6C)
p1 <- DimPlot(virus.integrated, label = TRUE, cols = pal16)
p2 <- DimPlot(virus.integrated, label = TRUE, cols = pal16, split.by = "Virus")

# Write both plots out through the project's generic save helper.
save_result_generic(
  args = list(
    FUN = p1,
    width = 6,
    height = 5,
    title = "umap.virus.int",
    type = "plot"
  )
)
save_result_generic(
  args = list(
    FUN = p2,
    width = 16,
    height = 5,
    title = "umap.virus.int.split.by.virus",
    type = "plot"
  )
)

In [None]:
#if TCR genes were removed and need to be added back

In [None]:
# Restrict the TCR-containing object to the cells present in the integrated object.
withTCR <- subset(withTCR, cells = WhichCells(object = seu.obj1.integrated))

In [None]:
# Store the RNA counts of the TCR-containing object as a new assay. The slot
# is spelled out in full ("counts") rather than relying on partial argument
# matching of "count".
seu.obj1.integrated[["new.counts"]] <- CreateAssayObject(
  counts = GetAssayData(object = withTCR, slot = "counts", assay = "RNA")
)

In [None]:
# Re-run SCTransform on the "new.counts" assay and store the result under the
# assay name "SCTnewcounts", regressing out the same variables as before.
seu.obj1.integrated <- SCTransform(seu.obj1.integrated, assay = "new.counts", new.assay.name = "SCTnewcounts", do.correct.umi = TRUE, 
                               verbose = FALSE, vst.flavor = "v2", 
                               vars.to.regress = c("percent.mt", "percent.rb", "S.Score", "G2M.Score"))

In [None]:
#back to common analysis for all

In [None]:
# Copy the HTO and ADT assays over from the TCR-containing object.
seu.obj1.integrated[["HTO"]] <- withTCR[["HTO"]]
seu.obj1.integrated[["ADT"]] <- withTCR[["ADT"]]

In [None]:
#adt expression plot for all cells from an integrated object (Figure 2F)
# CLR-normalise the ADT assay (margin = 2) and make it the default assay.
virus.for.adt <- NormalizeData(
  virus.for.adt,
  assay = "ADT",
  normalization.method = "CLR",
  margin = 2
)
DefaultAssay(virus.for.adt) <- "ADT"

adt1 <- c("CD4", "CD8", "CD45RA")

# One UMAP feature plot per ADT marker, stored under the marker name.
adt.umap <- list()
for (kp in adt1) {
  adt.umap[[kp]] <- FeaturePlot_scCustom(
    virus.for.adt,
    features = kp,
    pt.size = 0.5,
    reduction = "umap",
    na_color = "lightgray"
  )
}

print_plots_in_list(adt.umap, title = "umap.adt.virus")

In [None]:
#adt expression plot for one subset of cells from integrated object (Figure 6J)
# Barcodes of the cells whose Origin is "CD4", and the matching subset.
for.adt1 <- rownames(filter(seu.obj1@meta.data, Origin == "CD4"))
seu.obj1.for.adt1 <- subset(seu.obj1, cells = for.adt1)

# CLR-normalise the ADT assay (margin = 2) and make it the default assay.
seu.obj1.for.adt1 <- NormalizeData(
  seu.obj1.for.adt1,
  assay = "ADT",
  normalization.method = "CLR",
  margin = 2
)
DefaultAssay(seu.obj1.for.adt1) <- "ADT"

# One UMAP feature plot per ADT marker, stored under the marker name.
adt.umap1 <- list()
adt1 <- c("CD45RA", "CCR7")
for (kp in adt1) {
  adt.umap1[[kp]] <- FeaturePlot_scCustom(
    seu.obj1.for.adt1,
    features = kp,
    pt.size = 0.5,
    reduction = "umap",
    na_color = "lightgray"
  )
}
adt.umap1

print_plots_in_list(adt.umap1, title = "ADT_UMAP_forCD4")

In [None]:
#proportion display across clusters or origin of interest (Figures 5B, 6I, S2B and S6D)
# Proportion of each Origin within every cluster.
P23 <- display_proportion(
  seuobj = seu.obj1,
  group_by = "integrated_snn_res.0.3",
  check_proportion_of = "Origin", # was ratio
  palette = c("#46B1E1", "#1E4B66", "#EE8B3A")
) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Proportion of each cluster within every Origin.
P24 <- display_proportion(
  seuobj = seu.obj1,
  group_by = "Origin",
  check_proportion_of = "integrated_snn_res.0.3", # was ratio
  palette = c('#73f08e','#921b63','#fa3838','#481e2e','#48a4f5','#12090a','#6a401b','#dc268c','#824fae')
) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

P24

save_result_generic(
  args = list(
    FUN = P23,
    width = 8,
    height = 5,
    title = "ORIGINPROP",
    type = "plot"
  )
)

save_result_generic(
  args = list(
    FUN = P24,
    width = 5,
    height = 5,
    title = "ORIGINPROP_reverse",
    type = "plot"
  )
)

In [None]:
#umaps for transcript expression with NA cutoff (0 values for transcripts) (Figures 2C, 2F, 4C, 5D and 6L)
DefaultAssay(seu.obj) <- "SCT" #or SCTnewcounts
genes <- c("CD40LG", "TNFRSF9", "CD4", "CD8A", "CD8B",
           "GZMB", "PRF1", "GNLY", "ADGRG1", "ZNF683",
           "CCR7", "CD27", "CD28", "SELL", "LEF1")

# One UMAP feature plot per gene, stored under the gene name. The plots go
# into genes.umap1, the list that is exported below. They were previously
# appended to the ADT plot list, which left genes.umap1 empty.
genes.umap1 <- list()
for (kp in genes) {
  genes.umap1[[kp]] <- FeaturePlot_scCustom(
    seu.obj,
    features = kp,
    pt.size = 0.5,
    reduction = "umap",
    na_color = "lightgray"
  )
}
genes.umap1

print_plots_in_list(genes.umap1, title = "genes.umap1")

In [None]:
#perform DEG analysis (one vs all) or (group1 vs group2)

In [None]:
#pairwise comparison
Idents(virus.integrated) <- "integrated_snn_res.0.5"
virus.integrated <- PrepSCTFindMarkers(
  virus.integrated,
  assay = "SCT",
  verbose = TRUE
)
deg.2.4 <- FindMarkers(
  virus.integrated,
  ident.1 = 2,
  ident.2 = 4,
  logfc.threshold = 0.25,
  verbose = FALSE
)

deg.2.4$genes <- rownames(deg.2.4)

# Flag significant genes (|log2FC| > 0.25 and adjusted p < 0.05) and record
# the cluster in which each gene is higher (positive log2FC -> "2").
deg.2.4 <- deg.2.4 %>%
  mutate(
    significant = ifelse(abs(avg_log2FC) > 0.25 & p_val_adj < 0.05, "Significant", "Not significant"),
    cluster = ifelse(avg_log2FC > 0, "2", "4")
  )

# Per cluster, keep genes passing the stricter cut-offs and take the 50 with
# the largest absolute log2 fold change.
top_genes <- deg.2.4 %>%
  group_by(cluster) %>%
  filter(p_val_adj < 0.005, abs(avg_log2FC) > 0.5) %>%
  arrange(desc(abs(avg_log2FC))) %>%
  slice_head(n = 50) %>%
  pull(genes)

In [None]:
#generate volcano plots (Figure 2G)
# Per-gene point colour: black unless the log2 fold change exceeds +/-0.25
# (missing fold changes stay black). The vector names become legend labels.
keyvals <- rep("black", nrow(deg.2.4))
keyvals[which(deg.2.4$avg_log2FC > 0.25)] <- "#5B1A18"
keyvals[which(deg.2.4$avg_log2FC < -0.25)] <- "#E6A0C4"
names(keyvals) <- unname(
  c("#5B1A18" = "Cluster 2 Up", "black" = "NS", "#E6A0C4" = "Cluster 4 Up")[keyvals]
)

p <- EnhancedVolcano(
  deg.2.4,
  lab = deg.2.4$genes,
  x = "avg_log2FC",
  y = "p_val_adj",
  title = "Cluster 2 versus Cluster 4",
  subtitle = NULL,
  selectLab = top_genes,
  #xlim = c(-0.3, 0.75),
  xlab = "Fold Change",
  axisLabSize = 10,
  colCustom = keyvals,
  pCutoff = 0.05,
  FCcutoff = 0.25,
  pointSize = 2.0,
  labSize = 2.5,
  cutoffLineType = "twodash",
  cutoffLineWidth = 0.8,
  legendLabels = c("Not sig.", "FoldChange", "p-value", "p-value & FoldChange"),
  legendPosition = "right",
  legendLabSize = 10,
  legendIconSize = 3.0,
  drawConnectors = TRUE,
  widthConnectors = 0.2,
  max.overlaps = 20,
  gridlines.major = FALSE,
  gridlines.minor = FALSE,
  parseLabels = TRUE
)
p

save_result_generic(
  args = list(
    FUN = p,
    width = 15,
    height = 8,
    title = "Volcano.2.4",
    type = "plot"
  )
)

In [None]:
#one vs all comparison
# PrepSCTFindMarkers is applied to seu.obj.filt, the object actually passed
# to FindAllMarkers. It was previously applied to seu.obj, which has no
# effect on the object being tested.
# NOTE(review): the result is named *.mast but no test.use is passed, so the
# FindAllMarkers default test is used - confirm this is intended.
suppressMessages({
  seu.obj.filt <- PrepSCTFindMarkers(seu.obj.filt, assay = "SCT", verbose = TRUE)
  Idents(seu.obj.filt) <- "SCT_snn_res.0.4"
  seu.obj.mast <- FindAllMarkers(
    object = seu.obj.filt,
    verbose = FALSE,
    logfc.threshold = 0.25
  )
})

# write CSVs
# Significant (adjusted p < 0.05) markers, split by direction of change.
up_seu.obj.new_deg <- seu.obj.mast %>%
  filter(avg_log2FC > 0.25) %>%
  filter(p_val_adj < 0.05)

dn_seu.obj.new_deg <- seu.obj.mast %>%
  filter(avg_log2FC < -0.25) %>%
  filter(p_val_adj < 0.05)

write.csv(up_seu.obj.new_deg, "Tables/up.seu.obj.wocl13.14.csv")

write.csv(dn_seu.obj.new_deg, "Tables/dn.seu.obj.wocl13.14.csv")

In [None]:
# Per cluster: the 50 markers with the highest log2 fold change, then only
# those detected in more than 10% of cells (pct.1) with adjusted p < 0.05.
# (The object keeps the name top25 although up to 50 genes per cluster pass.)
top25 <- diff.day.cyto %>%
  group_by(cluster) %>%
  top_n(n = 50, wt = avg_log2FC) %>%
  filter(pct.1 > 0.1, p_val_adj < 0.05)

In [None]:
#generate heatmap for DEGs using custom function (Figure 7E)
QCD12 <- HMAP_sct(diff.integrated, filtered_de_table = top25, cell.groups = "type")
QCD12

save_result_generic(
  args = list(
    FUN = QCD12,
    width = 10,
    height = 12,
    title = "HMAP_HC_both",
    type = "plot"
  )
)

In [None]:
#generate dotplot for selected genes either based on DEGs or interest (Figures 2E, 4B, 6P and S6I)

In [None]:
# Genes of interest for the dot plot, in display order (the plotting call
# reverses this vector).
goi.cl <- c("IL6R", "CD27", "CRIP1", "UCP2", "CD52", "TCF7",
           "SELL", "IL32", "NELL2", "DUSP2", "CRIP2",
           "SMAD7", "BCL3",
           "FOS", "CENPK", "KLF9",
           "S1PR1",
           "ZEB1", "KLF12", "PBX3", "POU2F1", "HDAC4", "RUNX1", "ZBTB20", "TCF12", "BACH2", "TGFBR2",
           "RELA", "NFKBIA", "IRF1", "FOSB", "DUSP1", "EGR1",
           "FOXP3", "IL2RA", "TIGIT", "IKZF2", "CTLA4", "IL10RA",
           "CCL4", "CCL5", "CST7", "NKG7", "GNLY", "PRF1", "GZMB",
           "GZMK", "LINC02446", "KLRK1", "ZNF683", "ZEB2", "TBX21", "BHLHE40", "HOPX")

In [None]:
DefaultAssay(seu.obj) <- "SCT"
# The dot plot is drawn from seu.obj.filt, so its default assay is set too.
# Previously only seu.obj was switched to "SCT".
DefaultAssay(seu.obj.filt) <- "SCT"

p1 <- DotPlot_scCustom(
  seu.obj.filt,
  features = rev(goi.cl),
  group.by = "SCT_snn_res.0.4",
  #split.by = "Origin",  ##if splitting dotplot further based on another column in metadata
  colors_use = paletteContinuous(set = "horizonExtra", n = 5, reverse = TRUE),
  remove_axis_titles = TRUE,
  x_lab_rotate = 45,
  flip_axes = TRUE
)

# Order the clusters numerically (0..12) instead of alphabetically.
p1$data$id <- factor(x = p1$data$id, levels = as.character(0:12))

# Printed explicitly so the plot is drawn on the PDF device even when
# top-level values are not auto-printed there.
pdf(sprintf("%s/goi.cl_dot_coi.pdf", IMG_OUT), width = 10, height = 10)
print(p1)
dev.off()

In [None]:
#generate clustered dotplot for selected genes either based on DEGs or interest (Figure S4H)
Idents(seu.obj.filt) <- "SCT_snn_res.0.4"
##pal15 refers to the custom-generated cluster colors
tf.dotplot3 <- Clustered_DotPlot(
  seurat_object = seu.obj.filt,
  features = tf2,
  colors_use_exp = paletteContinuous(set = "horizonExtra", n = 256, reverse = TRUE),
  group.by = "SCT_snn_res.0.4",
  row_label_size = 6,
  colors_use_idents = pal15,
  cluster_feature = FALSE
)

# Only the second element of the returned object is saved.
save_result_generic(
  args = list(
    FUN = tf.dotplot3[[2]],
    width = 8,
    height = 6,
    title = "dotplot_tf.narrowed.cl5.11.12",
    type = "plot"
  )
)

In [None]:
#module score analysis based on genelists sourced from literature or our bulk data (DESeq2 comparisons)

In [None]:
# Gene lists read from a GMT file with the project helper.
bulk.lists <- parse_marker_gmt("GMT/bulk.lists.narrowed.gmt", trim_first = FALSE)

# Work on a copy so the module-score columns do not touch seu.obj.
seu.obj3 <- seu.obj
DefaultAssay(seu.obj3) <- "SCT"
NM <- names(bulk.lists)

# Add one module score per gene list, named after the list. Warnings are
# muffled for this loop only. Toggling the global `warn` option instead would
# leave warnings disabled if a call errored.
suppressWarnings({
  for (n in NM) {
    seu.obj3 <- AddModuleScore(
      seu.obj3,
      features = list(bulk.lists[[n]]),
      name = n,
      search = TRUE
    )
  }
})

In [None]:
# Metadata columns 24:29 are taken as the module-score columns. The vector is
# stored as vlist.enrich.colnames, the name every later plotting loop reads.
# It was previously assigned only to list.enrich.colnames, which is kept as
# an alias.
# NOTE(review): positional indices - confirm they still match the score
# columns if the metadata layout changes.
vlist.enrich.colnames <- colnames(seu.obj3@meta.data)[24:29]
list.enrich.colnames <- vlist.enrich.colnames

In [None]:
#remove clusters with <1% cells
# Barcodes of all cells outside cluster "15" at resolution 0.5.
bc.no15 <- rownames(filter(seu.obj3@meta.data, integrated_snn_res.0.5 != "15"))

In [None]:
#generate violin plots for columns from metadata (Figures 4D, S4A and S6H)
# One violin plot per score column, grouped by the 0.5-resolution clusters
# and restricted to the retained barcodes and selected metadata columns.
vlistViolin1 <- list()
for (kp in vlist.enrich.colnames) {
  vlistViolin1[[kp]] <- plot_violin_metadata(
    seu.obj3[, bc.no15]@meta.data[, c(17, 21:29)],
    "integrated_snn_res.0.5",
    kp
  )
}

print_plots_in_list(vlistViolin1, title = "BULK.LISTS.NARROWED.violin")

In [None]:
#palette for module score umaps
# Five colours interpolated from the "Zissou1" palette.
pal <- wes_palette("Zissou1", 5, type = "continuous")

In [None]:
#generate umap for module scores (Figures 2D, 6K, 6M, S2A, S2C, S6E and S6G)
#for all cells in umap
vlist.umap <- list()
for (kp in vlist.enrich.colnames) {
  vlist.umap[[kp]] <- FeaturePlot_scCustom(
    seu.obj3,
    colors_use = pal,
    features = kp,
    pt.size = 0.5,
    reduction = "umap",
    na_cutoff = NA
  )
}

#for cells split into constituent origins (or any other column in metadata)
vlist.umap2 <- list()
for (kp in vlist.enrich.colnames) {
  vlist.umap2[[kp]] <- FeaturePlot_scCustom(
    seu.obj3,
    colors_use = pal,
    features = kp,
    pt.size = 0.5,
    reduction = "umap",
    na_cutoff = NA,
    split.by = "Virus"
  )
}

print_plots_in_list(vlist.umap, title = "VLIST.UMAP.newcols")
print_plots_in_list(vlist.umap2, title = "VLIST.UMAP.split.virus.newcols")

In [None]:
#violin plots for specific clusters or groups
#first assign barcodes to a particular group of interest based on visualization criteria
#example clusters 2 and 4 from virus data (Figure 2H) split based on CD4 or CD8
gzmb.coi <- ggplot(
  violin1(
    cl24,
    feature = "GZMB",
    assay = "SCT",
    groups_to_compare = c("CD4_2", "CD8_2", "CD4_4", "CD8_4"),
    cell_group_col = "class.cluster"
  ),
  aes(x = variable, y = value, fill = frac)
) +
  geom_violin(scale = "width", trim = FALSE) +
  scale_fill_gradientn(colours = c("yellow", "firebrick"), limits = c(0, 1)) +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 5, angle = 45),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  ggtitle("GZMB") +
  geom_boxplot(width = 0.05, color = "black", fill = "white", outlier.shape = NA) +
  scale_x_discrete(limits = c("CD4_2", "CD8_2", "CD4_4", "CD8_4"))


#example origin of D10 or D20 from in vitro differentiation data (Figure S6F) split based on early or late clusters from pseudotime
tf.tbx21.violin <- ggplot(
  violin2(
    seu.obj1,
    feature = "TBX21",
    assay = "SCT",
    groups_to_compare = c("D10_early", "D10_late", "D20_early", "D20_late"),
    cell_group_col = "p.cluster"
  ),
  aes(x = variable, y = value, fill = mean_exp)
) +
  geom_violin(scale = "width", trim = FALSE) +
  scale_fill_gradientn(colours = c("#80BB17", "#6C244C")) +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(size = 8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  ggtitle("TBX21") +
  geom_boxplot(width = 0.05, color = "black", fill = "white", outlier.shape = NA) +
  scale_x_discrete(limits = c("D10_early", "D10_late", "D20_early", "D20_late"))

#save either as lists or individual plots

In [None]:
#Pseudotime analysis using Monocle3 (Figures 6N-6Q)

In [None]:
library(monocle3)

In [None]:
# Load the saved object and expose its "umap" reduction under the name "UMAP"
# as well (the monocle3 steps below request reduction_method = "UMAP").
seu.seu.obj1 <- readRDS("seu.obj1.clustersplit.rds")
seu.seu.obj1[["UMAP"]] <- seu.seu.obj1[["umap"]]

In [None]:
# Use the "SCTnewcounts" assay and the 0.3-resolution clusters as identities,
# then preview the clusters on the "UMAP" reduction.
DefaultAssay(seu.seu.obj1) <- "SCTnewcounts"
Idents(seu.seu.obj1) <- "integrated_snn_res.0.3"
DimPlot(seu.seu.obj1, label = TRUE, repel = TRUE, reduction = "UMAP")

In [None]:
# Convert the Seurat object prepared above (seu.seu.obj1: default assay
# "SCTnewcounts", reduction stored as "UMAP") into a monocle3 cell_data_set.
# The conversion previously read seu.obj1, bypassing that preparation.
# as.cell_data_set() comes from SeuratWrappers, which this script does not
# attach, hence the explicit namespace.
seu.obj1 <- SeuratWrappers::as.cell_data_set(x = seu.seu.obj1)
seu.obj1 <- preprocess_cds(seu.obj1)
seu.obj1 <- cluster_cells(cds = seu.obj1, reduction_method = "UMAP")

In [None]:
#pdf(sprintf("%s/monocle_plotcells.pdf", IMG_OUT), width=6, height=6)
# Colour and group the cells by monocle3 partition.
plot_cells(
  seu.obj1,
  color_cells_by = "partition",
  group_cells_by = "partition",
  label_cell_groups = FALSE
)
#dev.off()

#use single partitions

In [None]:
# Learn the trajectory graph, with use_partition = TRUE.
seu.obj1 <- learn_graph(seu.obj1, use_partition = TRUE)

In [None]:
#pdf(sprintf("%s/monocle_clusters.with.nodes.pdf", IMG_OUT), width=6, height=6)
# Show the principal-graph node labels; a root node is chosen from them below.
plot_cells(seu.obj1, label_principal_points = TRUE)
#dev.off()

In [None]:
# Order the cells in pseudotime starting from principal node "Y_38".
seu.obj1 <- order_cells(
  seu.obj1,
  reduction_method = "UMAP",
  root_pr_nodes = "Y_38"
)

# plot cells in pseudotime (Figure 6N)
#pdf(sprintf("%s/monocle_pseudotime_newcol.pdf", IMG_OUT), width=7, height=5)
plot_cells(
  seu.obj1,
  color_cells_by = "pseudotime",
  label_cell_groups = FALSE,
  label_leaves = TRUE,
  label_branch_points = TRUE
) +
  scale_color_gradient2(low = "#00AAD4", high = "#FF2A2A", midpoint = 7)
#dev.off()

In [None]:
## Calculate size factors using built-in function in monocle3
seu.obj1 <- estimate_size_factors(seu.obj1)

## Add gene names into CDS
# seu.obj1 is a cell_data_set at this point, so the gene names are taken from
# its own row names and stored through the rowData() accessor. The previous
# seu.obj1[["SCTnewcounts"]] lookup is Seurat assay syntax and does not return
# an assay on a cell_data_set.
rowData(seu.obj1)$gene_short_name <- rownames(seu.obj1)

In [None]:
# make the subset CDS
# Genes to follow along pseudotime.
test_genes <- c("CD27", "CD28", "GNLY", "HOPX", "NKG7", "ZNF683")

In [None]:
# Subset the cell_data_set to the genes of interest, one subset per Origin.
# The gene filter uses test_genes, the vector defined just above. The
# previously referenced test_tf1 is not defined anywhere in this script.
keep_genes <- rowData(seu.obj1)$gene_short_name %in% test_genes
cds_obj1 <- seu.obj1[keep_genes, colData(seu.obj1)$Origin %in% "CD4"]
cds_obj2 <- seu.obj1[keep_genes, colData(seu.obj1)$Origin %in% "Day10"]
cds_obj3 <- seu.obj1[keep_genes, colData(seu.obj1)$Origin %in% "Day20"]

# Cluster colours shared by the three panels.
cluster_cols <- c('#73f08e','#921b63','#fa3838','#481e2e','#48a4f5','#12090a','#6a401b','#dc268c','#824fae')

# Expression of the selected genes along pseudotime, coloured by cluster.
p1 <- plot_genes_in_pseudotime(
  cds_obj1,
  color_cells_by = "integrated_snn_res.0.3",
  min_expr = 0.5
) + scale_colour_manual(values = cluster_cols)
p2 <- plot_genes_in_pseudotime(
  cds_obj2,
  color_cells_by = "integrated_snn_res.0.3",
  min_expr = 0.5
) + scale_colour_manual(values = cluster_cols)
p3 <- plot_genes_in_pseudotime(
  cds_obj3,
  color_cells_by = "integrated_snn_res.0.3",
  min_expr = 0.5
) + scale_colour_manual(values = cluster_cols)

In [None]:
# Collect the three per-origin panels.
p.all <- list(p1, p2, p3)
#plot genes in pseudotime (Figure 6P)
print_plots_in_list1(
  p.all,
  title = "genes.of.interest.in.pseudotime.clusters.split.by.origin.newcols"
)

In [None]:
# Pseudotime distribution per cluster and per origin/cluster combination.
# NOTE(review): this reads seu.obj1@meta.data with a "pseudotime" column, so
# seu.obj1 must be a Seurat object carrying pseudotime here - confirm which
# object is meant, since seu.obj1 is overwritten by a cell_data_set above.
plot_violin_metadata(seu.obj1@meta.data, "integrated_snn_res.0.3", "pseudotime")
p2 <- plot_violin_metadata(seu.obj1@meta.data, "cluster.split", "pseudotime")

#arrange clusters according to increasing pseudotime scores (Figure 6O)
p2$data$cluster.split <- factor(
  x = p2$data$cluster.split,
  levels = c("CD4_3", "CD4_0", "CD4_7", "CD4_8", "CD4_2", "CD4_6", "CD4_4", "CD4_5", "CD4_1",
             "Day10_3", "Day10_0", "Day10_7", "Day10_8", "Day10_2", "Day10_6", "Day10_4", "Day10_5", "Day10_1",
             "Day20_3", "Day20_0", "Day20_7", "Day20_8", "Day20_2", "Day20_6", "Day20_4", "Day20_5", "Day20_1")
)

# Printed explicitly so the plot is drawn on the PDF device even when
# top-level values are not auto-printed there.
pdf(sprintf("%s/diff.pseudotime.cluster.split.violin.pdf", IMG_OUT), width = 15, height = 5)
print(p2)
dev.off()

In [None]:
#analysis of scTCR-seq data

In [None]:
# setup paths
# VDJ_CTYPE is the path to the clonotype table. VDJ_PATH, despite its name,
# holds the contig annotation table itself (already read into a data frame).
VDJ_CTYPE <- "../TSCM_TEMRA_vdj/outs/vdj_t/clonotypes.csv"
VDJ_PATH <- read.csv("../TSCM_TEMRA_vdj/outs/vdj_t/filtered_contig_annotations.csv")

In [None]:
#change barcode pattern of cellranger output to match the one in seurat object
# "-1" becomes "-1_1" first, then "-2" becomes "-1_2".
VDJ_PATH$barcode <- gsub("-1", "-1_1", VDJ_PATH$barcode)
VDJ_PATH$barcode <- gsub("-2", "-1_2", VDJ_PATH$barcode)

In [None]:
# Clonotype information from the QC cells only
ctd.qc <- build_clonotype_table(t.int, vdj_anno_path = VDJ_PATH, plots = FALSE)

# ctd$clonotype_table[-(1:2), 1] %>% as.numeric() %>% sum()
# Sum of the first column, skipping the first two rows of the table.
sum(as.numeric(ctd.qc$clonotype_table[-(1:2), 1]))

write.csv(ctd.qc$clonotype_table, "Tables/clonotype_sharing_table_qc.csv")

In [None]:
# Recount the barcodes per clonotype and compare with the stored frequency.
Z1 <- ctd.qc$clonotype_by_barcode %>%
  add_count(raw_clonotype_id, name = "FREQ")

all(Z1$frequency == Z1$FREQ)

# Per-barcode clonotype columns that are merged into the Seurat metadata.
cx <- ctd.qc$clonotype_by_barcode %>%
  dplyr::select(barcode, raw_clonotype_id, frequency, expanded)

In [None]:
# Move the barcodes (row names of the metadata) into a "barcode" column.
tmp_1 <- t.int@meta.data %>% rownames_to_column("barcode")

# Left-join the clonotype columns onto the metadata by barcode; the first
# call only prints the dimensions of the join.
merge(tmp_1, cx, by="barcode", all.x=TRUE) %>% dim()
merge(tmp_1, cx, by="barcode", all.x=TRUE) %>% column_to_rownames("barcode") -> tmp_2
head(tmp_2)

# Cells without a clonotype get the string "NA" for frequency and expanded.
tmp_2 <- within(tmp_2, frequency[is.na(raw_clonotype_id)] <- "NA")
tmp_2 <- within(tmp_2, expanded[is.na(raw_clonotype_id)]  <- "NA")

# Put the rows back into the cell order of the Seurat metadata.
tmp_2[rownames(t.int@meta.data),] -> tmp_3

# Initialise the three clonotype columns, then fill them per cell.
t.int@meta.data$ClonotypeID        <- "NA"
t.int@meta.data$ClonotypeFrequency <- "NA"
t.int@meta.data$ClonotypeExpanded  <- "NA"

t.int@meta.data[t.int %>% colnames(), "ClonotypeID"] <- tmp_3[t.int %>% colnames(), "raw_clonotype_id"] %>% as.character()
t.int@meta.data[t.int %>% colnames(), "ClonotypeFrequency"] <- tmp_3[t.int %>% colnames(), "frequency"] %>% as.character()
t.int@meta.data[t.int %>% colnames(), "ClonotypeExpanded"] <- tmp_3[t.int %>% colnames(), "expanded"] %>% as.character()

In [None]:
#generate upset plot across classes (Figure 5E)
# For every Class, the distinct non-missing clonotype IDs seen in its cells.
clonotypes_per_class <- list()
for (i in unique(t.int@meta.data$Class)) {
  clonotypes_per_class[[i]] <- t.int@meta.data %>%
    filter(Class == i) %>%
    pull(ClonotypeID) %>%
    .[!is.na(.)] %>%
    unique()
}

In [None]:
# UpSet plot of clonotype sharing between classes, ordered by frequency.
QCD16 <- upset(
  fromList(clonotypes_per_class),
  order.by = "freq",
  text.scale = c(2, 2, 2, 2, 2, 2),
  point.size = 3,
  line.size = 1.5,
  nsets = 17,
  number.angles = 0
)

save_result_generic(
  args = list(
    FUN = QCD16,
    width = 16,
    height = 9,
    title = "UPSET_CLONOTYPE_CLASS",
    type = "plot"
  )
)

In [None]:
#top25 clonotypes bar graph (Figure 7A)
# what fraction of expanded clonotypes belong to each origin
# Origins present in the data, sorted, as character strings.
CLS <- as.character(sort(unique(diff.integrated@meta.data$Origin)))

# Names of the (up to) 25 most frequent clonotypes seen in more than one cell.
top25_clonotype_names <- sort(table(diff.integrated@meta.data$ClonotypeID), decreasing = TRUE) %>%
  .[. > 1] %>%
  head(25) %>%
  names()

# Origin / clonotype pairs for every cell.
SD3 <- diff.integrated@meta.data %>%
  dplyr::select(Origin, ClonotypeID)

In [None]:
# Count the cells for every (origin, top clonotype) pair in one pass. Fixing
# the factor levels keeps zero-count combinations and ignores cells whose
# origin or clonotype is missing or not among the requested values. This
# replaces two nested loops that re-filtered the table for every pair and
# grew result vectors with append().
origin_by_clonotype <- table(
  factor(as.character(SD3$Origin), levels = CLS),
  factor(as.character(SD3$ClonotypeID), levels = top25_clonotype_names)
)

# Nested list view: ct_counts[[origin]][[clonotype]] is the cell count.
ct_counts <- lapply(
  setNames(CLS, CLS),
  function(origin) as.list(origin_by_clonotype[origin, ])
)

# Wide view: one row per clonotype, one column per origin.
ct_df <- as.data.frame.matrix(t(origin_by_clonotype))
ct_df

# Long view used for plotting: origins vary slowest, clonotypes fastest.
# Count is stored as a number directly (it used to be character).
ct2_df <- data.frame(
  Origin = rep(CLS, each = length(top25_clonotype_names)),
  ClonotypeID = rep(top25_clonotype_names, times = length(CLS)),
  Count = as.vector(t(origin_by_clonotype)),
  stringsAsFactors = FALSE
)

In [None]:
# Counts must be numeric for the stacked bars.
ct2_df$Count <- as.numeric(ct2_df$Count)

# Fill colour per origin.
cols <- c("Day10" = "#1E4B66", "Day20" = "#EE8B3A")

# stacked barplot
C2_PLOT <- ggplot(ct2_df, aes(fill = Origin, y = Count, x = ClonotypeID)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_x_discrete(limits = top25_clonotype_names) +
  scale_fill_manual(values = cols) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  ylab("Clonotype frequency")
C2_PLOT

save_result_generic(
  args = list(
    FUN = C2_PLOT,
    width = 8,
    height = 5,
    title = "BAR_CT_FREQUENCY_origin",
    type = "plot"
  )
)

In [None]:
#circos plots to show sharing across origin and clusters (Figures 5G, 7B and S5C)

In [None]:
#take the built clonotype table
abc <- ctd.qc$clonotype_table

In [None]:
#take all clonotypes of interest with their frequency
# Only the "x" column of the CSV is kept.
exp.clono.seu.obj2 <- read.csv("exp.clono.seu.obj2.csv")$x

In [None]:
# Rows of the clonotype table for the clonotypes of interest.
def <- abc[exp.clono.seu.obj2, ]

In [None]:
#rearrange column based on interest of how to visualize in circos plot
# Column order: clusters 0-14 for tscm, then Precursors, then Effectors
# ("0_tscm", ..., "14_tscm", "0_Precursors", ..., "14_Effectors").
def1 <- def[, as.vector(outer(0:14, c("tscm", "Precursors", "Effectors"), paste, sep = "_"))]

In [None]:
#do not need clusterID or class in the table for now
# Drop the first two rows, re-select the clonotypes of interest by name and
# transpose, so that rows are sectors and columns are clonotypes.
# NOTE(review): def1 was already row-subset to the clonotypes of interest, so
# confirm its first two rows really are the clusterID/class rows here.
def2 <- t(def1[-c(1, 2), ][exp.clono.seu.obj2, ])

In [None]:
# Coerce every cell of a matrix to integer, keeping dimensions and dimnames.
#
# m: a matrix, or an object as.matrix() can convert (e.g. a data frame).
# Returns an integer matrix of the same shape. Values are converted with the
# usual as.integer() rules (fractions truncated, unparseable strings -> NA).
#
# The storage mode is changed in one vectorised step instead of calling
# as.integer() once per cell through apply().
forceMatrixToInteger <- function(m) {
  m <- as.matrix(m)
  storage.mode(m) <- "integer"
  m
}

#convert matrix to integer matrix
def3 <- forceMatrixToInteger(def2)

In [None]:
# Sector names in plotting order: clusters 0-14 for tscm, then Precursors,
# then Effectors ("0_tscm", ..., "14_tscm", "0_Precursors", ...).
def.names <- as.vector(
  outer(0:14, c("tscm", "Precursors", "Effectors"), paste, sep = "_")
)

In [None]:
#format circos as per requirement
def.names
length(def.names)

# Sector names per class: 15 clusters each.
def.tscm <- def.names[1:15]
def.pre <- def.names[16:30]
def.eff <- def.names[31:45]

# One colour per class.
tscm.col <- "#548235"
pre.col <- "#00BFC4"
eff.col <- "#F8766D"

# Named colour vectors: every sector of a class gets the class colour.
def.tscm.col <- structure(rep(tscm.col, length(def.tscm)), names = def.tscm)
def.pre.col <- structure(rep(pre.col, length(def.pre)), names = def.pre)
def.eff.col <- structure(rep(eff.col, length(def.eff)), names = def.eff)


In [None]:
# Link border colours, same shape and dimnames as def3: rows 1-15, 16-30 and
# 31-45 each get one colour.
border_mat <- matrix(NA, nrow = nrow(def3), ncol = ncol(def3))

border_mat[1:15, ] <- "#548235"
border_mat[16:30, ] <- "#00BFC4"
border_mat[31:45, ] <- "#F8766D"
#border_mat
rownames(border_mat) <- rownames(def3)
colnames(border_mat) <- colnames(def3)

In [None]:
# Fifteen colours, one per cluster (0-14), used for the circos sector grid.
my_cols <- c('#36a598','#681bb9','#efb0f8','#ca0f26','#9495e8',
             '#878a19','#ac26e0','#4f2691','#3704d4','#a14eaf',
             '#fec2b4','#d7c856','#6d435b','#77f0ab','#f78c1f')

In [None]:
# For each cluster, its three sector names (tscm, Precursors, Effectors):
# positions k, k + 15 and k + 30 of def.names.
cl0 <- def.names[1 + c(0, 15, 30)]
cl1 <- def.names[2 + c(0, 15, 30)]
cl2 <- def.names[3 + c(0, 15, 30)]
cl3 <- def.names[4 + c(0, 15, 30)]
cl4 <- def.names[5 + c(0, 15, 30)]
cl5 <- def.names[6 + c(0, 15, 30)]
cl6 <- def.names[7 + c(0, 15, 30)]
cl7 <- def.names[8 + c(0, 15, 30)]
cl8 <- def.names[9 + c(0, 15, 30)]
cl9 <- def.names[10 + c(0, 15, 30)]
cl10 <- def.names[11 + c(0, 15, 30)]
cl11 <- def.names[12 + c(0, 15, 30)]
cl12 <- def.names[13 + c(0, 15, 30)]
cl13 <- def.names[14 + c(0, 15, 30)]
cl14 <- def.names[15 + c(0, 15, 30)]

In [None]:
# Named colour vector per cluster: all sectors of a cluster share its colour.
cl0.col <- structure(rep(my_cols[1], length(cl0)), names = cl0)
cl1.col <- structure(rep(my_cols[2], length(cl1)), names = cl1)
cl2.col <- structure(rep(my_cols[3], length(cl2)), names = cl2)
cl3.col <- structure(rep(my_cols[4], length(cl3)), names = cl3)
cl4.col <- structure(rep(my_cols[5], length(cl4)), names = cl4)
cl5.col <- structure(rep(my_cols[6], length(cl5)), names = cl5)
cl6.col <- structure(rep(my_cols[7], length(cl6)), names = cl6)
cl7.col <- structure(rep(my_cols[8], length(cl7)), names = cl7)
cl8.col <- structure(rep(my_cols[9], length(cl8)), names = cl8)
cl9.col <- structure(rep(my_cols[10], length(cl9)), names = cl9)
cl10.col <- structure(rep(my_cols[11], length(cl10)), names = cl10)
cl11.col <- structure(rep(my_cols[12], length(cl11)), names = cl11)
cl12.col <- structure(rep(my_cols[13], length(cl12)), names = cl12)
cl13.col <- structure(rep(my_cols[14], length(cl13)), names = cl13)
cl14.col <- structure(rep(my_cols[15], length(cl14)), names = cl14)

# Sector colours for chordDiagram(), keyed by sector name.
grid.col <- c(
  cl0.col, cl1.col, cl2.col,
  cl3.col, cl4.col, cl5.col,
  cl6.col, cl7.col, cl8.col,
  cl9.col, cl10.col, cl11.col,
  cl12.col, cl13.col, cl14.col
)

grid.col

In [None]:
# The circos.*/chordDiagram functions come from circlize, which the library()
# block of this script does not attach.
library(circlize)

# Gaps between sectors: 1 degree within a group, wider gaps after the
# clonotype block and after each of the three class blocks.
circos.par(gap.after = c(rep(1, ncol(def3) - 1), 10, rep(1, 14), 5, rep(1, 14), 5, rep(1, 14), 10))
chordDiagram(t(def3), grid.col = grid.col, annotationTrack = "grid",
             preAllocateTracks = list(#list(track.height = mm_h(5)),
                                 list(track.height = mm_h(5))),
             reduce = 0, transparency = 0.8,
             link.lwd = 0.5,    # Line width
             link.lty = 1,    # Line type
             link.border = t(border_mat),
             directional = -1,
             direction.type = c("diffHeight", "arrows"),
             link.arr.type = "big.arrow")

# Class labels on the outer track. The TSCM sectors are taken from def.tscm;
# the previously referenced def.seu.obj is not defined anywhere in the script.
highlight.sector(def.tscm[c(4, 14)], track.index = 1, col = "#548235",
    text = "TSCM", cex = 0.8, text.col = "black", niceFacing = TRUE)
highlight.sector(def.pre[-6], track.index = 1, col = "#00BFC4",
    text = "Precursors", cex = 0.8, text.col = "black", niceFacing = TRUE)
highlight.sector(def.eff, track.index = 1, col = "#F8766D",
    text = "Effectors", cex = 0.8, text.col = "black", niceFacing = TRUE)
highlight.sector(colnames(def3), track.index = 1, col = "darkgrey",
    text = "Clonotypes", cex = 0.8, text.col = "white", niceFacing = TRUE)

circos.clear()

In [None]:
# The circos.*/chordDiagram functions come from circlize, which the library()
# block of this script does not attach.
library(circlize)

pdf(sprintf("%s/clonotype.sharing.based.on.cluster.diff.cluster.color.pdf", IMG_OUT), width = 10, height = 10)
# Gaps between sectors: 1 degree within a group, wider gaps after the
# clonotype block and after each of the three class blocks.
circos.par(gap.after = c(rep(1, ncol(def3) - 1), 10, rep(1, 14), 5, rep(1, 14), 5, rep(1, 14), 10))
chordDiagram(t(def3), grid.col = grid.col, annotationTrack = "grid",
             preAllocateTracks = list(list(track.height = mm_h(5)),
                                 list(track.height = mm_h(5))),
             reduce = 0, transparency = 0.8,
             link.lwd = 0.5,    # Line width
             link.lty = 1,    # Line type
             link.border = t(border_mat),
             directional = -1,
             direction.type = c("diffHeight", "arrows"),
             link.arr.type = "big.arrow")

# Axis on the grid track (track 3) of every sector.
for (si in get.all.sector.index()) {
    circos.axis(h = "top", labels.cex = 0.3, sector.index = si, track.index = 3)
}

# Track 2: dotted mid-line with percentage labels. The label step is 20% for
# sectors spanning more than 30 degrees and 50% otherwise.
circos.track(track.index = 2, panel.fun = function(x, y) {
    xlim = get.cell.meta.data("xlim")
    ylim = get.cell.meta.data("ylim")
    sector.name = get.cell.meta.data("sector.index")
    xplot = get.cell.meta.data("xplot")

    circos.lines(xlim, c(mean(ylim), mean(ylim)), lty = 3) # dotted line
    by = ifelse(abs(xplot[2] - xplot[1]) > 30, 0.2, 0.5)
    for (p in seq(by, 1, by = by)) {
        circos.text(p*(xlim[2] - xlim[1]) + xlim[1], mean(ylim) + 0.1,
            paste0(p*100, "%"), cex = 0.3, adj = c(0.5, 0), niceFacing = TRUE)
    }

}, bg.border = NA)

# Class labels on the outer track. The TSCM sectors are taken from def.tscm;
# the previously referenced def.seu.obj is not defined anywhere in the script.
highlight.sector(def.tscm[c(4, 14)], track.index = 1, col = "#548235",
    text = "TSCM", cex = 0.8, text.col = "black", niceFacing = TRUE)
highlight.sector(def.pre[-6], track.index = 1, col = "#00BFC4",
    text = "Precursors", cex = 0.8, text.col = "black", niceFacing = TRUE)
highlight.sector(def.eff, track.index = 1, col = "#F8766D",
    text = "Effectors", cex = 0.8, text.col = "black", niceFacing = TRUE)
highlight.sector(colnames(def3), track.index = 1, col = "darkgrey",
    text = "Clonotypes", cex = 0.8, text.col = "white", niceFacing = TRUE)

circos.clear()
dev.off()

In [None]:
#donor-wise circos generated using a similar matrix, with donors in place of clusters (Figure S5C)