## Runs DE for each cluster against all other cells, then runs DE between conditions within each cluster

In [None]:
# ---- Run parameters ----

# Cluster id parameter; not referenced by the code visible in this notebook.
rm_clust <- "6"

# RDS file holding the integrated object that is loaded below.
integrated_f <- "/data2/isshamie/mito_lineage/output/annotation/TcellDupi_may17_2021/MTblacklist/mergedSamples/allSamples.integrated.rds"

# Root directory under which all result sub-directories are created.
outdir <- "/data2/isshamie/mito_lineage/output/annotation/TcellDupi_may17_2021/MTblacklist/mergedSamples/DErmClust/"

# Comma-separated condition labels; split into a vector in a later cell.
sample_names <- "pre,post"

# nTop and assay are not referenced by the code visible in this notebook.
nTop <- 25000
assay <- "RNA"

# Number of workers handed to future::plan().
cores <- 4

# Optional comparisons as "name,sampleA,sampleB" entries separated by ";".
# "" (or "NULL") means: compare the two labels in `sample_names`.
comps_f <- ""

In [None]:
# Output sub-directories, one per analysis performed further down.
clust_outdir <- file.path(outdir, "clusters")
clust_atac_outdir <- file.path(outdir, "clusters_atac")
cond_outdir <- file.path(outdir, "conditions_clusters")
cons_outdir <- file.path(outdir, "conditions_conserved")

# recursive = TRUE so that a missing parent of `outdir` is created as well;
# without it dir.create() fails, and showWarnings = FALSE hides that failure.
all_outdirs <- c(outdir, clust_outdir, cond_outdir, cons_outdir, clust_atac_outdir)
invisible(lapply(all_outdirs, dir.create, showWarnings = FALSE, recursive = TRUE))

# Fail early instead of letting every later write fail inside a try() block.
missing_outdirs <- all_outdirs[!dir.exists(all_outdirs)]
if (length(missing_outdirs) > 0) {
  stop("Could not create output directories: ",
       paste(missing_outdirs, collapse = ", "), call. = FALSE)
}

In [None]:
# Turn the comma-separated parameter string into a character vector,
# e.g. "pre,post" becomes c("pre", "post").
sample_names <- strsplit(sample_names, split = ",", fixed = TRUE)[[1]]
#samples <- unlist(strsplit(samples, ","))

# Display the parsed labels.
sample_names

In [None]:
library(GenomicRanges)
library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
set.seed(1234)
library(data.table)
library(magrittr)
library(cowplot)
library(metap)
library("RColorBrewer")

library(future)
# Show the strategy that is active before it is reconfigured.
plan()

# The "multiprocess" strategy is deprecated (and defunct in recent releases of
# the future package); "multisession" is the portable replacement and runs
# `cores` background R sessions.
plan("multisession", workers = cores)
#options(future.globals.maxSize = 50000 * 1024^2) # for 50 Gb RAM
# Upper limit, in bytes (8000 * 1024^2), on globals exported to each worker.
options(future.globals.maxSize = 8000 * 1024^2)

In [None]:
# Load the integrated object from the RDS file given by `integrated_f`.
integrated <- readRDS(integrated_f)#file.path(indir, paste0("allSamples.integrated.rds")))
# Display a summary of the loaded object.
integrated

In [None]:
# Display the `peak_region_fragments` entry of the object; the LR tests below
# pass this name as `latent.vars` (presumably a per-cell metadata column).
integrated[['peak_region_fragments']]

In [None]:
# Display everything returned by `[[` with no name given
# (presumably the full per-cell metadata table; assumes a Seurat object).
integrated[[]]

## Compute DE for each cluster

In [None]:
#' Plot the top markers of one cluster and save the figure.
#'
#' Draws a violin plot of the top-ranked feature (restricted to identity `i`)
#' next to embedding plots of the top two features, and writes the combined
#' figure to `<outdir>/cluster_<i>.DE.GeneActivity.top2.png`.
#'
#' Plotting is best-effort: any error is turned into a warning so that a
#' failed figure does not abort the calling loop.
#'
#' @param integrated Object passed to `VlnPlot()` / `FeaturePlot()`.
#' @param de.results Marker table whose row names are feature names, ordered
#'   with the strongest marker first (as written by the calling loops).
#' @param i Identity (cluster) shown in the violin plot and used in the
#'   output file name.
#' @param outdir Directory that receives the PNG file.
#' @return A list holding the violin plot followed by the feature plots, or
#'   `NULL` (invisibly) when there was nothing to plot or plotting failed.
plotDE <- function(integrated, de.results, i, outdir) {
  # Use at most two features; fewer are available when the test returned
  # fewer than two rows.
  top_features <- head(rownames(de.results), 2)
  if (length(top_features) == 0) {
    warning("cluster ", i, ": no features to plot", call. = FALSE)
    return(invisible(NULL))
  }

  tryCatch(
    {
      violin <- VlnPlot(
        object = integrated,
        features = top_features[1],
        pt.size = 0.1,
        idents = i
      )
      embeddings <- lapply(top_features, function(feature) {
        FeaturePlot(object = integrated, features = feature, pt.size = 0.1)
      })
      panels <- c(list(violin), embeddings)

      # Same left-to-right layout as `plot1 | plot2 | plot3`.
      combined <- Reduce(`|`, panels)

      # Pass the figure explicitly instead of relying on last_plot().
      ggsave(
        filename = file.path(outdir, paste0("cluster_", i, ".DE.GeneActivity.top2.png")),
        plot = combined
      )
      panels
    },
    error = function(e) {
      warning("cluster ", i, ": plotting failed: ", conditionMessage(e),
              call. = FALSE)
      invisible(NULL)
    }
  )
}

## Loop through each cluster and run DA for RNA-seq

In [None]:

# One-vs-rest marker test for every cluster, run on whichever assay is the
# default at this point. Each cluster's table is written to `clust_outdir`
# and its top features are plotted with plotDE().
cluster.ids <- sort(unique(integrated$seurat_clusters))
for (i in cluster.ids) {
  # try() lets the loop continue when a single cluster fails.
  try({
    print(paste("cluster", i))
    da.peaks <- FindMarkers(
      integrated,
      ident.1 = i,
      min.pct = 0.1,
      test.use = "LR",
      latent.vars = "peak_region_fragments"
    )
    cluster_csv <- file.path(clust_outdir, paste0("cluster_", i, ".DE.GeneActivity.csv"))
    print(cluster_csv)
    print(head(da.peaks))
    write.csv(da.peaks, file = cluster_csv)
    print(paste("saved cluster", i))
    plotDE(integrated, da.peaks, i, clust_outdir)
    print(paste("plotted cluster", i))
  })
}

## Loop through each cluster and run DA for ATAC-seq

In [None]:
# Same one-vs-rest test as above, this time with the ATAC assay as default.
# Tables and figures go to `clust_atac_outdir`.
DefaultAssay(integrated) <- "ATAC"
cluster.ids <- sort(unique(integrated$seurat_clusters))
for (i in cluster.ids) {
  # try() lets the loop continue when a single cluster fails.
  try({
    print(paste("cluster", i))
    da.peaks <- FindMarkers(
      integrated,
      ident.1 = i,
      min.pct = 0.1,
      test.use = "LR",
      latent.vars = "peak_region_fragments"
    )
    print(head(da.peaks))
    atac_csv <- file.path(clust_atac_outdir, paste0("cluster_", i, ".DE.PeakActivity.csv"))
    write.csv(da.peaks, file = atac_csv)
    print(paste("saved cluster", i))
    plotDE(integrated, da.peaks, i, clust_atac_outdir)
    print(paste("plotted cluster", i))
  })
}

# Restore RNA as the default assay for the cells that follow.
DefaultAssay(integrated) <- "RNA"


## Stimulus specific response

In [None]:
#' Per-cluster differential test between two conditions.
#'
#' For every cluster, compares the cells of condition `sample_names[[1]]`
#' with those of `sample_names[[2]]` (identities are built as
#' `<cluster>_<orig.ident>`). When the test returns rows, it writes
#' `cluster_<id>.conditionDE.csv` and a scatter plot of the averaged RNA
#' values of the two conditions, `cluster_<id>.conditionScatter.png`, to
#' `outdir`.
#'
#' The object `integrated` is read from the calling environment; the
#' identity changes made here affect a function-local copy only.
#'
#' @param outdir Directory that receives the CSV and PNG files.
#' @param sample_names Character vector; its first two elements are the
#'   `orig.ident` labels to compare.
#' @return `NULL`, invisibly. Called for its side effects.
stimout <- function(outdir, sample_names) {
  stopifnot(length(sample_names) >= 2)
  first_cond <- sample_names[[1]]
  second_cond <- sample_names[[2]]

  cluster.ids <- sort(unique(integrated$seurat_clusters))
  integrated$celltype.stim <- paste(integrated$seurat_clusters, integrated$orig.ident, sep = "_")
  integrated$celltype <- integrated$seurat_clusters
  Idents(integrated) <- "celltype.stim"

  for (clust in cluster.ids) {
    # A cluster may lack cells of one condition; report it and move on.
    tryCatch(
      {
        response <- FindMarkers(
          integrated,
          ident.1 = paste0(clust, "_", first_cond),
          ident.2 = paste0(clust, "_", second_cond),
          verbose = FALSE,
          test.use = "LR",
          min.pct = 0.1,
          latent.vars = "peak_region_fragments"
        )
        if (nrow(response) > 0) {
          print(head(response, n = 15))

          # Select the cluster's cells explicitly rather than through a
          # non-standard-evaluation expression that captures a local variable.
          cells_in_clust <- colnames(integrated)[which(integrated$seurat_clusters == clust)]
          curr_clust <- subset(integrated, cells = cells_in_clust)
          avg_curr_clust <- data.frame(log1p(AverageExpression(curr_clust, verbose = FALSE)$RNA))
          avg_curr_clust$gene <- rownames(avg_curr_clust)

          # data.frame() prefixes column names that start with a digit with
          # "X" (assumes cluster ids start with a digit - TODO confirm).
          x_col <- paste0("X", clust, "_", first_cond)
          y_col <- paste0("X", clust, "_", second_cond)
          p1 <- ggplot(avg_curr_clust, aes_string(x_col, y_col)) +
            geom_point() +
            ggtitle(paste("Cluster", clust))
          p1 <- LabelPoints(plot = p1, points = rownames(head(response, n = 15)), repel = TRUE)
          scatter <- plot_grid(p1)

          # Name the outputs after the cluster processed in THIS iteration
          # (the previous code used an unrelated global variable here).
          write.csv(response, file = file.path(outdir, paste0("cluster_", clust, ".conditionDE.csv")))
          ggsave(
            filename = file.path(outdir, paste0("cluster_", clust, ".conditionScatter.png")),
            plot = scatter
          )
        }
      },
      error = function(e) {
        message("cluster ", clust, ": condition comparison skipped: ", conditionMessage(e))
      }
    )
  }
  invisible(NULL)
}



In [None]:
# Run the per-cluster condition comparison.
# - No comparison file/string given ("" or "NULL", also NULL/NA): compare the
#   two labels in `sample_names` and write into `cond_outdir`.
# - Otherwise `comps_f` holds ";"-separated entries "name,sampleA,sampleB";
#   each one is run into its own sub-directory `cond_outdir/<name>`.
no_comparisons <- is.null(comps_f) || length(comps_f) == 0 ||
  is.na(comps_f[[1]]) || comps_f[[1]] %in% c("", "NULL")

if (no_comparisons) {
  stimout(cond_outdir, sample_names)
} else {
  comps <- lapply(
    strsplit(comps_f[[1]], ";", fixed = TRUE)[[1]],
    function(x) strsplit(x, ",", fixed = TRUE)[[1]]
  )
  for (comp in comps) {
    print(comp)
    # Each entry needs a name plus two sample labels.
    if (length(comp) < 3) {
      warning("Skipping malformed comparison (expected 'name,sampleA,sampleB'): ",
              paste(comp, collapse = ","), call. = FALSE)
      next
    }
    curr_sample_names <- comp[2:3]
    curr_cond_outdir <- file.path(cond_outdir, comp[1])
    dir.create(curr_cond_outdir, showWarnings = FALSE, recursive = TRUE)
    stimout(curr_cond_outdir, curr_sample_names)
  }
}

# comps_f <- "A,preA,postA"#;B,preB,postB"
# comps <- unlist(strsplit(comps_f, ";")[[1]])
# #samples <- unlist(strsplit(samples, ","))

# comps <- lapply(comps, function(x) unlist(strsplit(x, ',')[[1]]))
# comps
# for (i in comps){
#     print(i)
#     sample_names <- i[2:3]
# }
# sample_names


                    
# cluster.ids <- sort(unique(integrated$seurat_clusters))
# integrated$celltype.stim <- paste(integrated$seurat_clusters, integrated$orig.ident, sep = "_")
# integrated$celltype <- integrated$seurat_clusters
# Idents(integrated) <- "celltype.stim"

# for (c in cluster.ids){
#     try({
#         response <- FindMarkers(integrated, 
#                                 ident.1 = paste0(c, "_", sample_names[[1]]), 
#                                 ident.2 = paste0(c, "_", sample_names[[2]]), 
#                                 verbose = FALSE,
#                                 test.use = 'LR', min.pct = 0.1,
#                                 latent.vars = 'peak_region_fragments'
#                                )
#         if (!(dim(response)[1]==0)){

#             print(head(response, n = 15))
#             curr_clust <- subset(integrated, seurat_clusters == c)
#             avg_curr_clust <- data.frame(log1p(AverageExpression(curr_clust, verbose = FALSE)$RNA))
#             avg_curr_clust$gene <- rownames(avg_curr_clust)

#             p1 <- ggplot(avg_curr_clust, aes_string(paste0("X", c, "_", sample_names[[1]]), paste0("X", c, "_", sample_names[[2]]))) + geom_point() + ggtitle(paste("Cluster", c))
#             p1 <- LabelPoints(plot = p1, points = rownames(head(response, n = 15)), repel = TRUE)
#             plot_grid(p1)
#             write.csv(response, file=file.path(cond_outdir,paste0("cluster_",i,".conditionDE.csv")))
#             ggsave(file.path(cond_outdir,paste0("cluster_",i,".conditionScatter.png")))

#         }
#     })

# }

## Gene Markers plot

In [None]:
# One colour per condition, used by the split dot plots below.
# brewer.pal() only supports n >= 3: for a smaller n it warns and returns
# three colours whose second entry is the near-white midpoint of the
# diverging palette. Request at least three and take the two ends instead.
n_conditions <- length(sample_names)
rdbu_palette <- brewer.pal(n = max(3, n_conditions), name = "RdBu")
clrs <- if (n_conditions < 3) {
  rdbu_palette[c(1, 3)][seq_len(n_conditions)]
} else {
  rdbu_palette
}

### Immune markers taken from Seurat

In [None]:
# Use the `seurat_clusters` labels as the active identities for the plots and
# the conserved-marker tests that follow.
Idents(integrated) <- "seurat_clusters"

In [None]:
# Embedding plots for a panel of immune marker genes.
seurat_immune_embed <- FeaturePlot(
  integrated,
  features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A",
               "CCL2", "PPBP"),
  min.cutoff = "q9"
)
print(seurat_immune_embed)

# Save the figure explicitly rather than through last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(filename = file.path(outdir, "seuratImmuneEmbed.png"), plot = seurat_immune_embed)


In [None]:
# Marker panel shown in the dot plot, split by `orig.ident` and coloured
# with `clrs`.
markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5",
    "CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
    "GPR183", "PPBP", "GNG11", "HBA2", "HBB", "TSPAN13", "IL3RA", "IGJ")
seurat_immune_dots <- DotPlot(integrated, features = rev(markers.to.plot), dot.scale = 8,
                              cols = clrs, split.by = "orig.ident") + RotatedAxis()
print(seurat_immune_dots)

# Save the figure explicitly rather than through last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(filename = file.path(outdir, "seuratImmuneDotPlot.png"), plot = seurat_immune_dots)

### Immune markers taken from Dawn Lin et al Flt3l paper

In [None]:
# Second marker panel; also used by the embedding plot in the next cell.
immune.markers <- c(
  "KIT",
  "LY6E",
  "ITGAX",
  "SLAMF1",
  "CD34",
  "FCGR3A",
  "PTPRC",
  "SLAMF2",
  "IL7R",
  "ITGAM"
)

# Dot plot split by `orig.ident`, coloured with `clrs`.
lin_immune_dots <- DotPlot(integrated, features = rev(immune.markers), dot.scale = 8,
                           cols = clrs, split.by = "orig.ident") + RotatedAxis()
print(lin_immune_dots)

# Save the figure explicitly rather than through last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(filename = file.path(outdir, "linImmuneDotPlot.png"), plot = lin_immune_dots)

In [None]:
# Embedding plots for the `immune.markers` panel.
lin_immune_embed <- FeaturePlot(integrated, features = immune.markers, min.cutoff = "q9")
print(lin_immune_embed)

# Save the figure explicitly rather than through last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(filename = file.path(outdir, "linImmuneEmbed.png"), plot = lin_immune_embed)

## Conserved markers to help identify cell type

In [None]:
# Markers of each cluster that are conserved across the `orig.ident` groups.
# One CSV per cluster is written to `cons_outdir`; the first row name of each
# result is collected in `topMarkers`.
topMarkers <- c()
for (i in cluster.ids) {
  # try() lets the loop continue when a single cluster fails.
  try({
    print(paste("cluster", i))
    cons.markers <- FindConservedMarkers(
      integrated,
      ident.1 = i,
      grouping.var = "orig.ident",
      verbose = TRUE
    )

    cons_csv <- file.path(cons_outdir, paste0("cluster_", i, ".conservedOverStim.GeneActivity.csv"))
    write.csv(cons.markers, file = cons_csv)
    print(paste("saved cluster", i))
    print(head(cons.markers))
    topMarkers <- append(topMarkers, rownames(cons.markers)[1])
    print(topMarkers)
  })
}

# Show the result of the last cluster that completed.
head(cons.markers)

In [None]:
# Record the R version and the attached/loaded package versions of this run.
sessionInfo()