## Runs differential TF motif activity (chromVAR) analysis for each cluster. Also compares conditions within each cluster

In [None]:
# Path to the integrated object (.rds); loaded with readRDS() below.
integrated_f = "/data2/mito_lineage/output/annotation/data/jan21_2021/MTblacklist/mergedSamples/allSamples.integrated.rds" 
# Root output directory; the per-analysis sub-directories are created under it.
outdir = "/data2/mito_lineage/output/annotation/data/jan21_2021/MTblacklist/mergedSamples/DE_TF"
# Comma-separated sample names. Split into a character vector below; the first
# two entries are the conditions compared within each cluster.
sample_names = "Control,Flt3l"
# Number of workers handed to future::plan().
cores = 8

# Genome FASTA; its index is expected at "<genome>.fai" (see the FaFile call).
genome = "/data2/mito_lineage/data/external/GRCh38_MT_blacklist/fasta/genome.fa"
# Optional explicit comparisons. "" or "NULL" means: compare the entries of
# sample_names. Otherwise a ";"-separated list of "name,sampleA,sampleB"
# entries, where `name` becomes an output sub-directory.
comps_f = ""
#cond.comparisons <- "A,preA,postA;B,preB,postB"

## Libraries

In [None]:
library(motifmatchr)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)

In [None]:
library(GenomicRanges)
library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
set.seed(1234)
library(data.table)
library(magrittr)
library(cowplot)
library(metap)
library("RColorBrewer")

library(future)
# Show the currently active future strategy (notebook output).
plan()

# The "multiprocess" strategy is deprecated (and removed in recent releases of
# the future package). Pick explicitly what it used to pick implicitly: forked
# workers where forking is supported, background R sessions otherwise.
if (future::supportsMulticore()) {
  plan("multicore", workers = cores)
} else {
  plan("multisession", workers = cores)
}
#options(future.globals.maxSize = 50000 * 1024^2) # for 50 Gb RAM
# Upper limit on the size of globals exported to workers: 8000 MiB.
options(future.globals.maxSize = 8000 * 1024^2)

In [None]:
# Output sub-directories, one per analysis stage.
clust_outdir <- file.path(outdir, "clusters")
clust_atac_outdir <- file.path(outdir, "clusters_atac")
cond_outdir <- file.path(outdir, "conditions_clusters")
cons_outdir <- file.path(outdir, "conditions_conserved")

# recursive = TRUE: without it, a missing parent of `outdir` makes every
# dir.create() fail, and with showWarnings = FALSE that failure was silent
# until the first write much later in the notebook.
for (out_dir_i in c(outdir, clust_outdir, cond_outdir, cons_outdir,
                    clust_atac_outdir)) {
  dir.create(out_dir_i, showWarnings = FALSE, recursive = TRUE)
  if (!dir.exists(out_dir_i)) {
    stop("Could not create output directory: ", out_dir_i, call. = FALSE)
  }
}

In [None]:
# Split the comma-separated parameter into a character vector.
# unlist() over the whole strsplit() result (rather than taking [[1]]) keeps
# this cell idempotent: re-running it on the already-split vector used to keep
# only the first sample. trimws() tolerates "Control, Flt3l"-style input, which
# would otherwise produce identity names that never match.
sample_names <- trimws(unlist(strsplit(sample_names, ",", fixed = TRUE)))
#samples <- unlist(strsplit(samples, ","))

sample_names

In [None]:
# # extract gene annotations from EnsDb
# annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75)

# # change to UCSC style since the data was mapped to hg19
# seqlevelsStyle(annotations) <- 'UCSC'
# genome(annotations) <- "hg38"

# # add the gene information to the object
# Annotation(integrated) <- annotations


# gene.activities <- GeneActivity(integrated)


In [None]:
integrated <- readRDS(integrated_f)#file.path(indir, paste0("allSamples.integrated.rds")))
integrated

In [None]:
integrated[['peak_region_fragments']]

In [None]:
integrated[[]]

# Get motif position frequency matrices (JASPAR2020)

In [None]:
# extract position frequency matrices for the motifs
# species = 9606 restricts the query to a single species (presumably the NCBI
# taxonomy ID for human, matching the Hsapiens packages loaded above — TODO
# confirm). all_versions = FALSE presumably keeps one version per matrix
# instead of every archived version — verify against the TFBSTools docs.
pwm <- getMatrixSet(
  x = JASPAR2020,
  opts = list(species = 9606, all_versions = FALSE)
)


In [None]:
pwm

In [None]:
genome

In [None]:
# Handle on the genome FASTA given by `genome`; the index path is built as
# "<genome>.fai", so that file is expected to sit next to the FASTA.
fa.file <- Rsamtools::FaFile(genome, index=sprintf("%s.fai", genome)) #,


In [None]:
DefaultAssay(integrated) <- "ATAC"

In [None]:
chrom.assay <- integrated[["ATAC"]]

In [None]:

# add motif information
chrom.assay <- AddMotifs(chrom.assay, genome = fa.file, pfm = pwm)

In [None]:
chrom.assay

In [None]:

# add motif information
#integrated <- AddMotifs(integrated, genome = fa.file, pfm = pwm, assay='ATAC')

In [None]:
# Store the motif object that AddMotifs() built on the standalone `chrom.assay`
# copy back into `integrated`. No assay is named here, so this presumably
# targets the default assay (set to "ATAC" earlier) — confirm against the
# SetAssayData documentation for the installed Seurat version.
integrated = SetAssayData(integrated, slot="motifs", Motifs(chrom.assay) )

In [None]:
integrated


In [None]:
#names <- ConvertMotifID(object = motif, id = ids)


In [None]:
# Run chromVAR on the object using the FASTA-backed genome handle; the result
# is read back below as the "chromvar" assay.
# NOTE(review): Signac documents `genome` as a BSgenome object — confirm that
# an Rsamtools FaFile is accepted by the installed version.
integrated = RunChromVAR(integrated, genome=fa.file)


In [None]:
DefaultAssay(integrated) <- 'chromvar'

In [None]:
p1 <- DimPlot(integrated, label = TRUE, pt.size = 0.1) + NoLegend()

# look at the activity of Mef2c
p2 <- FeaturePlot(
  object = integrated,
  features = "MA0497.1",
  min.cutoff = 'q10',
  max.cutoff = 'q90',
  pt.size = 0.1
)
p1 + p2

In [None]:
integrated[["chromvar"]]

In [None]:
# Example run for a single identity: features that are higher (only.pos = TRUE)
# in identity 1. No ident.2 is given, so the comparison group is presumably all
# remaining cells — confirm against the FindMarkers documentation.
# mean.fxn = rowMeans computes the effect size from plain row means, and
# fc.name = "avg_diff" names the resulting column accordingly.
differential.activity <- FindMarkers(
  object = integrated,
  ident.1 = 1,
  only.pos = TRUE,
  mean.fxn = rowMeans,
  fc.name = "avg_diff"
)

# Sequence logos for the first few (head()) rows of the result, looked up in
# the ATAC assay's motif information.
MotifPlot(
  object = integrated,
  motifs = head(rownames(differential.activity)),
  assay = 'ATAC'
)


In [None]:
#' Plot and save the top differential motifs of one cluster.
#'
#' Writes three files into `outdir`:
#'   cluster_<i>.DE.TF.top2.png    feature plots of the first one or two rows
#'   cluster_<i>.DE.TF.Motifs.png  motif logos of the first rows (head())
#'   cluster_<i>.DE.TF.Motifs.pdf  same motif logos as PDF
#'
#' Fixes over the previous version:
#'   * a bare `try` on its own line did not wrap the block, so nothing was
#'     caught; errors are now handled with tryCatch() and reported as a warning;
#'   * every ggsave() call wrote whatever ggplot2 considered the "last plot",
#'     so the three files did not contain the plots their names promise; each
#'     call now receives its plot explicitly;
#'   * c() on ggplot objects flattened them into an unusable list; the plots
#'     are now returned as a proper named list;
#'   * results with fewer than two rows no longer request an NA feature.
#'
#' @param integrated Object passed to MotifPlot() (assay 'ATAC') and
#'   FeaturePlot() (which uses the object's current default assay).
#' @param de.results Table whose row names are the feature/motif IDs to plot;
#'   presumably ordered best-first, as returned by FindMarkers() — verify.
#' @param i Cluster label, used only in the output file names.
#' @param outdir Existing directory to write the files into.
#' @return Invisibly, list(motifs, top1, top2) of plot objects (`top2` is NULL
#'   when only one row is available); NULL when there is nothing to plot or
#'   plotting failed.
plotDE <- function(integrated, de.results, i, outdir){
    top_ids <- rownames(de.results)
    if (length(top_ids) == 0) {
        warning("plotDE: no DE results to plot for cluster ", i, call. = FALSE)
        return(invisible(NULL))
    }

    plots <- tryCatch(
        {
            plot1 <- MotifPlot(object = integrated,
                               motifs = head(top_ids),
                               assay = 'ATAC')
            plot2 <- FeaturePlot(
              object = integrated,
              features = top_ids[1],
              pt.size = 0.1
            )
            plot3 <- NULL
            if (length(top_ids) >= 2) {
                plot3 <- FeaturePlot(
                  object = integrated,
                  features = top_ids[2],
                  pt.size = 0.1
                )
            }
            # patchwork's `|` places the two feature plots side by side.
            top_panel <- if (is.null(plot3)) plot2 else plot2 | plot3

            prefix <- file.path(outdir, paste0("cluster_", i, ".DE.TF"))
            ggsave(paste0(prefix, ".top2.png"), plot = top_panel)
            ggsave(paste0(prefix, ".Motifs.png"), plot = plot1)
            ggsave(paste0(prefix, ".Motifs.pdf"), plot = plot1)

            list(motifs = plot1, top1 = plot2, top2 = plot3)
        },
        error = function(e) {
            warning("plotDE: plotting failed for cluster ", i, ": ",
                    conditionMessage(e), call. = FALSE)
            NULL
        }
    )
    invisible(plots)
}

## Compute DE for each cluster

In [None]:
# Differential activity per cluster: for every value of seurat_clusters, run
# FindMarkers() with that value as ident.1, write the table to
# <clust_outdir>/cluster_<id>.DE.TF.csv and plot the top hits with plotDE().
cluster.ids <- sort(unique(integrated$seurat_clusters))
for (cluster_id in cluster.ids) { #or however many clusters you have
    # try() keeps the loop going when a single cluster fails.
    try({
        print(paste("cluster", cluster_id))
        csv_path <- file.path(clust_outdir,
                              paste0("cluster_", cluster_id, ".DE.TF.csv"))
        da <- FindMarkers(
          object = integrated,
          ident.1 = cluster_id,
          only.pos = TRUE,
          mean.fxn = rowMeans,
          fc.name = "avg_diff"
        )

        print(csv_path)
        print(head(da))
        write.csv(da, file = csv_path)
        print(paste("saved cluster", cluster_id))

        plotDE(integrated, da, cluster_id, clust_outdir)
        print(paste("plotted cluster", cluster_id))
    })
}

## Stimulus specific response

In [None]:
#' Condition-vs-condition differential activity within each cluster.
#'
#' For every cluster, compares cells of `sample_names[1]` against cells of
#' `sample_names[2]` (identities "<cluster>_<orig.ident>") with FindMarkers().
#' When the result is non-empty it writes
#'   cluster_<id>.conditionDE.TF.csv          the FindMarkers table
#'   cluster_<id>.conditionDE.TF.Scatter.png  per-condition averages, with the
#'                                            first 15 result rows labelled
#' into `outdir`. A failure in one cluster is reported and the loop continues.
#'
#' @param outdir Existing directory for the output files.
#' @param sample_names Character vector; the first two entries are the
#'   orig.ident values to compare.
#' @param object Object to analyse. Defaults to the notebook-level
#'   `integrated`, which this function previously read as a hidden global.
#'   Only a local copy is modified.
#' @return NULL, invisibly; called for its side effects.
stimout <- function(outdir, sample_names, object = integrated){
    if (length(sample_names) < 2) {
        stop("stimout: need two sample names to compare, got ",
             length(sample_names), call. = FALSE)
    }

    cluster.ids <- sort(unique(object$seurat_clusters))
    object$celltype.stim <- paste(object$seurat_clusters, object$orig.ident, sep = "_")
    object$celltype <- object$seurat_clusters
    Idents(object) <- "celltype.stim"

    # Loop variable is no longer called `c`, which shadowed base::c().
    for (cluster_id in cluster.ids){
        tryCatch({
            ident_names <- paste0(cluster_id, "_", sample_names[1:2])
            response <- FindMarkers(object = object,
                                    ident.1 = ident_names[1],
                                    ident.2 = ident_names[2],
                                    only.pos = TRUE,
                                    mean.fxn = rowMeans,
                                    fc.name = "avg_diff")

            if (nrow(response) > 0){
                print(head(response, n = 15))

                # Select the cluster's cells by name instead of through a
                # non-standard-evaluation expression, so the lookup does not
                # depend on how subset() resolves local variables.
                in_cluster <- as.character(object$seurat_clusters) == cluster_id
                curr_clust <- subset(object, cells = colnames(object)[in_cluster])

                # Only the chromvar averages are used, so only that assay is
                # averaged.
                # NOTE(review): AverageExpression() is designed for
                # log-normalised data — confirm it is meaningful for chromVAR
                # scores.
                avg_curr_clust <- data.frame(log1p(
                    AverageExpression(curr_clust, assays = "chromvar",
                                      verbose = FALSE)$chromvar))
                avg_curr_clust$gene <- rownames(avg_curr_clust)

                # data.frame() sanitises column names with make.names(); use the
                # same function rather than a hard-coded "X" prefix, which only
                # matched cluster labels that start with a digit.
                axis_cols <- make.names(ident_names)

                # aes_string() is kept on purpose: LabelPoints() reads the x/y
                # mappings as column names.
                p1 <- ggplot(avg_curr_clust, aes_string(axis_cols[1], axis_cols[2])) +
                    geom_point() +
                    ggtitle(paste("Cluster", cluster_id))
                p1 <- LabelPoints(plot = p1, points = rownames(head(response, n = 15)), repel = TRUE)

                write.csv(response, file=file.path(outdir,paste0("cluster_",cluster_id,".conditionDE.TF.csv")))
                # Pass the plot explicitly; the previous version discarded the
                # plot_grid() result and saved whatever last_plot() returned.
                ggsave(file.path(outdir,paste0("cluster_",cluster_id,".conditionDE.TF.Scatter.png")),
                       plot = plot_grid(p1))
            }
        }, error = function(e) {
            message("stimout: cluster ", cluster_id, " skipped: ",
                    conditionMessage(e))
        })
    }
    invisible(NULL)
}



In [None]:
# Run the within-cluster condition comparison.
# With no comparison spec (NULL, "" or the literal string "NULL") the entries
# of sample_names are compared directly. Otherwise comps_f is a ";"-separated
# list of "name,sampleA,sampleB" entries, each written to its own
# sub-directory <cond_outdir>/<name>.
# `||` is the scalar, short-circuiting operator that belongs in if(); the
# is.null() guard avoids the zero-length-condition error the old test raised.
if (is.null(comps_f) || !nzchar(comps_f) || comps_f == "NULL"){
    stimout(cond_outdir, sample_names)

}else{
    comps <- strsplit(comps_f, ";", fixed = TRUE)[[1]]
    comps <- lapply(comps, function(x) trimws(strsplit(x, ",", fixed = TRUE)[[1]]))
    for (comp in comps){
        print(comp)
        # A malformed entry used to yield NA sample names further down.
        if (length(comp) != 3) {
            warning("Skipping malformed comparison (expected ",
                    "'name,sampleA,sampleB'): ",
                    paste(comp, collapse = ","), call. = FALSE)
            next
        }
        curr_sample_names <- comp[2:3]
        curr_cond_outdir <- file.path(cond_outdir, comp[1])
        dir.create(curr_cond_outdir, showWarnings = FALSE, recursive = TRUE)
        stimout(curr_cond_outdir, curr_sample_names)
    }
}


In [None]:
sessionInfo()