In [None]:
# ---- Inputs ----
# Experiment identifier: selects this sample's cells via `orig.ident` and
# prefixes every output file name. NOTE(review): shares its name with
# base::exp(); calls to exp() still resolve to the function.
exp <- "Flt3l"
# Path of the merged object loaded with readRDS() further down.
SE_f <- "/data/isshamie/mito_lineage/output/annotation/cd34norm/MTblacklist/Flt3l/Flt3l.merged.rds"

# ---- Outputs ----
# Directory that receives all plots and tables written by this notebook.
# Earlier location: "/data2/mito_lineage/Analysis/annotation/output/data/"
outdir <- "/data/isshamie/mito_lineage/output/annotation/cd34norm/MTblacklist/Flt3l/lareau/sample"
#outdir <- "/data/isshamie/mito_lineage/output/annotation/jan21_2021/MTblacklist/Flt3l/" #Flt3l.merged.rds"

# ---- Parameters ----
# Number of top variable peaks kept for the second LSI iteration.
nTop <- 25000


In [None]:
library(future)
# Called without arguments, plan() does not change the future strategy; it
# only reports the one currently in effect.
plan()
# Disabled: parallel execution (`workers` is not defined in the visible cells).
#plan("multiprocess", workers = workers)
# Raise the size limit for globals exported to futures to 8000 * 1024^2 bytes.
options(future.globals.maxSize = 8000 * 1024^2)

In [None]:
source("/data2/mito_lineage/R_scripts/annotations/01_CD34_projection.R")

In [None]:
library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
set.seed(1234)
library(data.table)
library(cowplot)
library(umap)
library(magrittr)

## Import data

In [None]:
# Load the merged object; it is split by `orig.ident` into the external
# reference cells and this experiment's cells below.
combined <- readRDS(file = SE_f)

In [None]:
# Variable features currently registered on the ATAC assay of the merged object.
varPeaks <- VariableFeatures(combined[["ATAC"]])

In [None]:
# Split the merged object by `orig.ident`: the external reference cells
# ("granja_cd34") and the cells of the current experiment (`exp`).
ext <- subset(combined, subset = orig.ident == "granja_cd34")
curr <- subset(combined, subset = orig.ident == exp)


In [None]:
# Keep the variable peaks that are present in both objects.
# The membership mask is computed over `varPeaks`, so it has to index
# `varPeaks` itself. Indexing rownames(ext) with it would pick unrelated rows
# and silently recycle the mask when the two lengths differ.
varPeaks_both <- varPeaks[(varPeaks %in% rownames(ext)) & (varPeaks %in% rownames(curr))]
length(varPeaks_both)
#curr_varP <- rownames(curr)[varPeaks]


In [None]:
# Matrix returned by GetAssayData() for the external cells, restricted to the
# shared variable peaks; print its dimensions as a sanity check.
ext_var <- GetAssayData(object = ext)[varPeaks_both, ]
dim(ext_var)

In [None]:
# Matrix returned by GetAssayData() for this experiment's cells, restricted to
# the shared variable peaks; print its dimensions as a sanity check.
curr_var <- GetAssayData(object = curr)[varPeaks_both, ]
dim(curr_var)

# Run the workflow - Using our dataset as reference and projecting external data onto our coordinates

## 1. Run LSI 1st Iteration

In [None]:
# First LSI pass on this sample's ATAC data with 25 components and
# binarization enabled. calcLSI() and louvainIgraphClusters() are not defined
# in the visible cells (presumably they come from the sourced
# 01_CD34_projection.R — confirm).
lsi1 <- calcLSI(curr[["ATAC"]]@data, nComponents = 25, binarize = TRUE, nFeatures = NULL)
# Cluster the cells on the first element of the LSI result. The second
# argument (10) is presumably the neighbour count k — confirm against the helper.
clust1 <- louvainIgraphClusters(lsi1[[1]], 10)



## 2. Make Pseudo Bulk Library


In [None]:
# Build one pseudo-bulk profile per first-pass cluster, convert to log CPM and
# keep the `nTop` peaks with the highest variance across clusters.
# groupSums() is not defined in the visible cells (presumably from the sourced
# script — confirm).
message("Making PseudoBulk...")
clusterSums <- groupSums(mat = curr[["ATAC"]]@data, groups = clust1) #Group Sums
logMat <- edgeR::cpm(clusterSums, log = TRUE, prior.count = 3) #log CPM matrix
# NOTE: this overwrites the earlier `varPeaks` with integer row positions of
# `logMat` (order() returns indices, not peak names).
varPeaks <- head(order(matrixStats::rowVars(logMat), decreasing = TRUE), nTop) #Top variable peaks
# `varPeaks` is a plain vector, so dim() would always be NULL; report its length.
length(varPeaks)

## 3. Run LSI 2nd Iteration

In [None]:
# Second LSI pass, restricted to the rows selected in `varPeaks`.
lsi2 <- calcLSI(
  curr[["ATAC"]]@data[varPeaks, , drop = FALSE],
  nComponents = 25,
  binarize = TRUE,
  nFeatures = NULL
)
# Cluster on components 2-25 only; component 1 is left out (presumably because
# it tracks sequencing depth — confirm).
clust2 <- louvainIgraphClusters(lsi2[[1]][, 2:25], 30)
print("Number of clusters")
print(length(unique(clust2)))
print("LSI2 dimensions")
print(dim(lsi2$matSVD))


### 3.2 build mean for clusters

In [None]:
# Presumably the per-cluster means of the second-pass LSI embedding;
# build_means_for_clusters() is not defined in the visible cells — confirm.
means_lsi <- build_means_for_clusters(lsi2$matSVD, clust2)
# Aliases for the second-pass peak selection and LSI result; neither alias is
# used again in the visible cells.
lsiPeaks <- varPeaks
lsiReference <- lsi2


## 4. Run UMAP on 2nd LSI

In [None]:
# Fit the UMAP on LSI components 2-25 of this sample. The seed is fixed right
# before the call so the layout is reproducible. Note that the result is stored
# in a variable named `umap`, alongside the package's umap() function.
set.seed(1)
umap <- umap::umap(
  lsi2$matSVD[, 2:25],
  n_neighbors = 55,   # original 55
  min_dist = 0.45,    # original 0.45
  metric = "cosine",
  verbose = TRUE
)
#set.seed(10)

## 4.2 Build UMAP means for each cluster

In [None]:
# Re-cluster the cells on their UMAP layout coordinates (k = 100) and print
# the number of clusters found.
umap_clust2 <- louvainIgraphClusters(umap$layout, k = 100)
length(unique(umap_clust2))
# Presumably the per-cluster means of the UMAP coordinates (helper is not
# defined in the visible cells — confirm). Not used again in the visible cells.
umap_means_lsi <- build_means_for_clusters(umap$layout, umap_clust2)

## 5. Project external data onto our sample's LSI-embedded coordinate system

In [None]:
# Project the external cells into the LSI space fitted on our sample (lsi2),
# using the same rows that were selected for the second LSI pass.
# NOTE(review): `varPeaks` holds row positions computed from curr's ATAC
# matrix, so this assumes ext's ATAC matrix has its rows in the same order —
# confirm. projectLSI() is not defined in the visible cells.
lsiProjection <- projectLSI(ext[["ATAC"]]@data[varPeaks,,drop=FALSE], lsi2)


## Remove cells whose projected LSI components are all zero

In [None]:
# Row names of the projected cells that have at least one non-zero value among
# LSI components 2-25.
noZero.inds <- names(which(
  !apply(data.matrix(lsiProjection[, 2:25] == 0), MARGIN = 1, FUN = all)
))
# Place those cells on the fitted UMAP and round coordinates to 2 decimals.
umapProjection <- round(
  predict(umap, data.matrix(lsiProjection[noZero.inds, 2:25])),
  digits = 2
)
#umapProjection <- round(predict(umap, data.matrix(lsiProjection[,2:25])), 2)

## 6. Plot our clustered results

In [None]:
# Flip the sign of the UMAP coordinates so the pseudotime reads left to right,
# then attach the sample metadata and the second-pass cluster labels.
ref_df <- data.frame(umap$layout * -1, curr[[]], Clusters = clust2)

# Scatter plot of this sample's cells, coloured by cluster.
p0 <- ggplot(data = ref_df, mapping = aes(x = X1, y = X2, color = Clusters)) +
  geom_point(size = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", color = "") +
  pretty_plot() +
  L_border() +
  theme(legend.position = "bottom")

print(p0)
# Arguments are spelled out instead of relying on `file` partially matching
# `filename`.
ggsave2(
  filename = file.path(outdir, paste0(exp, ".merged.lsi.Batchlabels.png")),
  plot = p0
)

## 7. Project the ref and query on UMAP coordinates

In [None]:
# Stack the projected external cells (labelled with their `Group`) on top of
# this sample's cells (labelled "none"); coordinates use the same sign flip as
# `ref_df`.
merged_projection <- data.frame(
  celltype = c(
    ext[[]][rownames(umapProjection), ]$Group,
    rep("none", nrow(ref_df))
  ),
  umap1 = c(umapProjection[, 1] * -1, ref_df$X1),
  umap2 = c(umapProjection[, 2] * -1, ref_df$X2)
)


In [None]:
# Rows are reversed before plotting so this sample's "none" cells are drawn
# first and the labelled external cells end up on top.
p1 <- ggplot(
  data = merged_projection[nrow(merged_projection):1, ],
  mapping = aes(x = umap1, y = umap2, color = celltype, label = celltype)
) +
  geom_point(size = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", color = "CD1 FACS ") +
  pretty_plot() +
  L_border() +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c(ejc_color_maps, "none" = "lightgrey", "Monocytes" = "orange2"))
print(p1)

ggsave2(
  filename = file.path(outdir, paste0(exp, ".project.ext.cellLabels.png")),
  plot = p1
)


## 8. LSI classify from the reference projected into sample space
(Also try classifying from the UMAP projection.)

In [None]:
## Classify between the projected external cells and the sample's LSI space.
## classify_from_reference() is not defined in the visible cells; the meaning
## and orientation of its result are assumed from how it is used below — confirm.
## `use_means` switches the second argument between the per-cluster LSI means
## (TRUE) and the individual cells of the sample (FALSE, the active setting).
## Only the external cells that survived the all-zero filter (the rows of
## `umapProjection`) are passed in, with LSI components 1-25.
use_means <- FALSE
if(use_means){
    projected_clustersbasic <- classify_from_reference(data.matrix(lsiProjection)[rownames(umapProjection),1:25],t(means_lsi)[,1:25])    
}else{
    ## Classifying each cell to nearest reference cell.
    projected_clustersbasic <- classify_from_reference(data.matrix(lsiProjection)[rownames(umapProjection),1:25],
                                                   (lsi2$matSVD)[,1:25])    
}
head(projected_clustersbasic)


                                                   



In [None]:
# Translate the classification result into cell-type labels and store them as
# the `predict` metadata column of this sample.
#annovec <- c("myeloid", "myeloid", "CD4", "CD4", "Bcell", "CD8", "myeloid", "myeloid", "myeloid", "NKcell", "CD4", "CD8", "Bcell")
# One-column data frame holding the `Group` annotation of the external cells.
annovec <- ext[["Group"]]
#names(annovec) <- paste0("mc", as.character(1:13))
# Look up `Group` by row name; the trailing comma on a one-column data frame
# drops the result to a plain vector.
# NOTE(review): this assumes projected_clustersbasic holds external-cell names,
# one per cell of `curr` and in curr's cell order — confirm against
# classify_from_reference().
projected_clusters <- annovec[as.character(projected_clustersbasic),]
# Prints the full label vector.
projected_clusters
curr[["predict"]] <- projected_clusters

# Pick the label to report for one metadata row.
#
# x: named vector for a single cell (as passed by apply() over the metadata),
#    read through its "Group" and "predict" entries.
# Returns the length-1 named element x["Group"] when it is not NA, otherwise
# x["predict"].
group_or_pred <- function(x) {
  chosen <- if (is.na(x["Group"])) "predict" else "Group"
  x[chosen]
}
# Row-wise over this sample's metadata: keep `Group` where present, otherwise
# fall back to the predicted label.
curr[["group_or_predict"]] <- apply(curr[[]], MARGIN = 1, FUN = group_or_pred)

In [None]:
# Same column for the external cells, so both objects expose `group_or_predict`.
ext[["group_or_predict"]] <- apply(ext[[]], MARGIN = 1, FUN = group_or_pred)

In [None]:
# Ref umap: sign-flipped UMAP coordinates of this sample plus its metadata
# (which now carries `predict` / `group_or_predict`) and cluster labels.
plot_df <- data.frame(umap$layout * -1, curr[[]], Clusters = clust2)

# Query projection: stack the projected external cells (with their annotated
# `Group`) on top of this sample's cells (with their `group_or_predict` label).
# Fixes a stray ")" after `$Group` that closed c() early and made the cell
# unparseable. as.character() keeps factor columns from being combined as
# integer codes when concatenated with character vectors.
merged_predicted <- data.frame(
  celltype = c(
    as.character(ext[[]][rownames(umapProjection), ]$Group),
    as.character(plot_df[, "group_or_predict"])
  ),
  umap1 = c(umapProjection[, 1] * -1, plot_df$X1),
  umap2 = c(umapProjection[, 2] * -1, plot_df$X2),
  orig.ident = c(
    rep("reference", nrow(umapProjection)),
    as.character(plot_df$orig.ident)
  )
)
head(merged_predicted)

## 9. Plot both annotated cell from external and predicted cell type in our sample overlaid on UMAP

In [None]:
# Rows are reversed before plotting so this sample's cells are drawn first and
# the external cells end up on top.
p1 <- ggplot(
  data = merged_predicted[nrow(merged_predicted):1, ],
  mapping = aes(x = umap1, y = umap2, color = celltype, label = celltype)
) +
  geom_point(size = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", color = "celltype") +
  pretty_plot() +
  L_border() +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c(ejc_color_maps, "none" = "lightgrey", "Monocytes" = "orange2"))
print(p1)

ggsave2(
  filename = file.path(outdir, paste0(exp, ".project.cellLabels.png")),
  plot = p1
)


## 10. Plot predicted cell type of our sample overlaid on UMAP

In [None]:
# Flip the sign of the UMAP coordinates so the pseudotime reads left to right,
# and attach this sample's metadata (without the cluster column this time).
plot_df <- data.frame(umap$layout * -1, curr[[]])

# This sample only, coloured by annotated-or-predicted cell type.
p0 <- ggplot(data = plot_df, mapping = aes(x = X1, y = X2, color = group_or_predict)) +
  geom_point(size = 0.5) +
  labs(x = "UMAP1", y = "UMAP2", color = "celltype") +
  pretty_plot() +
  L_border() +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c(ejc_color_maps, "none" = "lightgrey", "Monocytes" = "orange2"))

print(p0)
ggsave2(
  filename = file.path(outdir, paste0(exp, ".project.sample.cellLabels.umap.png")),
  plot = p0
)


## Save the merged projections and celltype

In [None]:
# Write the stacked reference/sample table (cell type, UMAP coordinates,
# origin) with write.table()'s default separator and unquoted values.
write.table(
  merged_predicted,
  file = file.path(outdir, paste0(exp, ".merged.predicted_celltype.txt")),
  quote = FALSE
)

## 11. Plot predicted cell type abundance 

In [None]:
# Count cells per (orig.ident, predicted cell type) and show the table.
tbl <- with(plot_df, table(orig.ident, predict))
tbl
# Stacked bar chart of those counts; the plot is saved but not printed.
p1 <- ggplot(
  data = as.data.frame(tbl),
  mapping = aes(factor(orig.ident), Freq, fill = predict)
) +
  geom_bar(position = "stack", stat = "identity") +
  ggtitle(paste(exp, "Predicted cell type")) +
  theme(plot.title = element_text(size = 24))
ggsave2(
  filename = file.path(outdir, paste0(exp, ".project.sample.cellLabels.abundace.png")),
  plot = p1
)


## Plot abundance for external data

In [None]:
# Metadata of the external cells.
# Earlier variants: projection_df / data.frame(umap$layout*-1, (ext[[]]))
ext_df <- ext[[]]

# Count cells per (orig.ident, Group) and show the table.
tbl <- with(ext_df, table(orig.ident, Group))
tbl
# Stacked bar chart of those counts; the plot is saved but not printed.
# NOTE(review): the title says "Predicted cell type" although `Group` is the
# external annotation — confirm whether the wording is intended.
p1 <- ggplot(
  data = as.data.frame(tbl),
  mapping = aes(factor(orig.ident), Freq, fill = Group)
) +
  geom_bar(position = "stack", stat = "identity") +
  ggtitle(paste(exp, "Predicted cell type")) +
  theme(plot.title = element_text(size = 24))
ggsave2(
  filename = file.path(outdir, paste0(exp, ".project.ext.cellLabels.abundace.png")),
  plot = p1
)


## Save the sample LSI projection results and cluster results

In [None]:
# Write this sample's UMAP coordinates together with its metadata, using
# write.table()'s default separator and unquoted values.
write.table(
  plot_df,
  file = file.path(outdir, paste0(exp, "_umapProjection.txt")),
  quote = FALSE
)

## Save a barcode,cluster csv file
### One file with the sample ID in each cell barcode and one without (to be used for the 10x Loupe browser).

In [None]:
# Header-less CSV with two fields per line: the row name of `plot_df` (the
# cell identifier) and its predicted cell type.
write.table(
  plot_df[, "predict", drop = FALSE],
  file = file.path(outdir, paste0(exp, ".clusters.csv")),
  col.names = FALSE,
  row.names = TRUE,
  quote = FALSE,
  sep = ","
)

In [None]:
# Extract the second "_"-separated field of a cell name.
#
# x: a cell identifier string.
# Returns the second field as a character value; NA when `x` contains no "_".
init_bc <- function(x) {
  pieces <- stringr::str_split(x, pattern = "_", simplify = TRUE)
  as.character(unlist(pieces)[2])
}
# Second "_"-separated field of every row name, as returned by init_bc().
# vapply() pins the result to one character value per cell, so the column type
# does not depend on the input (sapply() returns a list for empty input).
plot_df$BC <- vapply(as.character(rownames(plot_df)), init_bc, character(1))

# Header-less CSV with two fields per line and no row names: that extracted
# field and the predicted cell type.
write.table(
  plot_df[, c("BC", "predict"), drop = FALSE],
  file = file.path(outdir, paste0(exp, ".clusters.BC.csv")),
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE,
  sep = ","
)