# Myeloid integration

# load

In [None]:
# Silence every warning (warn = -1) and verbose diagnostics for the whole session.
# NOTE(review): this also hides deprecation and model-convergence warnings raised by later cells.
options(warn = -1, verbose=FALSE)
# NOTE(review): a shebang is only honoured on the first line of a script; at this position it is a plain comment.
#!/usr/bin/env Rscript 
library(dplyr)
library(Seurat)
library(httr)
library(readr)
library(pheatmap)
library(RColorBrewer)
library(ggplot2)
library(cowplot)
library(patchwork)
library(unixtools)
library(ggrepel)
library(repr)
library(ggmin)
library(harmony)
library(SeuratWrappers)
library(Nebulosa)
library(ggthemes)
library(purrr)
library(radiant.data)
library(presto)
library(pryr)
# Disable TLS peer-certificate verification for all httr requests made in this session.
# NOTE(review): this removes protection against man-in-the-middle attacks — confirm it is still needed.
set_config(config(ssl_verifypeer = 0L))
# Cap the memory of this R process; the unit of 100000 is defined by the ulimit package — TODO confirm.
ulimit::memory_limit(100000)
# Redirect temporary files, then make every relative path below resolve against the project directory.
set.tempdir("/datastore/lucy/tmp/")
setwd("/datastore/lucy/CosMx")

# Load Korsunsky lab functions

In [None]:
# Load the project helper file; presumably this is where dimlist(), plot_dim_red(), fig.size(),
# QC_harmony_pipeline() and the marker functions used below are defined — TODO confirm.
source("./R/utils.R")
# start_upR() is not defined in this notebook; check utils.R for what clusterfiles = TRUE does.
start_upR(clusterfiles = TRUE)

# Load synovial tissue single-cell data

In [None]:
# Cached single-cell myeloid object (used below through $counts and $metadata)
sc.myeloid <- readRDS(file = "./cache/sc.myeloid.harmony_down.RDS")

In [None]:
# Inspect what was loaded
dimlist(sc.myeloid)

## cosmx

In [None]:
# Cached CosMx object with coarse cell-type labels in its metadata
cosmx.syno <- readRDS(file = "./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
dimlist(cosmx.syno)

In [None]:
# Coarse labels available for subsetting
unique(cosmx.syno$metadata[["celltype.coarse"]])

In [None]:
# Lineages carried into the myeloid integration
celltype <- c("Myeloid", "DC")

### subset cosmx cells

In [None]:
# IDs of the CosMx cells whose coarse label is one of the selected lineages
cell.ids <- with(
    subset(cosmx.syno$metadata, celltype.coarse %in% celltype),
    cellID
)

In [None]:
length(cell.ids)

In [None]:
# Comma-separated table joined onto the CosMx metadata further down
sampleFOVcondition <- read.table(
    file = "/datastore/lucy/CosMx/cache/SampleFOV-condition.csv",
    header = TRUE,
    sep = ","
)

In [None]:
# Same selection as cell.ids, stored under a second name
cosmx.cell.ids <- with(
    subset(cosmx.syno$metadata, celltype.coarse %in% celltype),
    cellID
)

In [None]:
# Index the metadata rows by cell ID.
rownames(cosmx.syno$metadata) <- cosmx.syno$metadata$cellID

In [None]:
# Join the per-FOV condition table. No `by` is given, so merge() joins on every column name the two
# tables share; all=FALSE keeps only rows with a match in both tables.
# NOTE(review): merge() sorts its result on the join keys by default and returns fresh row names, so the
# row order (and the row names set above) may no longer correspond to the columns of cosmx.syno$counts.
cosmx.syno$metadata <- merge(cosmx.syno$metadata, sampleFOVcondition, all=FALSE)

In [None]:
# Peek at the selected cells after the join.
cosmx.syno$metadata %>% subset(cellID %in% cell.ids) %>% head

In [None]:
# Prefix SampleFOV with the condition. Not idempotent: re-running this cell prepends the condition again.
cosmx.syno$metadata$SampleFOV <- paste0(cosmx.syno$metadata$Condition,"_",cosmx.syno$metadata$SampleFOV)

In [None]:
cosmx.syno$metadata %>% subset(cellID %in% cell.ids) %>% head

In [None]:
# Constant technology label, selected later alongside the Experiment column of the sc metadata.
cosmx.syno$metadata$Experiment <- "CosMx"

In [None]:
# Myeloid/DC subset of the CosMx object.
# The metadata went through merge(), which may reorder or drop rows, so counts and metadata are
# aligned explicitly on cellID instead of relying on the incidental row order of the merged table.
cosmx.myeloid <- local({
    meta_ids <- as.character(cosmx.syno$metadata$cellID)
    # keep the original cell.ids order, restricted to cells that still have metadata
    keep_ids <- as.character(cell.ids)
    keep_ids <- keep_ids[keep_ids %in% meta_ids]
    list(
        counts = cosmx.syno$counts[, keep_ids, drop = FALSE],
        metadata = cosmx.syno$metadata[match(keep_ids, meta_ids), , drop = FALSE]
    )
})

In [None]:
dimlist(cosmx.myeloid)

In [None]:
# Must print TRUE: metadata rows and count columns describe the same cells in the same order
all(cosmx.myeloid$metadata$cellID == colnames(cosmx.myeloid$counts))

## now let us integrate

In [None]:
# Genes present in the single-cell object define the shared feature space
genes_use <- rownames(sc.myeloid$counts)
length(genes_use)

In [None]:
sc.myeloid$metadata

In [None]:
# Combined object: CosMx cells first, single-cell cells second
obj <- list()
obj$counts <- cbind(
    cosmx.myeloid$counts[genes_use, ],
    sc.myeloid$counts[genes_use, ]
)
obj$metadata <- list(
    cosmx = dplyr::select(cosmx.myeloid$metadata, cellID, SampleID, SampleFOV, celltype.coarse, Experiment),
    sc = dplyr::select(sc.myeloid$metadata, cellID, sample, cosmx.myeloid, cosmx400.myeloid, Experiment)
) %>%
    dplyr::bind_rows(.id = "source") %>%
    mutate(
        # rows without a SampleID take it from `sample`
        SampleID = if_else(is.na(SampleID), sample, SampleID),
        # rows without a SampleFOV get "<SampleID>_NA", because the missing value itself is pasted
        SampleFOV = if_else(is.na(SampleFOV), paste(SampleID, SampleFOV, sep = "_"), SampleFOV)
    )


In [None]:
head(obj$metadata)

In [None]:
# QC thresholds, reused by the Harmony pipeline call below
ngenes_qc <- 3
ncounts_qc <- 7
cells_qc <- colnames(
    QC_gcmat(obj$counts, gene_thresh = ngenes_qc, count_thresh = ncounts_qc)
)
# Normalisation value: median of the two per-technology median library sizes (QC-passing cells only)
norm_value <- local({
    median_depth <- function(counts) {
        median(colSums(counts[genes_use, colnames(counts) %in% cells_qc]))
    }
    median(c(
        median_depth(cosmx.myeloid$counts),
        median_depth(sc.myeloid$counts)
    ))
})

norm_value

In [None]:
# The two medians that went into norm_value, shown separately
median(
    colSums(cosmx.myeloid$counts[genes_use, colnames(cosmx.myeloid$counts) %in% cells_qc])
)
median(
    colSums(sc.myeloid$counts[genes_use, colnames(sc.myeloid$counts) %in% cells_qc])
)

In [None]:
# Run the project's QC + Harmony pipeline on the combined object and time it.
# batch: metadata columns handed to the pipeline as vars_use.
batch<-c('source', 'SampleID', 'SampleFOV')
# Clustering resolutions; later cells read columns named Clust2.5 / Clust3.5, presumably one per value here.
cluster_res<-c(1.5, 2.5, 3.5)
# NOTE(review): two theta values are given for three batch variables. harmony expects a single theta or
# one per variable — confirm how QC_harmony_pipeline() handles this.
theta_harmony<-c(2, 0) 
sigma_harmony<-0.2
system.time({
    integrated.myeloid<-QC_harmony_pipeline(
        obj, 
        ngenes_threshold = ngenes_qc, 
        ncounts_threshold = ncounts_qc, 
        normval = norm_value,
        do_cluster_after = TRUE,
        do_umap_after = TRUE,
        resolution_clustering = cluster_res, 
        clustering_ncores = 10,
        vars_use = batch,
        theta = theta_harmony,
        sigma = sigma_harmony,
        max.iter.harmony = 12,
        max.iter.cluster = 50,
        return_object = TRUE
    )

    # Keep the Harmony settings next to the result so the cached RDS is self-describing.
    integrated.myeloid$sigma_harmony<-sigma_harmony
    integrated.myeloid$vars_use<-batch
    integrated.myeloid$theta_harmony<-theta_harmony    

    })


In [None]:
# Cache the integrated object
saveRDS(object = integrated.myeloid, file = "./cache/new.integrated.myeloid.RDS")

In [None]:
# Convergence plot of the stored Harmony object (unexported harmony helper)
graphics::plot(
    harmony:::HarmonyConvergencePlot(integrated.myeloid$Hobj)
)

## load

In [None]:
# Restart point: read the cached integration instead of recomputing it
integrated.myeloid <- readRDS(file = "./cache/new.integrated.myeloid.RDS")

In [None]:
dimlist(integrated.myeloid)

# PCA

In [None]:
dimlist(integrated.myeloid)

In [None]:
# PCA embeddings before / after Harmony, coloured by SampleID
p_before <- plot_dim_red(
    integrated.myeloid$pca_res$embeddings, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "SampleID", "PCA before Harmony", "PCA"
)
p_after <- plot_dim_red(
    integrated.myeloid$H, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "SampleID", "PCA after Harmony", "PCA"
)

In [None]:
fig.size(5, 20)
p_before | p_after

In [None]:
# ... coloured by data source
p_before <- plot_dim_red(
    integrated.myeloid$pca_res$embeddings, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "source", "PCA before Harmony", "PCA"
)
p_after <- plot_dim_red(
    integrated.myeloid$H, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "source", "PCA after Harmony", "PCA"
)

In [None]:
fig.size(5, 15)
p_before | p_after

In [None]:
# ... coloured by the single-cell reference label
p_before <- plot_dim_red(
    integrated.myeloid$pca_res$embeddings, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "cosmx400.myeloid", "PCA before Harmony", "PCA"
)
p_after <- plot_dim_red(
    integrated.myeloid$H, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "cosmx400.myeloid", "PCA after Harmony", "PCA"
)

In [None]:
fig.size(5, 20)
p_before | p_after

In [None]:
fig.size(10, 20)
p_after | (p_after + facet_wrap(~ source))

In [None]:
fig.size(10, 20)
p_after | (p_after + facet_wrap(~ source + color_col))

# UMAP

In [None]:
# Number of cells per data source
table(integrated.myeloid$metadata[["source"]])

## colored by Sample

In [None]:
# UMAP before / after Harmony, coloured by SampleID
p_before <- plot_dim_red(
    integrated.myeloid$umap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "SampleID", "UMAP before Harmony", "UMAP"
)
p_after <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "SampleID", "UMAP after Harmony", "UMAP"
)

In [None]:
fig.size(5, 20)
p_before | p_after

In [None]:
fig.size(5, 10)
p_after

In [None]:
# UMAP before / after Harmony, coloured by SampleFOV.
# The colour legend is dropped; guides(color = "none") replaces the deprecated guides(color = FALSE).
p_before <- plot_dim_red(
    integrated.myeloid$umap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "SampleFOV", "UMAP before Harmony", "UMAP"
) + guides(color = "none")
p_after <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "SampleFOV", "UMAP after Harmony", "UMAP"
) + guides(color = "none")

In [None]:
fig.size(5, 20)
p_before | p_after

In [None]:
fig.size(5, 10)
p_after

In [None]:
# UMAP before / after Harmony, coloured by data source
p_before <- plot_dim_red(
    integrated.myeloid$umap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "source", "UMAP before Harmony", "UMAP"
)
p_after <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "source", "UMAP after Harmony", "UMAP"
)

In [None]:
# Print the helper's source for reference
plot_dim_red

In [None]:
fig.size(5, 15)
p_before | p_after

In [None]:
fig.size(5, 8)
p_after + facet_wrap(~ color_col)

## colored by cell type

In [None]:
# UMAP before / after Harmony, coloured by the single-cell reference label
p_before <- plot_dim_red(
    integrated.myeloid$umap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "cosmx400.myeloid", "UMAP before Harmony", "UMAP"
)
p_after <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "cosmx400.myeloid", "UMAP after Harmony", "UMAP"
)

In [None]:
fig.size(5, 20)
p_before | p_after

In [None]:
# ... coloured by the Experiment column
p_before <- plot_dim_red(
    integrated.myeloid$umap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "Experiment", "UMAP before Harmony", "UMAP"
)
p_after <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "Experiment", "UMAP after Harmony", "UMAP"
)

In [None]:
fig.size(5, 20)
p_before | p_after

In [None]:
fig.size(5, 10)
# Harmony UMAP restricted to the rows whose source is "sc"
p_after <- local({
    sc_meta <- subset(integrated.myeloid$metadata, source %in% "sc")
    plot_dim_red(
        integrated.myeloid$Humap$embedding[sc_meta$cellID, ],
        metadata = sc_meta,
        cell_id_colname = "cellID",
        color_by = "cosmx400.myeloid",
        plot_title = "UMAP of only single cell data",
        dim_red_type = "UMAP"
    )
})

In [None]:
fig.size(5, 10)
p_after
fig.size(10, 10)
p_after + facet_wrap(~ color_col)

In [None]:
head(integrated.myeloid$Humap$clusters)

## colored by cluster

In [None]:
# Harmony UMAP coloured by the resolution-2.5 clusters, with labels
p <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "Clust2.5", "UMAP after Harmony", "UMAP",
    plot_labels = TRUE
)
fig.size(5, 20)
p + facet_wrap(~ source)

In [None]:
# Same plot for the resolution-3.5 clusters
p <- plot_dim_red(
    integrated.myeloid$Humap$embedding, integrated.myeloid$Humap$clusters, integrated.myeloid$metadata,
    "cellID", "Clust3.5", "UMAP after Harmony", "UMAP",
    plot_labels = TRUE
)
fig.size(5, 20)
p + facet_wrap(~ source)

In [None]:
# One panel per cluster
fig.size(30, 30)
p + facet_wrap(~ color_col)

# annotation:
1. Find correlations b/w sc named clusters and unnamed clusters

In [None]:
# Add the Harmony cluster assignments to the metadata.
# Only columns that are not already present are bound, so re-running this cell cannot create
# duplicated ClustX.Y columns (a plain cbind would add a second copy each time).
integrated.myeloid$metadata <- local({
    clusters <- as.data.frame(integrated.myeloid$Humap$clusters)
    missing_cols <- setdiff(colnames(clusters), colnames(integrated.myeloid$metadata))
    cbind(integrated.myeloid$metadata, clusters[, missing_cols, drop = FALSE])
})

In [None]:
tail(integrated.myeloid$metadata)

In [None]:
integrated.myeloid$metadata$cosmx400.myeloid %>% unique

In [None]:
find_glmer_markers_amp

In [None]:
# Notebook-local copy of presto's model driver: fits one model per feature with glmm_uni()
# (in parallel through furrr/future) and collapses the per-feature results into one object.
#
# Arguments
#   formula       model formula; any mention of `size_varname` is rewritten to EXPOSURE.
#   design        per-observation table; must contain the `size_varname` column.
#   response      matrix-like with features as row names.
#   size_varname  name of the column of `design` copied into design$EXPOSURE.
#   features      features to fit; defaults to all row names of `response`.
#   effects_cov, nsim, family, min_sigma
#                 forwarded unchanged to glmm_uni().
#   ncore         1 = sequential; 0 or Inf = multisession with future's default worker count;
#                 any other value = multisession with exactly `ncore` workers.
#   verbose       values > 0 print progress messages.
#
# Returns the list built by collapse_lres(), extended with the naming tables, design matrix,
# the response rows of successfully fitted features, and the call settings.
#
# Change from the previous copy: `ncore` was ignored (workers were hard-coded to 15 through a
# global `.ncore` plus an `rm()` that could not find it). The worker count is now passed to
# plan() directly, which needs no global variable.
presto.presto <- function (formula, design, response, size_varname, features = NULL, 
    effects_cov = c(""), ncore = 1, nsim = 100, family = "poisson", 
    min_sigma = 0, verbose = 0L) 
{
    if (is.null(features)) {
        features <- rownames(response)
    }
    if (family %in% c("poisson", "binomial", "nb")) {
        message("CAUTION: if using GLMM, make sure your counts are integers!")
    }
    # Expose the size variable under a fixed name and rewrite the formula accordingly
    design$EXPOSURE <- design[[size_varname]]
    fstr <- gsub(size_varname, "EXPOSURE", as.character(formula))
    formula <- as.formula(sprintf("%s~%s", fstr[[2]], fstr[[3]]), 
        env = .GlobalEnv)
    if (verbose > 0) {
        message("Set up models")
    }
    # Fit the first feature once; its structure provides the names of priors and coefficients
    model_base <- fit_model.presto(formula, design, response[features[[1]], 
        ], family)
    priornames_df <- as.data.frame(VarCorr(model_base))[, 1:3]
    if (isGLMM(model_base)) {
        priornames_df <- rbind(priornames_df, tibble(grp = "Residual", 
            var1 = NA, var2 = NA))
    }
    # An offset counts as present when any element of the fitted offset differs from 0
    has_offset <- !all(map_lgl(model_base@resp$offset, identical, 
        0))
    betanames_df <- make_betanames_df(model_base, has_offset)
    features <- intersect(features, rownames(response))
    # Choose the future backend from ncore
    if (ncore == 1) {
        future::plan(future::sequential)
    }
    else if (ncore %in% c(0, Inf)) {
        future::plan(future::multisession)
    }
    else {
        future::plan(future::multisession, workers = ncore)
    }
    if (verbose > 0) {
        message("Learn the models")
    }
    lres <- furrr::future_map(features, glmm_uni, formula, design, 
        response, effects_cov, family, nsim, has_offset, min_sigma)
    names(lres) <- features
    # Keep only the fits whose status is 0
    lres <- lres[which(purrr::map_lgl(as.integer(map_int(lres, 
        "status")), identical, 0L))]
    if (verbose > 0) {
        message("Aggregate the results")
    }
    res <- collapse_lres(lres)
    if (verbose > 0) {
        message("Cleap up names")
    }
    covmat_names <- tibble(grpvar_orig = rownames(res$covmat)) %>% 
        left_join(subset(betanames_df, term %in% c("(Intercept)", 
            "Fixed"))) %>% dplyr::mutate(newname = case_when(is.na(grpvar) ~ 
        grpvar_orig, TRUE ~ as.character(glue::glue("{grpvar}.{grp}.{term}")))) %>% 
        with(newname)
    dimnames(res$covmat) <- list(covmat_names, covmat_names, 
        colnames(res$beta))
    res$betanames_df <- betanames_df
    res$priornames_df <- priornames_df
    res$meta_data <- design
    # Stack (offset,) fixed-effect and random-effect design rows into one matrix
    if (has_offset) {
        res$design <- list(EXPOSURE = model_base@resp$offset, 
            t(model_base@pp$X), model_base@pp$Zt) %>% purrr::reduce(Matrix::rbind2)
    }
    else {
        res$design <- list(t(model_base@pp$X), model_base@pp$Zt) %>% 
            purrr::reduce(Matrix::rbind2)
    }
    row.names(res$design) <- res$betanames_df$grp
    res$response <- response[names(lres), ]
    if (verbose > 0) {
        message("Compute gene means")
    }
    res <- genemeans.presto(res, xpm = 1e+06)
    res$has_offset <- has_offset
    res$family <- family
    res$size_varname <- size_varname
    res$nsim <- nsim
    res$formula_str <- as.character(formula)
    return(res)
}

In [None]:
# presto does not export these helpers, so they are fetched from its namespace
collapse_vecs <- utils::getFromNamespace("collapse_vecs", "presto")
collapse_mats <- utils::getFromNamespace("collapse_mats", "presto")
collapse_lres <- utils::getFromNamespace("collapse_lres", "presto")

In [None]:
# Switch future to the multisession backend
future::plan(multisession)


In [None]:
# Markers of the single-cell reference labels, fitted on single-cell data only
cluster_col_name <- "cosmx400.myeloid"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    paste0("(1|SampleID)"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

sc.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "sc"), cellID)
effects_marginal_sc_ref <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    subset(integrated.myeloid$metadata, cellID %in% sc.cells.idx),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

In [None]:
# Same model with the resolution-3.5 clusters as grouping variable
cluster_col_name <- "Clust3.5"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    paste0("(1|SampleID)"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

sc.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "sc"), cellID)
effects_marginal_sc_clust <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        cellID %in% sc.cells.idx
    ),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

In [None]:
head(integrated.myeloid$metadata)

In [None]:
# Genes that pass the effect-size and p-value cut-offs in either marker table
genes_use_corr <- unique(with(
    subset(
        rbind(effects_marginal_sc_ref, effects_marginal_sc_clust),
        abs(logFC) > 0.5 & pvalue < 0.05
    ),
    feature
))
# Correlation between contrasts, computed on the logFC of those genes
corr_mat_sc <- local({
    logfc_wide <- rbind(effects_marginal_sc_ref, effects_marginal_sc_clust) %>%
        subset(feature %in% genes_use_corr) %>%
        dplyr::select(contrast, feature, logFC) %>%
        spread(contrast, logFC, fill = NA) %>%
        column_to_rownames("feature")
    cor(as.matrix(logfc_wide))
})

In [None]:
# Rows: single-cell reference labels; columns: every contrast in corr_mat_sc
sc_clust_names <- unique(effects_marginal_sc_ref$contrast)

.mat <- corr_mat_sc[sc_clust_names, ]
h <- Heatmap(
    .mat,
    name = 'logFC',
    col = circlize::colorRamp2(c(-1, 0.1, 1), c(muted("blue"), "white", "red")),
    width = ncol(.mat) * unit(5, "mm"),
    height = nrow(.mat) * unit(5, "mm"),
    column_names_gp = grid::gpar(fontsize = 10),
    row_names_gp = grid::gpar(fontsize = 10),
    column_title = stringr::str_wrap("Marker correlations b/w cosmx clusters and sc reference labels", 35),
    column_title_gp = grid::gpar(fontsize = 15),
    row_dend_side = "right",
    heatmap_legend_param = list(legend_direction = "horizontal", legend_width = unit(6, "cm")),
    use_raster = TRUE
)

fig.size(7, 12)
draw(h, heatmap_legend_side = "left")

In [None]:
# Reference label x cluster table for single-cell cells, normalised by row and then by column
local({
    sc_meta <- subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        source %in% "sc"
    )
    confusion <- with(sc_meta, table(cosmx400.myeloid, Clust3.5))
    prop.table(prop.table(confusion, 1), 2)
})

### Confusion matrix

In [None]:
# Confusion matrix of reference labels (rows) against Clust3.5 clusters (columns) for single-cell
# cells, normalised by row and then by column.
# The heatmap size is derived from the table being drawn; the earlier version used ncol(18),
# which is NULL, and the row count of the unrelated `.mat`.
h <- local({
    confusion <- integrated.myeloid$metadata %>%
        cbind(integrated.myeloid$Humap$clusters) %>%
        subset(source %in% "sc") %>%
        with(table(cosmx400.myeloid, Clust3.5)) %>%
        prop.table(1) %>%
        prop.table(2)
    Heatmap(
        confusion,
        name = "Freq",
        column_title = stringr::str_wrap("Confusion matrix of unnamed sc cells vs sc reference annotations", width = 35),
        width = ncol(confusion) * unit(5, "mm"),
        height = nrow(confusion) * unit(5, "mm"),
        heatmap_legend_param = list(
            legend_direction = "horizontal",
            legend_width = unit(6, "cm")
        )
    )
})
fig.size(7, 12)
draw(h, heatmap_legend_side = "bottom")

In [None]:
# Twenty highest-logFC features per contrast (the result stays grouped by contrast)
top20 <- slice_max(
    group_by(effects_marginal_sc_clust, contrast),
    n = 20,
    order_by = logFC
)

In [None]:
unique(top20$contrast)

In [None]:
top20[top20$contrast == "16", ]

## Subset and recluster

In [None]:
# Subcluster Clust3.5 cluster 4 at resolutions 0.2 and 0.4
subclustering <- subcluster_cells(
    integrated.myeloid$counts,
    integrated.myeloid$metadata,
    integrated.myeloid$Humap$snn,
    "Clust3.5",
    4,
    c(0.2, 0.4)
)

In [None]:
head(subclustering)

In [None]:
# Print the helper's source for reference
merge_clusters

In [None]:
head(integrated.myeloid$metadata)

In [None]:
# Fold the subclusters of cluster 4 back into Clust3.5.
# The previous call ended in `subclustering[[1]]$`, which does not parse. The other merge_clusters()
# calls in this notebook pass the name of a resolution column, so one is passed here as well.
# NOTE(review): "Clust0.4" is assumed (cluster 4 was subclustered at 0.2 and 0.4) — confirm the resolution.
merged.clusters <- merge_clusters(integrated.myeloid$metadata, subclustering, "Clust3.5", "Clust0.4")

In [None]:
# Adopt the merged labels
integrated.myeloid[["metadata"]] <- merged.clusters

In [None]:
head(merged.clusters)

In [None]:
# Subcluster Clust3.5 cluster 19 at resolution 0.4
subclustering <- subcluster_cells(
    integrated.myeloid$counts,
    integrated.myeloid$metadata,
    integrated.myeloid$Humap$snn,
    "Clust3.5",
    19,
    0.4
)

In [None]:
subclustering

In [None]:
merged.clusters <- merge_clusters(
    integrated.myeloid$metadata,
    subclustering,
    "Clust3.5",
    "Clust0.4"
)

In [None]:
integrated.myeloid[["metadata"]] <- merged.clusters

In [None]:
head(merged.clusters)

In [None]:
# Subcluster Clust3.5 cluster 30 at resolution 0.2
subclustering <- subcluster_cells(
    integrated.myeloid$counts,
    integrated.myeloid$metadata,
    integrated.myeloid$Humap$snn,
    "Clust3.5",
    30,
    c(0.2)
)

In [None]:
subclustering

In [None]:
head(integrated.myeloid$metadata)

In [None]:
merged.clusters <- merge_clusters(
    integrated.myeloid$metadata,
    subclustering,
    "Clust3.5",
    "Clust0.2"
)

In [None]:
integrated.myeloid[["metadata"]] <- merged.clusters

In [None]:
head(merged.clusters)

In [None]:
# Harmony UMAP coloured by the Clust3.5 labels now stored in the metadata
# NOTE(review): the title reads integrated.myeloid$source; `source` is otherwise only used as a
# metadata column in this notebook — confirm the list really has a top-level `source` element.
plot1 <- plot_dim_red(
    dim_red_embeddings = integrated.myeloid$Humap$embedding,
    metadata = integrated.myeloid$metadata,
    cell_id_colname = "cellID",
    color_by = "Clust3.5",
    plot_title = paste0(
        "UMAP after Harmony colored by celltype - ",
        integrated.myeloid$source[1],
        " genes"
    ),
    dim_red_type = "UMAP"
)

In [None]:
fig.size(7, 20)
plot1

In [None]:
fig.size(10, 20)
plot1 + facet_wrap(~ color_col)

In [None]:
# Confusion matrix of reference labels against Clust3.5 for single-cell cells, drawn at a fixed size
h <- local({
    sc_meta <- subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        source %in% "sc"
    )
    confusion <- with(sc_meta, table(cosmx400.myeloid, Clust3.5))
    Heatmap(
        prop.table(prop.table(confusion, 1), 2),
        name = "Freq",
        column_title = stringr::str_wrap("Confusion matrix of unnamed sc cells vs sc reference annotations", width = 35),
        width = 40 * unit(5, "mm"),
        height = 20 * unit(5, "mm"),
        heatmap_legend_param = list(legend_direction = "horizontal", legend_width = unit(6, "cm"))
    )
})
fig.size(7, 12)
draw(h, heatmap_legend_side = "bottom")

In [None]:
# Refit the reference-label markers on single-cell data
cluster_col_name <- "cosmx400.myeloid"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    paste0("(1|SampleID)"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

sc.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "sc"), cellID)
effects_marginal_sc_ref <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    subset(integrated.myeloid$metadata, cellID %in% sc.cells.idx),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

In [None]:
# Refit the cluster markers with Clust3.5 as grouping variable
cluster_col_name <- "Clust3.5"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    paste0("(1|SampleID)"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

sc.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "sc"), cellID)
effects_marginal_sc_clust <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        cellID %in% sc.cells.idx
    ),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

In [None]:
head(integrated.myeloid$metadata)

In [None]:
# Genes passing the cut-offs in either table, then contrast-by-contrast logFC correlations
genes_use_corr <- unique(with(
    subset(
        rbind(effects_marginal_sc_ref, effects_marginal_sc_clust),
        abs(logFC) > 0.5 & pvalue < 0.05
    ),
    feature
))
corr_mat_sc <- local({
    logfc_wide <- rbind(effects_marginal_sc_ref, effects_marginal_sc_clust) %>%
        subset(feature %in% genes_use_corr) %>%
        dplyr::select(contrast, feature, logFC) %>%
        spread(contrast, logFC, fill = NA) %>%
        column_to_rownames("feature")
    cor(as.matrix(logfc_wide))
})

In [None]:
# Twenty highest-logFC features per Clust3.5 contrast.
# `effects_marginal_cosmx` is first created further down the notebook, so a top-to-bottom run
# failed here; the cluster-level table fitted just above is used instead.
# NOTE(review): if CosMx markers were intended, move this cell below the CosMx marker fit.
top20 <- effects_marginal_sc_clust %>% group_by(contrast) %>% slice_max(n = 20, order_by = logFC)

In [None]:
top20[top20$contrast=="10",]

In [None]:
# Rows: single-cell reference labels; columns: every contrast in corr_mat_sc
sc_clust_names <- unique(effects_marginal_sc_ref$contrast)

.mat <- corr_mat_sc[sc_clust_names, ]
h <- Heatmap(
    .mat,
    name = 'logFC',
    col = circlize::colorRamp2(c(-1, 0.1, 1), c(muted("blue"), "white", "red")),
    width = ncol(.mat) * unit(5, "mm"),
    height = nrow(.mat) * unit(5, "mm"),
    column_names_gp = grid::gpar(fontsize = 10),
    row_names_gp = grid::gpar(fontsize = 10),
    column_title = stringr::str_wrap("Marker correlations b/w cosmx clusters and sc reference labels", 35),
    column_title_gp = grid::gpar(fontsize = 15),
    row_dend_side = "right",
    heatmap_legend_param = list(legend_direction = "horizontal", legend_width = unit(6, "cm")),
    use_raster = TRUE
)

fig.size(7, 12)
draw(h, heatmap_legend_side = "left")

## Fractions of cells from different sources in each cluster

In [None]:
# Per cluster: number of cells from each source and the corresponding fractions
with(integrated.myeloid$metadata, table(Clust3.5, source)) %>%
    as.data.frame() %>%
    mutate(Clust3.5 = as.character(Clust3.5)) %>%
    spread(source, Freq) %>%
    rename_at(vars(-Clust3.5), ~ paste0("ncells_", .x)) %>%
    mutate(
        Freq_sc = ncells_sc / (ncells_sc + ncells_cosmx),
        Freq_cosmx = ncells_cosmx / (ncells_sc + ncells_cosmx)
    )

In [None]:
# For every contrast (row of mat_ann) pick the reference label with the highest correlation.
# max.col() breaks ties at random by default; ties.method = "first" makes the assignment reproducible.
mat_ann <- t(.mat)
annotations_sc <- data.frame(
    cluster = rownames(mat_ann),
    type_harmony = colnames(mat_ann)[max.col(mat_ann, ties.method = "first")]
)

In [None]:
annotations_sc

In [None]:
fig.size(5, 20)
# Stacked bars: fraction of each cluster contributed by sc and by cosmx
with(integrated.myeloid$metadata, table(Clust3.5, source)) %>%
    as.data.frame() %>%
    mutate(Clust3.5 = as.character(Clust3.5)) %>%
    spread(source, Freq) %>%
    rename_at(vars(-Clust3.5), ~ paste0("ncells_", .x)) %>%
    mutate(
        Freq_sc = ncells_sc / (ncells_sc + ncells_cosmx),
        Freq_cosmx = ncells_cosmx / (ncells_sc + ncells_cosmx)
    ) %>%
    mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
    gather(source, Freq, -c(Clust3.5, starts_with("ncells"))) %>%
    dplyr::arrange(Clust3.5) %>%
    ggplot() +
    geom_col(aes(x = factor(Clust3.5), y = Freq, fill = source)) +
    scale_fill_tableau() +
    ggtitle("Fraction of cells from different technologies in each cluster") +
    theme(plot.title = element_textbox_simple())

In [None]:
availableCores()

In [None]:
# Clusters assigned to each reference label, joined with "_"
summarise(
    group_by(arrange(annotations_sc, type_harmony), type_harmony),
    clusters = paste(cluster, collapse = "_")
)

In [None]:
annotations_sc

In [None]:
# Cluster-level labels: Clust3.5 value -> annotation (some labels carry the cluster number)
manual.anno.clust <- c(
    "0"    = "unmapped_0",
    "1"    = "SPP1+ STM",
    "2"    = "unmapped_2",
    "3"    = "FOLR2highCLEC10A+ STM",
    "4_0"  = "CD14highS100A12+ monoSTM",
    "4_1"  = "CD14highS100A12+ monoSTM",
    "4_2"  = "iDC3 CD14highCD163+",
    "5"    = "iDC3 CD14highCD163+",
    "6"    = "TREM2+LPL+ STM_6",
    "7"    = "TNF+ICAM1+ STM",
    "8"    = "DC2 CD1c+_doublet",
    "9"    = "unmapped_9",
    "10"   = "TREM2+ STM_10",
    "11"   = "unmapped_11",
    "12"   = "DC3 CD1clowCD163+",
    "13"   = "unmapped_13",
    "14"   = "SPP1+TREM2low STM",
    "15"   = "ISG15+CXCL10+ STM",
    "16"   = "FOLR2highLYVE1+ STM_16",
    "17"   = "TREM2+LPL+ STM_17",
    "18"   = "TREM2+ STM_18",
    "19_0" = "DC2 CD1c+",
    "19_1" = "DC2 CD1c+",
    "19_2" = "DC2 CCR7+",
    "20"   = "FOLR2highEGR1+ STM",
    "21"   = "SPP1+ STM",
    "22"   = "BIRC5+ cycling STM",
    "23"   = "FOLR2highCLEC10A+ STM",
    "24"   = "FOLR2highLYVE1+ STM_24",
    "25"   = "CLEC10A+ATF3+ STM",
    "26"   = "DC3 CD1clowCD163+",
    "27"   = "FOLR2highLYVE1+ STM_27",
    "28"   = "unmapped_28",
    "29"   = "TREM2+LPL+ STM_29",
    "30_1" = "DC1 CLEC9A+",
    "30_0" = "DC2 CCR7+",
    "31"   = "TREM2+LPL+ STM_31",
    "32"   = "unmapped_32",
    "33"   = "FCGR3A+ monoSTM",
    "34"   = "BIRC5+ cycling STM",
    "35"   = "unmapped_35"
)

In [None]:
# Cell-type labels: Clust3.5 value -> annotation (clusters of the same type share one label)
manual.anno <- c(
    "0"    = "unmapped_0",
    "1"    = "SPP1+ STM",
    "2"    = "unmapped_2",
    "3"    = "FOLR2highCLEC10A+ STM",
    "4_0"  = "CD14highS100A12+ monoSTM",
    "4_1"  = "CD14highS100A12+ monoSTM",
    "4_2"  = "iDC3 CD14highCD163+",
    "5"    = "iDC3 CD14highCD163+",
    "6"    = "TREM2+LPL+ STM",
    "7"    = "TNF+ICAM1+ STM",
    "8"    = "DC2 CD1c+",
    "9"    = "unmapped_9",
    "10"   = "TREM2+ STM",
    "11"   = "unmapped_11",
    "12"   = "DC3 CD1clowCD163+",
    "13"   = "unmapped_13",
    "14"   = "SPP1+TREM2low STM",
    "15"   = "ISG15+CXCL10+ STM",
    "16"   = "FOLR2highLYVE1+ STM",
    "17"   = "TREM2+LPL+ STM",
    "18"   = "TREM2+ STM",
    "19_0" = "DC2 CD1c+",
    "19_1" = "DC2 CD1c+",
    "19_2" = "DC2 CD1c+CCR7+",
    "20"   = "FOLR2highEGR1+ STM",
    "21"   = "SPP1+ STM",
    "22"   = "BIRC5+ cycling STM",
    "23"   = "FOLR2highCLEC10A+ STM",
    "24"   = "FOLR2highLYVE1+ STM",
    "25"   = "CLEC10A+ATF3+ STM",
    "26"   = "DC3 CD1clowCD163+",
    "27"   = "FOLR2highLYVE1+ STM",
    "28"   = "unmapped_28",
    "29"   = "TREM2+LPL+ STM",
    "30_1" = "DC1 CLEC9A+",
    "30_0" = "DC2 CD1c+CCR7+",
    "31"   = "TREM2+LPL+ STM",
    "32"   = "unmapped_32",
    "33"   = "FCGR3A+ monoSTM",
    "34"   = "BIRC5+ cycling STM",
    "35"   = "unmapped_35"
)

# transfer labels

In [None]:
# Translate each cell's Clust3.5 value into its cell-type label
integrated.myeloid$metadata$type_harmony <- plyr::revalue(
    as.character(integrated.myeloid$metadata$Clust3.5),
    replace = manual.anno
)

In [None]:
# ... and into its cluster-level label
integrated.myeloid$metadata$type_harmony_clust <- plyr::revalue(
    as.character(integrated.myeloid$metadata$Clust3.5),
    replace = manual.anno.clust
)

In [None]:
# Infix complement of %in%: TRUE for elements of x that are absent from table
`%notin%` <- function(x, table) !(x %in% table)

In [None]:
unique(integrated.myeloid$metadata[["type_harmony"]])

In [None]:
# Harmony UMAP coloured by the transferred cell-type label
p <- plot_dim_red(
    integrated.myeloid$Humap$embedding,
    clusters = NULL,
    integrated.myeloid$metadata,
    "cellID",
    color_by = "type_harmony",
    "UMAP after Harmony",
    "UMAP"
)

In [None]:
fig.size(5, 15)
p

In [None]:
fig.size(5, 12)
p + facet_wrap(~ source)

In [None]:
# One panel per label
fig.size(30, 30)
p + facet_wrap(~ color_col)

## correlations b/w sc reference and newly annotated cosmx cells

In [None]:
# Split SampleFOV on its last underscore into SampleID and FOV; remove = FALSE keeps SampleFOV itself.
integrated.myeloid$metadata <- integrated.myeloid$metadata %>%
    separate(SampleFOV, c("SampleID", "FOV"), remove = FALSE, sep = "_(?=[^_]+$)")

In [None]:
# Make sure the Harmony cluster columns are in the metadata.
# Only columns that are still missing are bound: a plain cbind here would append a second copy of
# every ClustX.Y column and leave the metadata with duplicated column names.
integrated.myeloid$metadata <- local({
    clusters <- as.data.frame(integrated.myeloid$Humap$clusters)
    missing_cols <- setdiff(colnames(clusters), colnames(integrated.myeloid$metadata))
    cbind(integrated.myeloid$metadata, clusters[, missing_cols, drop = FALSE])
})

In [None]:
colnames(integrated.myeloid$metadata)

In [None]:
# CosMx markers with Clust3.5 as grouping variable, nested within FOV and within sample
# NOTE(review): the next cell assigns to the same name, so this result is overwritten once it runs.
cluster_col_name <- "Clust3.5"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|SampleFOV/", cluster_col_name, ")"),
    paste0("(1|SampleID/", cluster_col_name, ")"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)
cosmx.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "cosmx"), cellID)
effects_marginal_cosmx <- find_glmer_markers(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% cosmx.cells.idx],
    subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        cellID %in% cosmx.cells.idx
    ),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", "FOV", cluster_col_name)
)

In [None]:
# Same model with the transferred cell-type label as grouping variable
cluster_col_name <- "type_harmony"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|SampleFOV/", cluster_col_name, ")"),
    paste0("(1|SampleID/", cluster_col_name, ")"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

cosmx.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "cosmx"), cellID)
effects_marginal_cosmx <- find_glmer_markers(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% cosmx.cells.idx],
    subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        cellID %in% cosmx.cells.idx
    ),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", "FOV", cluster_col_name)
)

In [None]:
# Genes passing the cut-offs in either single-cell marker table
genes_use_corr <- unique(with(
    subset(
        rbind(effects_marginal_sc_ref, effects_marginal_sc_clust),
        abs(logFC) > 0.5 & pvalue < 0.05
    ),
    feature
))
# Contrast-by-contrast correlation of logFC over those genes
corr_mat_sc <- local({
    logfc_wide <- rbind(effects_marginal_sc_ref, effects_marginal_sc_clust) %>%
        subset(feature %in% genes_use_corr) %>%
        dplyr::select(contrast, feature, logFC) %>%
        spread(contrast, logFC, fill = NA) %>%
        column_to_rownames("feature")
    cor(as.matrix(logfc_wide))
})

In [None]:
# Seurat object built from the integrated counts.
# CreateSeuratObject() has no `data` argument, so the normalised matrix passed that way never reached
# the "data" slot that the violin plot below reads; it is stored explicitly with SetAssayData().
# NOTE(review): assumes integrated.myeloid$logcpx has the same row and column names as the counts — confirm.
seurat.myeloid <- CreateSeuratObject(counts = integrated.myeloid$counts)
seurat.myeloid <- SetAssayData(
    seurat.myeloid,
    slot = "data",
    new.data = integrated.myeloid$logcpx
)

In [None]:
colnames(integrated.myeloid$metadata)

In [None]:
# Cluster-level annotation under the column name used as Seurat identity below
integrated.myeloid$metadata$cosmx.clusters <- integrated.myeloid$metadata$type_harmony_clust

In [None]:
colnames(integrated.myeloid$metadata)

In [None]:
# AddMetaData() matches a data frame to cells by row name, so the table is indexed by cellID
# explicitly instead of relying on whatever row names the metadata happens to carry.
meta <- as.data.frame(integrated.myeloid$metadata) %>%
    dplyr::select(SampleID, SampleFOV, cosmx.clusters, cellID)
rownames(meta) <- meta$cellID

In [None]:
seurat.myeloid <- Seurat::AddMetaData(seurat.myeloid, meta)

In [None]:
Idents(seurat.myeloid) <- "cosmx.clusters"

In [None]:
rownames(integrated.myeloid$counts)

In [None]:
fig.size(10, 20)
# CD209 per identity class, one rainbow colour per class
VlnPlot(
    seurat.myeloid,
    "CD209",
    cols = rainbow(n = length(unique(Idents(seurat.myeloid)))),
    slot = "data"
)

In [None]:
unique(effects_marginal_cosmx$contrast)

In [None]:
# Twenty highest-logFC features per contrast (the result stays grouped by contrast)
top20 <- slice_max(
    group_by(effects_marginal_cosmx, contrast),
    n = 20,
    order_by = logFC
)

In [None]:
top20[top20$contrast == "unmapped_2", ]

In [None]:
top20[top20$contrast == "DC2 CD1c+_doublet", ]

In [None]:
top20[top20$contrast == "DC2 CD1c+", ]

In [None]:
# Single-cell markers with Clust3.5 as grouping variable
# NOTE(review): the next cell assigns to the same name, so this result is overwritten once it runs.
cluster_col_name <- "Clust3.5"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    paste0("(1|SampleID)"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

sc.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "sc"), cellID)
effects_marginal_sc_clust_ann <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        cellID %in% sc.cells.idx
    ),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

In [None]:
# Single-cell markers with the transferred cell-type label as grouping variable
cluster_col_name <- "type_harmony"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    paste0("(1|SampleID)"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

sc.cells.idx <- with(subset(integrated.myeloid$metadata, source %in% "sc"), cellID)
effects_marginal_sc_clust_ann <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    subset(
        cbind(integrated.myeloid$metadata, integrated.myeloid$Humap$clusters),
        cellID %in% sc.cells.idx
    ),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

In [None]:
# Correlate marker logFC profiles of the sc reference labels with those of the CosMx clusters.
# Contrasts are suffixed with their origin, restricted to `genes_use_corr`, spread into a
# feature x contrast matrix and passed to `cor`.
corr_sc_cosmx <- rbind(
    effects_marginal_sc_ref %>% mutate(contrast = paste0(contrast, "_sc")),
    effects_marginal_cosmx %>% mutate(contrast = paste0(contrast, "_cosmx"))
) %>%
    subset(feature %in% genes_use_corr) %>%
    dplyr::select(contrast, feature, logFC) %>%
    spread(contrast, logFC, fill = NA) %>%
    column_to_rownames("feature") %>%
    as.matrix %>%
    cor

# Row selection: only the sc reference contrasts.
sc_ref_clusters <- unique(paste0(effects_marginal_sc_ref$contrast, "_sc"))

.mat <- corr_sc_cosmx[sc_ref_clusters, ]
# NOTE(review): the legend is named 'logFC' although the cells hold correlation coefficients.
h <- Heatmap(
    .mat,
    name = 'logFC',
    col = circlize::colorRamp2(c(-1, 0.1, 1), c(muted("blue"), "white", "red")),
    width = ncol(.mat)*unit(6, "mm"),
    height = nrow(.mat)*unit(6, "mm"),
    cluster_columns = FALSE,
    column_names_gp = grid::gpar(fontsize = 10),
    row_names_gp = grid::gpar(fontsize = 10),
    column_title = stringr::str_wrap("Marker correlations b/w cosmx clusters and sc reference labels", 35),
    column_title_gp = grid::gpar(fontsize = 15),
    use_raster = TRUE,
    # print the value of every cell, rounded to one decimal
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.text(sprintf("%.1f", .mat[i, j]), x, y, gp = gpar(fontsize = 5))
    }
)

In [None]:
fig.size(10, 18)
draw(h, heatmap_legend_side = "left")

In [None]:
# Keep the 20 rows with the largest logFC within each contrast.
top20 <- effects_marginal_cosmx %>%
    group_by(contrast) %>%
    slice_max(order_by = logFC, n = 20)

In [None]:
# Same correlation matrix as before, with the annotated sc cluster markers ("_sc_ann")
# added as a third set of contrasts.
corr_sc_cosmx <- rbind(
    effects_marginal_sc_ref %>% mutate(contrast = paste0(contrast, "_sc")),
    effects_marginal_sc_clust_ann %>% mutate(contrast = paste0(contrast, "_sc_ann")),
    effects_marginal_cosmx %>% mutate(contrast = paste0(contrast, "_cosmx"))
) %>%
    subset(feature %in% genes_use_corr) %>%
    dplyr::select(contrast, feature, logFC) %>%
    spread(contrast, logFC, fill = NA) %>%
    column_to_rownames("feature") %>%
    as.matrix %>%
    cor

# Row selection: only the sc reference contrasts.
sc_ref_clusters <- unique(paste0(effects_marginal_sc_ref$contrast, "_sc"))

.mat <- corr_sc_cosmx[sc_ref_clusters, ]
# NOTE(review): the legend is named 'logFC' although the cells hold correlation coefficients.
h <- Heatmap(
    .mat,
    name = 'logFC',
    col = circlize::colorRamp2(c(-1, 0.1, 1), c(muted("blue"), "white", "red")),
    width = ncol(.mat)*unit(6, "mm"),
    height = nrow(.mat)*unit(6, "mm"),
    cluster_columns = FALSE,
    column_names_gp = grid::gpar(fontsize = 10),
    row_names_gp = grid::gpar(fontsize = 10),
    column_title = stringr::str_wrap("Marker correlations b/w cosmx clusters and sc reference labels", 35),
    column_title_gp = grid::gpar(fontsize = 15),
    use_raster = TRUE,
    # print the value of every cell, rounded to one decimal
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.text(sprintf("%.1f", .mat[i, j]), x, y, gp = gpar(fontsize = 5))
    }
)

In [None]:
fig.size(10, 18)
draw(h, heatmap_legend_side = "left")

In [None]:
# Markers of the CosMx cells grouped by `type_harmony_clust`, computed with `find_glmer_markers`.
# The formula nests the cluster column within SampleFOV and within SampleID.
cluster_col_name <- "type_harmony_clust"
term_1 <- sprintf("1 + (1|%s)", cluster_col_name)
term_2 <- c(
    sprintf("(1|SampleFOV/%s)", cluster_col_name),
    sprintf("(1|SampleID/%s)", cluster_col_name),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)
# IDs of the cells whose `source` is "cosmx"
cosmx.cells.idx <- integrated.myeloid$metadata %>% subset(source %in% "cosmx") %>% with(cellID)
effects_marginal_cosmx <- find_glmer_markers(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% cosmx.cells.idx],
    integrated.myeloid$metadata %>%
        cbind(integrated.myeloid$Humap$clusters) %>%
        subset(cellID %in% cosmx.cells.idx),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", "FOV", cluster_col_name)
)

In [None]:
# Keep the 20 rows with the largest logFC within each contrast, then inspect the
# FOLR2highLYVE1+ STM sub-clusters one by one.
top20 <- effects_marginal_cosmx %>%
    group_by(contrast) %>%
    slice_max(order_by = logFC, n = 20)

In [None]:
top20[top20$contrast == "FOLR2highLYVE1+ STM_3", ]

In [None]:
top20[top20$contrast == "FOLR2highLYVE1+ STM_16", ]

In [None]:
top20[top20$contrast == "FOLR2highLYVE1+ STM_24", ]

In [None]:
top20[top20$contrast == "FOLR2highLYVE1+ STM_27", ]

In [None]:
# Per-cell total counts (column sums of the count matrix), stored in the metadata and
# plotted as bars per `type_harmony` label, faceted by data source.
# NOTE(review): the data frame is called `table`, which masks the name of base::table for data lookups.
integrated.myeloid$metadata$total.counts <- colSums(integrated.myeloid$counts)
table <- data.frame(
    cluster = integrated.myeloid$metadata$type_harmony,
    counts = integrated.myeloid$metadata$total.counts,
    source = integrated.myeloid$metadata$source
)
ggplot(table, aes(x = cluster, y = counts)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90)) +
    facet_wrap(~source)

In [None]:
integrated.myeloid$metadata %>% head

In [None]:
# Drop any existing `type_harmony_clean` column before it is rebuilt.
integrated.myeloid$metadata[["type_harmony_clean"]] <- NULL

In [None]:
# Rename the unmapped clusters (and FOLR2highLYVE1+ STM_3) in place in `type_harmony_clust`.
integrated.myeloid$metadata$type_harmony_clust <- plyr::revalue(
    as.character(integrated.myeloid$metadata$type_harmony_clust),
    c(
        "unmapped_0" = "FOLR2highLYVE1+ STM",
        "unmapped_11" = "FOLR2+ STM/FLS doublet",
        "unmapped_13" = "DC3/FLS doublet",
        "unmapped_28" = "FOLR2+ STM/FLS/Plasma doublet",
        "unmapped_32" = "FOLR2highCLEC10A+ STM",
        "unmapped_35" = "FOLR2+ STM/Bcell doublet",
        "unmapped_9" = "HBA1+CLEC9A+ DC1",
        "unmapped_2" = "TGFB2+BIRC5+ STM",
        "FOLR2highLYVE1+ STM_3" = "FOLR2highCLEC10A+ STM"
    )
)
    

In [None]:
# Build `type_harmony_clean` from `type_harmony_clust` with the same renaming table.
integrated.myeloid$metadata$type_harmony_clean <- plyr::revalue(
    as.character(integrated.myeloid$metadata$type_harmony_clust),
    c(
        "unmapped_0" = "FOLR2highLYVE1+ STM",
        "unmapped_11" = "FOLR2+ STM/FLS doublet",
        "unmapped_13" = "DC3/FLS doublet",
        "unmapped_28" = "FOLR2+ STM/FLS/Plasma doublet",
        "unmapped_32" = "FOLR2highCLEC10A+ STM",
        "unmapped_35" = "FOLR2+ STM/Bcell doublet",
        "unmapped_9" = "HBA1+CLEC9A+ DC1",
        "unmapped_2" = "TGFB2+BIRC5+ STM",
        "FOLR2highLYVE1+ STM_3" = "FOLR2highCLEC10A+ STM"
    )
)
    

In [None]:
integrated.myeloid$metadata$type_harmony_clean %>% unique

In [None]:
# Fold the "DC2 CD1c+_doublet" label into "DC2 CD1c+".
integrated.myeloid$metadata$type_harmony_clean <- plyr::revalue(
    as.character(integrated.myeloid$metadata$type_harmony_clean),
    c("DC2 CD1c+_doublet" = "DC2 CD1c+")
)


In [None]:
integrated.myeloid$metadata$type_harmony_clean %>% unique

In [None]:
# Build `type_harmony_clust_clean`: unmapped clusters renamed, numbered sub-clusters kept as they are.
integrated.myeloid$metadata$type_harmony_clust_clean <- plyr::revalue(
    as.character(integrated.myeloid$metadata$type_harmony_clust),
    c(
        "unmapped_0" = "FOLR2highLYVE1+ STM",
        "unmapped_11" = "FOLR2+ STM/FLS doublet",
        "unmapped_13" = "DC3/FLS doublet",
        "unmapped_28" = "FOLR2+ STM/FLS/Plasma doublet",
        "unmapped_32" = "FOLR2highCLEC10A+ STM",
        "unmapped_35" = "FOLR2+ STM/Bcell doublet",
        "unmapped_9" = "HBA1+CLEC9A+ DC1",
        "unmapped_2" = "TGFB2+BIRC5+ STM"
    )
)
    

In [None]:
# Collapse the numbered sub-clusters ("<label>_<n>") back onto their parent label to build
# `type_harmony_clean`.
# Every entry only strips the "_<n>" suffix. The TREM2+LPL+ entries previously also dropped
# " STM", producing a separate "TREM2+LPL+" label that never merged with "TREM2+LPL+ STM"
# (the label used in the `myeloid.clust` selection); they now map to "TREM2+LPL+ STM".
integrated.myeloid$metadata$type_harmony_clean <- plyr::revalue(
    as.character(integrated.myeloid$metadata$type_harmony_clust_clean),
    c(
        "TREM2+LPL+ STM_6" = "TREM2+LPL+ STM",
        "TREM2+ STM_10" = "TREM2+ STM",
        "FOLR2highLYVE1+ STM_16" = "FOLR2highLYVE1+ STM",
        "TREM2+LPL+ STM_17" = "TREM2+LPL+ STM",
        "TREM2+ STM_18" = "TREM2+ STM",
        "FOLR2highLYVE1+ STM_24" = "FOLR2highLYVE1+ STM",
        "FOLR2highLYVE1+ STM_27" = "FOLR2highLYVE1+ STM",
        "TREM2+LPL+ STM_29" = "TREM2+LPL+ STM",
        "TREM2+LPL+ STM_31" = "TREM2+LPL+ STM",
        "DC2 CD1c+_doublet" = "DC2 CD1c+"
    )
)
    

In [None]:
integrated.myeloid$metadata$type_harmony_clean %>% unique

In [None]:
# Number of cells per cleaned label.
integrated.myeloid$metadata$type_harmony_clean %>% table

In [None]:
# Remove the clustering-resolution columns from the metadata.
integrated.myeloid$metadata[["Clust1.5"]] <- NULL
integrated.myeloid$metadata[["Clust2.5"]] <- NULL
integrated.myeloid$metadata[["Clust3.5"]] <- NULL

In [None]:
# Embedding plot coloured by the cleaned labels.
p <- plot_dim_red(
    integrated.myeloid$Humap$embedding,
    integrated.myeloid$Humap$clusters,
    integrated.myeloid$metadata,
    "cellID",
    color_by = "type_harmony_clean",
    "UMAP after Harmony",
    "UMAP"
)
p

In [None]:
# Checkpoint the object with the renamed labels ...
saveRDS(integrated.myeloid, file = "./cache/integrated.myeloid.renamed.rds")

In [None]:
# ... and reload it from that checkpoint.
integrated.myeloid <- readRDS(file = "./cache/integrated.myeloid.renamed.rds")

## Where are the clusters in space?

In [None]:
# Cell geometries for the spatial plots.
cellgeoms <- readRDS(file = "/datastore/lucy/CosMx/cache/raw/Glasgow_cellgeoms_QC.RDS")

In [None]:
cellgeoms %>% head

In [None]:
# SampleFOV -> condition lookup table.
sampleFOVcondition <- read.table(
    "/datastore/lucy/CosMx/cache/SampleFOV-condition.csv",
    header = TRUE,
    sep = ","
)

In [None]:
# IDs of the cells whose `source` is "cosmx"
cosmx.cell.ids <- integrated.myeloid$metadata %>% subset(source %in% "cosmx") %>% with(cellID)

In [None]:
integrated.myeloid$metadata %>% head

In [None]:
# Index the metadata by cell ID so rows can be selected by name.
rownames(integrated.myeloid$metadata) <- integrated.myeloid$metadata$cellID

In [None]:
# Region annotation per cell, taken from the metadata of the tissue-segmentation object.
niche_anno <- readRDS("./cache/tissueSegmentation/spatialObjregions.RDS")$metadata
niche_anno <- dplyr::select(niche_anno, cellID, highres.regions)
head(niche_anno)

In [None]:
# Attach the region annotation to the cell geometries (all geometry rows are kept).
cellgeoms <- left_join(cellgeoms, niche_anno, by = "cellID")

In [None]:
# Isolate the CosMx cells now that they are annotated: counts by column name,
# metadata by row name (both indexed by cell ID).
myeloid.anno.cosmx <- list(
    counts = integrated.myeloid$counts[, cosmx.cell.ids],
    metadata = integrated.myeloid$metadata[cosmx.cell.ids, ]
)

In [None]:
# Inner merge of the geometries with the condition table on their shared columns.
cellgeoms <- merge(x = cellgeoms, y = sampleFOVcondition, all = FALSE)

In [None]:
# Prefix SampleFOV with the condition.
cellgeoms$SampleFOV <- paste(cellgeoms$Condition, cellgeoms$SampleFOV, sep = "_")

In [None]:
# NOTE(review): no visible cell joins `type_harmony_clust` / `type_harmony_clean` onto `cellgeoms`
# (only `highres.regions` is added) - confirm these columns exist in the loaded geometries.
cellgeoms$type_harmony_clust %>% unique

In [None]:
# One rainbow colour per distinct cleaned label.
col_pal <- rainbow(n = length(unique(cellgeoms$type_harmony_clean)))

In [None]:
# Draw one spatial map per SampleFOV: cell centroids filled by their cleaned cluster label,
# on a dark theme, titled with the SampleFOV value.
# NOTE(review): `unite` rebuilds "SampleFOV" from Condition/SampleID/FOV inside the lambda, while the
# values iterated over come from the existing `cellgeoms$SampleFOV` - confirm both use the same format.
# NOTE(review): `fill = type_harmony_clean` assumes that column is present on `cellgeoms`.
library(ggdark)
fig.size(6, 8)
cellgeoms %>% 
    with(SampleFOV) %>%  
    unique %>% 
    # head(1) %>% 
    purrr::map(~ cellgeoms %>% 
        tidyr::unite("SampleFOV", c("Condition","SampleID", "FOV")) %>% 
        # subset(!grepl("Doublet*|Low_Quality", celltype.coarse)) %>% 
        subset(SampleFOV %in% .x) %>% 
        ggplot() + 
            geom_sf(aes(geometry = centroid, fill = type_harmony_clean), linewidth = 0.2) + 
            # cells without a label (NA) are drawn in black
            scale_fill_manual(values = col_pal, na.value = "black", na.translate = TRUE) + 
            ggtitle(.x) + dark_theme_classic() 
    )


## cell fractions

In [None]:
integrated.myeloid$metadata %>% head

In [None]:
sampleFOVcondition %>% head

In [None]:
# Add Condition and PatientID to the metadata by matching on SampleFOV.
integrated.myeloid$metadata <- left_join(
    integrated.myeloid$metadata,
    dplyr::select(sampleFOVcondition, c("SampleFOV", "Condition", "PatientID")),
    by = "SampleFOV"
)

In [None]:
integrated.myeloid$metadata %>% head

In [None]:
# Prefix SampleFOV with the condition.
integrated.myeloid$metadata$SampleFOV <- paste(
    integrated.myeloid$metadata$Condition,
    integrated.myeloid$metadata$SampleFOV,
    sep = "_"
)

In [None]:
# Number of distinct cleaned labels.
num_clust <- length(unique(integrated.myeloid$metadata$type_harmony_clean))

In [None]:
# Named palette: one colour per non-NA cleaned label, interpolated from Tableau 20.
fig.size(10, 25)
.vals <- unique(integrated.myeloid$metadata$type_harmony_clean)
.vals <- .vals[!is.na(.vals)]
.pal <- colorRampPalette(tableau_color_pal('Tableau 20')(20))(length(.vals))
names(.pal) <- .vals

# Plot 1: label proportions within each SampleFOV (columns of the table sum to 1),
# one facet per sample; SampleFOV is split at its last underscore into SampleID and FOV.
integrated.myeloid$metadata %>%
    filter(source == "cosmx") %>%
    with(table(type_harmony_clean, SampleFOV)) %>%
    prop.table(margin = 2) %>%
    as.data.frame %>%
    separate(SampleFOV, c("SampleID", "FOV"), remove = FALSE, sep = "_(?=[^_]+$)") %>%
    ggplot() +
        geom_col(aes(x = FOV, y = Freq, fill = type_harmony_clean)) +
        scale_fill_manual(values = .pal) +
        guides(x = guide_axis(angle = 90)) +
        ggtitle("Abundance of cells across Samples") +
        facet_wrap(~ SampleID, scales = "free_x")

# Plot 2: the same layout with raw cell counts and free scales on both axes.
fig.size(10, 25)
integrated.myeloid$metadata %>%
    subset(source %in% "cosmx") %>%
    with(table(type_harmony_clean, SampleFOV)) %>%
    as.data.frame %>%
    separate(SampleFOV, c("SampleID", "FOV"), remove = FALSE, sep = "_(?=[^_]+$)") %>%
    ggplot() +
        geom_col(aes(x = FOV, y = Freq, fill = type_harmony_clean)) +
        scale_fill_manual(values = .pal) +
        guides(x = guide_axis(angle = 90)) +
        ggtitle("Abundance of cells across Samples") +
        facet_wrap(~ SampleID, scales = "free")



In [None]:
# Long table of CosMx cell counts per label x SampleFOV x PatientID combination.
as.data.frame(
    integrated.myeloid$metadata %>%
        subset(source %in% "cosmx") %>%
        with(table(type_harmony_clean, SampleFOV, PatientID))
)

## add data to cache

In [None]:
# Store the marker tables on the integrated object.
# The current `effects_marginal_cosmx` is stored under the name `effects_marginal_cosmx_coarse`.
# NOTE(review): `effects_marginal_sc_clust` must already exist in the session - confirm where it is computed.
integrated.myeloid[["effects_marginal_sc_ref"]] <- effects_marginal_sc_ref
integrated.myeloid[["effects_marginal_sc_clust"]] <- effects_marginal_sc_clust
integrated.myeloid[["effects_marginal_sc_clust_ann"]] <- effects_marginal_sc_clust_ann
integrated.myeloid[["effects_marginal_cosmx_coarse"]] <- effects_marginal_cosmx

In [None]:
integrated.myeloid$effects_marginal_cosmx_coarse %>% head

## Cleaning up labels and re-calculating correlations

+ I would like to preserve the labels above and also merge all the unmapped and predominantly cosmx clusters appropriately

In [None]:
integrated.myeloid$metadata$type_harmony_clean %>% unique

In [None]:
fig.size(5, 8)

# Compare per-label cell counts before (`type_harmony`) and after (`type_harmony_clean`) the merge.
# Labels missing after the merge get a count of 0.
left_join(
    as.data.frame(table(integrated.myeloid$metadata$type_harmony)),
    as.data.frame(table(integrated.myeloid$metadata$type_harmony_clean)),
    by = "Var1",
    suffix = c("_before_merge", "_after_merge")
) %>%
    mutate_if(is.numeric, ~replace(., is.na(.), 0)) %>%
    ggplot(aes(Freq_before_merge, Freq_after_merge)) +
        geom_point(aes(color = Var1)) +
        geom_abline(slope = 1) +
        ggtitle("Cell numbers before and after merge are the same")

In [None]:
# Cross-tabulate the sc cells: `cosmx400.myeloid` labels (rows) against the cleaned labels (columns).
# NOTE(review): the table is row-normalised and the result is then column-normalised - confirm the
# double normalisation is intended.
h <- integrated.myeloid$metadata %>%
    subset(source %in% "sc") %>%
    with(table(cosmx400.myeloid, type_harmony_clean)) %>%
    prop.table(margin = 1) %>%
    prop.table(margin = 2) %>%
    Heatmap(
        name = "Freq",
        column_title = stringr::str_wrap(
            "Confusion matrix of sc cells in new integrated clusters vs sc reference annotations",
            width = 35
        ),
        width = 40*unit(5, "mm"),
        height = 20*unit(5, "mm"),
        heatmap_legend_param = list(
            legend_direction = "horizontal",
            legend_width = unit(6, "cm")
        )
    )
fig.size(7, 12)
draw(h, heatmap_legend_side = "bottom")

## correlations b/w sc reference and newly annotated cosmx cells

In [None]:
# Markers of the CosMx cells grouped by the cleaned labels (`type_harmony_clean`),
# computed with `find_glmer_markers`. The formula nests the cluster column within
# SampleFOV and within SampleID.
cluster_col_name <- "type_harmony_clean"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|SampleFOV/", cluster_col_name, ")"),
    paste0("(1|SampleID/", cluster_col_name, ")"),
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

# IDs of the cells whose `source` is "cosmx"
cosmx.cells.idx <- integrated.myeloid$metadata %>% subset(source %in% "cosmx") %>% with(cellID)
effects_marginal_cosmx_clean <- find_glmer_markers(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% cosmx.cells.idx],
    integrated.myeloid$metadata %>%
        cbind(integrated.myeloid$Humap$clusters) %>%
        subset(cellID %in% cosmx.cells.idx),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", "FOV", cluster_col_name)
)

# Keep a copy on the integrated object: the "on clean data" scatter plots read
# `integrated.myeloid$effects_marginal_cosmx_clean`, which was otherwise never assigned.
# The copy is taken here, while the contrasts still carry their original labels.
integrated.myeloid$effects_marginal_cosmx_clean <- effects_marginal_cosmx_clean

In [None]:
effects_marginal_cosmx_clean$contrast %>% unique

In [None]:
# Prefix every contrast with "Myeloid_".
# Re-running this cell prepends the prefix again.
effects_marginal_cosmx_clean$contrast <- paste0("Myeloid_", effects_marginal_cosmx_clean$contrast)

In [None]:
# Contrasts (with the "Myeloid_" prefix) kept for the marker heatmap.
myeloid.clust <- c(
    'Myeloid_DC1 CLEC9A+',
    'Myeloid_DC2 CD1c+',
    'Myeloid_DC2 CCR7+',
    'Myeloid_DC3 CD1clowCD163+',
    'Myeloid_iDC3 CD14highCD163+',
    'Myeloid_FOLR2highCLEC10A+ STM',
    'Myeloid_DC2 CD1c+_doublet',

    'Myeloid_CLEC10A+ATF3+ STM',
    'Myeloid_FOLR2highEGR1+ STM',
    'Myeloid_FOLR2highLYVE1+ STM',
    'Myeloid_TGFB2+BIRC5+ STM',
    'Myeloid_BIRC5+ cycling STM',
    'Myeloid_TREM2+ STM',
    'Myeloid_TREM2+LPL+ STM',
    'Myeloid_SPP1+TREM2low STM',
    'Myeloid_SPP1+ STM',
    'Myeloid_TNF+ICAM1+ STM',
    'Myeloid_ISG15+CXCL10+ STM',
    'Myeloid_CD14highS100A12+ monoSTM',
    'Myeloid_FCGR3A+ monoSTM'
)

In [None]:
# Keep only the marker rows whose contrast is in the list above.
effects_marginal_cosmx_clean_sub <- effects_marginal_cosmx_clean[effects_marginal_cosmx_clean$contrast %in% myeloid.clust, ]

In [None]:
# Wide table of the top markers: one column per contrast, one row per rank.
# Only rows with pvalue < 0.01 and logFC > 0.5 are considered, top 20 by logFC per contrast.
effects_marginal_cosmx_clean_sub %>%
    group_by(contrast) %>%
    filter(pvalue < 1e-02, logFC > 0.5) %>%
    top_n(n = 20, wt = logFC) %>%
    mutate(rank = rank(-logFC)) %>%
    ungroup %>%
    dplyr::select(contrast, feature, rank) %>%
    spread(contrast, feature, fill = NA)

In [None]:
effects_marginal_cosmx_clean_sub

In [None]:
# Pick significant genes to show in the heatmap. Steps of the chain below, in order:
#   1. keep rows with pvalue < 0.05
#   2. per feature, keep the single row with the highest zscore (so each gene is assigned to one contrast)
#   3. per contrast, take the first 20 rows by decreasing logFC (indexing 1:20 pads with NA rows
#      when a contrast has fewer than 20) and drop the padding rows again
#   4. per contrast, sort by increasing logFC and record the group size (`len`) and rank
#   5. per contrast, reorder the rows: the first round(len/3) rows in increasing logFC, followed by
#      the remaining rows in decreasing logFC, so the largest logFC sits after the first third and
#      values decrease towards both ends
# NOTE(review): for a contrast with a single gene round(len/3) is 0 and seq(1, 0, 1) fails - confirm
# every contrast has at least two genes at this point.
genes_use_df<-data.table(effects_marginal_cosmx_clean_sub)[
    pvalue < 0.05][
    , .SD[order(-zscore)][1, ], by = feature][
    , .SD[order(-logFC)][1:20, ], by = contrast][
    , .SD[!is.na(feature)]][
    , .SD[order(logFC)], by = contrast][
    , ":=" (len = .N, rank = frank(logFC)), by = contrast][
    , .SD[c(seq(1, round(.SD$len[1]/3), 1), seq(.SD$len[1], round(.SD$len[1]/3)+1, -1)), ], by = contrast][
    , .(feature, logFC, zscore, contrast, pvalue) # the genes in this list will be unique because I assigned one z-score to each gene
]

In [None]:

message("Checking that the genes are unique: ", nrow(genes_use_df) == length(unique(genes_use_df$feature)))
message("Number of genes in heatmap: ", nrow(genes_use_df))

# Select the top 5 genes with the highest logFC in each contrast; these are the genes labelled on the heatmap.
genes_to_mark_celltypes<-genes_use_df[, .SD[order(-logFC)][1:5, ], by = contrast][
    , .(contrast, feature)
]
# Additionally label two fixed gene sets when they are present in `genes_use_df`.
# NOTE(review): MPO/CYSTM1/EFNA4 and CD3G/CD3E/IL32 look carried over from another cell-type
# analysis, and a gene already in the top 5 would be listed twice - confirm they are wanted here.
genes_to_mark_celltypes<-genes_to_mark_celltypes %>% 
    # subset(! contrast %in% c("Neutrophils", "T cells")) %>% 
    rbind(genes_use_df[feature %in% c("MPO", "CYSTM1", "EFNA4"), .(contrast, feature)]) %>% 
    rbind(genes_use_df[feature %in% c("CD3G", "CD3E", "IL32"), .(contrast, feature)])

# logFC matrix for the heatmap: one row per `cluster` value, one column per selected gene.
# NOTE(review): this uses the `cluster` column while the rest of the notebook groups by `contrast` - confirm
# the marker table carries both.
.mat<-effects_marginal_cosmx_clean_sub %>% 
    subset(feature %in% genes_use_df$feature) %>% 
    dplyr::select(cluster, logFC, feature) %>% 
    tidyr::spread(cluster, logFC) %>% 
    tibble::column_to_rownames('feature') %>% 
    as.matrix() %>% t() 
# put the columns in the gene order established in `genes_use_df`
.mat<-.mat[, genes_use_df$feature]

message(
    "Are all genes that are being marked also present in the matrix?: ", 
    length(setdiff(genes_to_mark_celltypes$feature, colnames(.mat))) == 0
)

# Column positions of the genes to label, needed by the mark annotation.
genes_idx<-data.frame(idx = which(colnames(.mat) %in% genes_to_mark_celltypes$feature)) %>% 
    mutate(feature = colnames(.mat)[idx])

genes_to_mark_celltypes<-genes_to_mark_celltypes %>% left_join(genes_idx, by = "feature")

In [None]:

# Colours for the cell-type annotation bar: one per contrast present in `genes_use_df`,
# interpolated from the Tableau 20 palette so every contrast receives a colour.
# (The previous cell called `unique()` without an argument and left `cols` without a value.)
cols <- local({
    celltypes <- unique(as.character(genes_use_df$contrast))
    setNames(
        colorRampPalette(tableau_color_pal("Tableau 20")(20))(length(celltypes)),
        celltypes
    )
})
message("Colors being used: ")
print(cols)
message("Check if any cell types above are not getting a color because of a smaller palette")


fig.size(5, 15)
# Bottom annotation: label the selected marker genes at their column positions.
ha <- columnAnnotation(
    col_ann = anno_mark(
        at = genes_to_mark_celltypes$idx,
        side = "bottom",
        labels = genes_to_mark_celltypes$feature
    )
)

# Top annotation: colour bar with the contrast each gene column was assigned to.
ha2 <- columnAnnotation(
    show_legend = FALSE,
    celltypes = genes_use_df$contrast,
    col = list(celltypes = cols)
)

# ha2<-columnAnnotation(
#     celltypes = anno_block(
#         gp = gpar(fill = col_pal), 
#         labels = genes_use_df$contrast %>% unique

# ))
# Columns are split by contrast; gene names are dropped because the mark annotation labels them.
split <- genes_use_df$contrast
colnames(.mat) <- NULL
.mat %>%
    Heatmap(
        name = 'logFC',
        col = circlize::colorRamp2(c(0, 0.1, 3), c('white', 'white', muted('blue'))),
        # column_names_gp = grid::gpar(fontsize = 10),
        row_dend_side = "right",
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        top_annotation = ha2,
        # row_km = 12,
        bottom_annotation = ha,
        column_split = split
    )

In [None]:
# Markers of the sc cells grouped by the cleaned labels (`type_harmony_clean`),
# computed with `find_glmer_markers_amp`.
cluster_col_name <- "type_harmony_clean"
term_1 <- paste0("1 + (1|", cluster_col_name, ")")
term_2 <- c(
    paste0("(1|", cluster_col_name, ":SampleID)"),
    "(1|SampleID)",
    "offset(logUMI)"
)
formula_glmer_input <- createFormula("y", term_1, term_2)

# IDs of the cells whose `source` is "sc"
sc.cells.idx <- integrated.myeloid$metadata %>% subset(source %in% "sc") %>% with(cellID)
effects_marginal_sc_ann_clean <- find_glmer_markers_amp(
    integrated.myeloid$counts[, colnames(integrated.myeloid$counts) %in% sc.cells.idx],
    integrated.myeloid$metadata %>%
        cbind(integrated.myeloid$Humap$clusters) %>%
        subset(cellID %in% sc.cells.idx),
    formula_glmer_input,
    cluster_col_name,
    c("SampleID", cluster_col_name)
)

# Keep a copy on the integrated object: the later comparison cell reads
# `integrated.myeloid$effects_marginal_sc_ann_clean`, which was otherwise never assigned.
integrated.myeloid$effects_marginal_sc_ann_clean <- effects_marginal_sc_ann_clean

In [None]:
# Genes used for the correlations: any feature with |logFC| > 0.5 and pvalue < 0.05
# in either the sc reference or the cleaned CosMx marker table.
genes_use_corr <- unique(
    rbind(effects_marginal_sc_ref, effects_marginal_cosmx_clean) %>%
        subset(abs(logFC) > 0.5 & pvalue < 0.05) %>%
        with(feature)
)

In [None]:
# Correlate marker logFC profiles of the sc reference labels with those of the cleaned CosMx labels.
corr_sc_cosmx <- rbind(
    effects_marginal_sc_ref %>% mutate(contrast = paste0(contrast, "_sc")),
    effects_marginal_cosmx_clean %>% mutate(contrast = paste0(contrast, "_cosmx"))
) %>%
    subset(feature %in% genes_use_corr) %>%
    dplyr::select(contrast, feature, logFC) %>%
    spread(contrast, logFC, fill = NA) %>%
    column_to_rownames("feature") %>%
    as.matrix %>%
    cor

# Row selection: only the sc reference contrasts.
sc_ref_clusters <- unique(paste0(effects_marginal_sc_ref$contrast, "_sc"))

.mat <- corr_sc_cosmx[sc_ref_clusters, ]
# NOTE(review): the legend is named 'logFC' although the cells hold correlation coefficients.
h <- Heatmap(
    .mat,
    name = 'logFC',
    col = circlize::colorRamp2(c(-1, 0.1, 1), c(muted("blue"), "white", "red")),
    width = ncol(.mat)*unit(6, "mm"),
    height = nrow(.mat)*unit(6, "mm"),
    cluster_columns = FALSE,
    column_names_gp = grid::gpar(fontsize = 10),
    row_names_gp = grid::gpar(fontsize = 10),
    column_title = stringr::str_wrap("Marker correlations b/w cosmx clusters and sc reference labels", 35),
    column_title_gp = grid::gpar(fontsize = 15),
    use_raster = TRUE,
    # print the value of every cell, rounded to one decimal
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid.text(sprintf("%.1f", .mat[i, j]), x, y, gp = gpar(fontsize = 5))
    }
)

In [None]:
fig.size(10, 18)
# NOTE(review): a 1 x 1 inch viewport is pushed before drawing - confirm this is intended.
vp <- grid::viewport(width = unit(1, "inch"), height = unit(1, "inch"))
pushViewport(vp)
draw(h, heatmap_legend_side = "left")

In [None]:
# Embedding plot coloured by the cleaned labels ...
p <- plot_dim_red(
    integrated.myeloid$Humap$embedding,
    integrated.myeloid$Humap$clusters,
    integrated.myeloid$metadata,
    "cellID",
    color_by = "type_harmony_clean",
    "UMAP after Harmony",
    "UMAP"
)

In [None]:
# ... shown as one panel per data source.
fig.size(5, 12)
p + facet_wrap(~ source)

## cell fractions

In [None]:
# Named palette: one colour per non-NA cleaned label, interpolated from Tableau 10.
fig.size(6, 25)
.vals <- unique(integrated.myeloid$metadata$type_harmony_clean)
.vals <- .vals[!is.na(.vals)]
.pal <- colorRampPalette(tableau_color_pal('Tableau 10')(10))(length(.vals))
names(.pal) <- .vals

# Plot 1: label proportions within each SampleFOV (columns of the table sum to 1).
integrated.myeloid$metadata %>%
    filter(source == "cosmx") %>%
    with(table(type_harmony_clean, SampleFOV)) %>%
    prop.table(margin = 2) %>%
    as.data.frame %>%
    ggplot() +
        geom_col(aes(x = SampleFOV, y = Freq, fill = type_harmony_clean)) +
        scale_fill_manual(values = .pal) +
        guides(x = guide_axis(angle = 90)) +
        ggtitle("Abundance of cells across Samples")

# Plot 2: raw cell counts per FOV, one facet per sample (SampleFOV split at its last underscore).
fig.size(10, 25)
integrated.myeloid$metadata %>%
    subset(source %in% "cosmx") %>%
    with(table(type_harmony_clean, SampleFOV)) %>%
    as.data.frame %>%
    separate(SampleFOV, c("SampleID", "FOV"), remove = FALSE, sep = "_(?=[^_]+$)") %>%
    ggplot() +
        geom_col(aes(x = FOV, y = Freq, fill = type_harmony_clean)) +
        scale_fill_manual(values = .pal) +
        guides(x = guide_axis(angle = 90)) +
        ggtitle("Abundance of cells across Samples") +
        facet_wrap(~ SampleID, scales = "free")



# Marker logFC scatter plots

## on coarse data

In [None]:
# "Bad" genes: CosMx rows with pvalue > 0.01 and logFC below 0.07, joined to the annotated sc markers
# on feature and contrast; per contrast the 5 with the highest sc logFC are kept.
genes_bad <- full_join(
        integrated.myeloid$effects_marginal_sc_clust_ann %>%
            dplyr::select(logFC, feature, contrast, pvalue),
        integrated.myeloid$effects_marginal_cosmx_coarse %>%
            filter(pvalue > 1e-02) %>%
            dplyr::select(logFC, feature, contrast, pvalue) %>%
            group_by(contrast) %>%
            mutate(logFCrank = rank(logFC)) %>%
            ungroup,
        by = c('feature', 'contrast'),
        suffix = c('_sc', '_cosmx')
        ) %>%
    dplyr::select(contrast, everything()) %>%
    filter(logFC_cosmx < 0.07) %>%
    group_by(contrast) %>%
    top_n(n = 5, wt = logFC_sc) %>%
    mutate(type = 'bad genes')

# "Good" genes: rows with logFC > 0.5 and pvalue < 0.01 in either table, joined on feature and
# contrast; per contrast the 5 with the highest CosMx logFC are kept.
genes_good <- full_join(
        integrated.myeloid$effects_marginal_sc_clust_ann %>%
            filter(logFC > 0.5, pvalue < 1e-02) %>%
            dplyr::select(logFC, feature, contrast, pvalue),
        integrated.myeloid$effects_marginal_cosmx_coarse %>%
            filter(logFC > 0.5, pvalue < 1e-02) %>%
            dplyr::select(logFC, feature, contrast, pvalue),
        by = c('feature', 'contrast'),
        suffix = c('_sc', '_cosmx')
        ) %>%
    dplyr::select(contrast, everything()) %>%
    group_by(contrast) %>%
    top_n(n = 5, wt = logFC_cosmx) %>%
    mutate(type = 'good genes')
    

In [None]:
# Genes to highlight in the scatter plots: good genes first, then bad genes.
genes_label <- rbind(genes_good, genes_bad)

In [None]:
genes_label %>% nrow

In [None]:
unique(integrated.myeloid$metadata$type_harmony)

In [None]:
# genepanel<-readRDS("../data_new/genepanel_cosmx.RDS")

In [None]:
# genes_label_ann<-c("CD24", "CD27", "IGHM", "CD11b", "IGD", "TCL1A", "Lsc1", "CD1C")

In [None]:
# genepanel[grepl(paste(genes_label_ann, collapse = "|"), genepanel)]

In [None]:
# genes_label_ann_df<-inner_join(
#         integrated.myeloid$effects_marginal_sc_ref %>%  dplyr::select(c(logFC, feature, contrast, pvalue)), 
#         integrated.myeloid$effects_marginal_cosmx_coarse %>% 
#             subset(feature %in% genes_label_ann) %>% 
#             dplyr::select(c(logFC, feature, contrast, pvalue)) %>% 
#             group_by(contrast) %>% 
#             mutate(logFCrank = rank(logFC)) %>% 
#             ungroup, 
#         by = c('feature', 'contrast'), 
#         suffix = c('_sc', '_cosmx')
#         ) %>% 
#     dplyr::select(c(contrast, everything()))

In [None]:
# genes_label_ann_df<-c()

In [None]:
# genes_label<-rbind(genes_label, genes_label_ann_df)

In [None]:
# Number of genes that will be labelled in the scatter plots.
nrow(genes_label)

In [None]:
# Scatter of sc vs CosMx marker logFC per contrast (features present in both tables),
# with a linear fit and the good / bad genes highlighted and labelled.
fig.size(10, 20)
inner_join(
        integrated.myeloid$effects_marginal_sc_clust_ann,
        integrated.myeloid$effects_marginal_cosmx_coarse,
        by = c('feature', 'contrast'),
        suffix = c('_sc', '_cosmx')
        ) %>%
    ggplot(aes(logFC_cosmx, logFC_sc)) +
        geom_hline(yintercept = 0, linetype = 2) +
        geom_vline(xintercept = 0, linetype = 2) +
        geom_abline() +
        geom_point(size = .5, alpha = 0.2) +
        facet_wrap(~contrast, scales = 'free') +
        geom_smooth(method = 'lm') +
        ggtitle('Marker logFC comparison') +
        geom_point(
            data = genes_label,
            aes(logFC_cosmx, logFC_sc, color = type)
            ) +
        geom_text_repel(
            data = genes_label,
            aes(logFC_cosmx, logFC_sc, label = feature, color = type),
            min.segment.length = 0,
            max.overlaps = Inf,
            show.legend = FALSE,
            fontface = 'bold',
            box.padding = 1
            ) +
        guides(color = guide_legend(override.aes = list(shape = 16, alpha = 1, size = 5))) +
        scale_color_tableau('Classic Blue-Red 6')
        # geom_text(data = genes_label %>%  mutate(x = , aes(x = 1, y=c(2, 3, 5, 7, 9), label = c(genes_label$type))

# geom_text(data=annotation, aes( x=x, y=y, label=label),                 , 
#            color="orange", 
#            size=7 , angle=45, fontface="bold" )



In [None]:
# Same comparison without highlighted genes, with shared axes across facets.
fig.size(8, 12)
inner_join(
        integrated.myeloid$effects_marginal_sc_clust_ann,
        integrated.myeloid$effects_marginal_cosmx_coarse,
        by = c('feature', 'contrast'),
        suffix = c('_sc', '_cosmx')
        ) %>%
    ggplot(aes(logFC_cosmx, logFC_sc)) +
        geom_hline(yintercept = 0, linetype = 2) +
        geom_vline(xintercept = 0, linetype = 2) +
        geom_abline() +
        geom_point(size = .5) +
        facet_wrap(~contrast) +
        geom_smooth(method = 'lm') +
        ggtitle('Marker logFC comparison')


## on clean data

In [None]:
genes_bad<-full_join(
        integrated.myeloid$effects_marginal_sc_ref %>%  dplyr::select(c(logFC, feature, contrast, pvalue)), 
        integrated.myeloid$effects_marginal_cosmx_clean %>% 
            filter(pvalue > 1e-02) %>% 
            dplyr::select(c(logFC, feature, contrast, pvalue)) %>% 
            group_by(contrast) %>% 
            mutate(logFCrank = rank(logFC)) %>% 
            ungroup, 
        by = c('feature', 'contrast'), 
        suffix = c('_sc', '_cosmx')
        ) %>% 
    dplyr::select(c(contrast, everything())) %>% 
    filter(logFC_cosmx < 0.07) %>% 
    group_by(contrast) %>% 
    top_n(wt = (logFC_sc), n = 5) %>% 
    mutate(type = 'bad genes') 
    
genes_good<-full_join(
        integrated.myeloid$effects_marginal_sc_ref %>% filter(logFC > 0.5 & pvalue < 0.05) %>% dplyr::select(c(logFC, feature, contrast, pvalue)), 
        integrated.myeloid$effects_marginal_cosmx_clean %>% filter(logFC > 0.5 & pvalue < 0.05) %>% dplyr::select(c(logFC, feature, contrast, pvalue)), 
        by = c('feature', 'contrast'), 
        suffix = c('_sc', '_cosmx')
        ) %>% 
    dplyr::select(c(contrast, everything())) %>% 
    group_by(contrast) %>% 
    top_n(wt = (logFC_cosmx), n = 5) %>% 
    mutate(type = 'good genes') 
    

# Stack the two label tables. bind_rows() matches columns by name and fills
# columns present in only one table with NA, so the result does not depend on
# both tables having exactly the same column set (the tables are built by
# separate pipelines).
genes_label <- dplyr::bind_rows(genes_good, genes_bad)

# Scatter of CosMx vs sc-reference marker logFC, one facet per contrast
# (free axes), with a linear fit and the labelled good/bad genes highlighted.
fig.size(10, 20)
integrated.myeloid$effects_marginal_sc_ref %>%
  inner_join(
    integrated.myeloid$effects_marginal_cosmx_clean,
    by = c("feature", "contrast"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  ggplot(aes(logFC_cosmx, logFC_sc)) +
  # Reference lines: both axes at zero and the identity line.
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_abline() +
  geom_point(size = .5, alpha = 0.2) +
  facet_wrap(~contrast, scales = "free") +
  geom_smooth(method = "lm") +
  ggtitle("Marker logFC comparison") +
  # Highlighted genes; rows with NA on either axis cannot be drawn.
  geom_point(
    data = genes_label,
    aes(logFC_cosmx, logFC_sc, color = type)
  ) +
  geom_text_repel(
    data = genes_label,
    aes(logFC_cosmx, logFC_sc, label = feature, color = type),
    min.segment.length = 0,
    max.overlaps = Inf,
    show.legend = FALSE,
    fontface = "bold",
    box.padding = 1
  ) +
  # Make legend keys solid and large regardless of the point aesthetics.
  guides(color = guide_legend(override.aes = list(shape = 16, alpha = 1, size = 5))) +
  scale_color_tableau("Classic Blue-Red 6") +
  # geom_text(data = genes_label %>%  mutate(x = , aes(x = 1, y=c(2, 3, 5, 7, 9), label = c(genes_label$type))
  NULL

# geom_text(data=annotation, aes( x=x, y=y, label=label),                 , 
#            color="orange", 
#            size=7 , angle=45, fontface="bold" )



# Unlabelled version of the sc-reference vs CosMx logFC scatter: shared axes
# across contrast facets, identity line and per-facet linear fit.
fig.size(8, 12)
integrated.myeloid$effects_marginal_sc_ref %>%
  inner_join(
    integrated.myeloid$effects_marginal_cosmx_clean,
    by = c("feature", "contrast"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  ggplot(aes(logFC_cosmx, logFC_sc)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_abline() +
  geom_point(size = .5) +
  facet_wrap(~contrast) +
  geom_smooth(method = "lm") +
  ggtitle("Marker logFC comparison")


### Comparing against the annotated sc cells to see whether that changes anything (it does not)

In [None]:
# "Bad genes" label set: all annotated-sc markers full-joined (feature,
# contrast) with CosMx markers whose pvalue > 0.01; rows with CosMx
# logFC < 0.07 are kept and, per contrast, the five with the largest sc logFC.
# logFCrank (rank of CosMx logFC within contrast) is carried along as an extra
# column; it is not referenced again in this cell.
genes_bad <- integrated.myeloid$effects_marginal_sc_ann_clean %>%
  dplyr::select(c(logFC, feature, contrast, pvalue)) %>%
  full_join(
    integrated.myeloid$effects_marginal_cosmx_clean %>%
      filter(pvalue > 0.01) %>%
      dplyr::select(c(logFC, feature, contrast, pvalue)) %>%
      group_by(contrast) %>%
      mutate(logFCrank = rank(logFC)) %>%
      ungroup(),
    by = c("feature", "contrast"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  dplyr::select(c(contrast, everything())) %>%
  filter(logFC_cosmx < 0.07) %>%
  group_by(contrast) %>%
  top_n(wt = (logFC_sc), n = 5) %>%
  mutate(type = "bad genes")
    
# "Good genes" label set against the annotated sc table: markers passing
# logFC > 0.5 and pvalue < 0.05 on either side, full-joined on feature and
# contrast; per contrast the five rows with the largest CosMx logFC are kept.
# NOTE(review): a full join can leave logFC_sc as NA for a kept row -- confirm
# that this is intended.
genes_good <- integrated.myeloid$effects_marginal_sc_ann_clean %>%
  filter(logFC > 0.5, pvalue < 0.05) %>%
  dplyr::select(c(logFC, feature, contrast, pvalue)) %>%
  full_join(
    integrated.myeloid$effects_marginal_cosmx_clean %>%
      filter(logFC > 0.5, pvalue < 0.05) %>%
      dplyr::select(c(logFC, feature, contrast, pvalue)),
    by = c("feature", "contrast"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  dplyr::select(c(contrast, everything())) %>%
  group_by(contrast) %>%
  top_n(wt = (logFC_cosmx), n = 5) %>%
  mutate(type = "good genes")
    

# Stack the two label tables. genes_bad carries a logFCrank column that
# genes_good does not have, so the tables are combined with bind_rows(), which
# matches columns by name and fills the missing column with NA, rather than
# relying on how rbind() dispatches for these objects.
genes_label <- dplyr::bind_rows(genes_good, genes_bad)

# Scatter of CosMx vs annotated-sc marker logFC, one facet per contrast
# (free axes), with a linear fit and the labelled good/bad genes highlighted.
fig.size(10, 20)
integrated.myeloid$effects_marginal_sc_ann_clean %>%
  inner_join(
    integrated.myeloid$effects_marginal_cosmx_clean,
    by = c("feature", "contrast"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  ggplot(aes(logFC_cosmx, logFC_sc)) +
  # Reference lines: both axes at zero and the identity line.
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_abline() +
  geom_point(size = .5, alpha = 0.2) +
  facet_wrap(~contrast, scales = "free") +
  geom_smooth(method = "lm") +
  ggtitle("Marker logFC comparison") +
  # Highlighted genes; rows with NA on either axis cannot be drawn.
  geom_point(
    data = genes_label,
    aes(logFC_cosmx, logFC_sc, color = type)
  ) +
  geom_text_repel(
    data = genes_label,
    aes(logFC_cosmx, logFC_sc, label = feature, color = type),
    min.segment.length = 0,
    max.overlaps = Inf,
    show.legend = FALSE,
    fontface = "bold",
    box.padding = 1
  ) +
  # Make legend keys solid and large regardless of the point aesthetics.
  guides(color = guide_legend(override.aes = list(shape = 16, alpha = 1, size = 5))) +
  scale_color_tableau("Classic Blue-Red 6") +
  # geom_text(data = genes_label %>%  mutate(x = , aes(x = 1, y=c(2, 3, 5, 7, 9), label = c(genes_label$type))
  NULL

# geom_text(data=annotation, aes( x=x, y=y, label=label),                 , 
#            color="orange", 
#            size=7 , angle=45, fontface="bold" )



# Unlabelled version of the annotated-sc vs CosMx logFC scatter: shared axes
# across contrast facets, identity line and per-facet linear fit.
fig.size(8, 12)
integrated.myeloid$effects_marginal_sc_ann_clean %>%
  inner_join(
    integrated.myeloid$effects_marginal_cosmx_clean,
    by = c("feature", "contrast"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  ggplot(aes(logFC_cosmx, logFC_sc)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_abline() +
  geom_point(size = .5) +
  facet_wrap(~contrast) +
  geom_smooth(method = "lm") +
  ggtitle("Marker logFC comparison")


# what are the correlations b/w clusters in sc data?

In [None]:
# sc_corr_raw<-map(unique(markers_sc_corr$contrast), function(clust_name){
    
#     cosmx_clust<-markers_sc_corr %>% 
#         subset(contrast %in% clust_name)
#     # print(head(cosmx_clust))
    
#     map(unique(markers_sc_corr$contrast), function(sc_clust){
        
#         marker_corr<-data.frame(
#             cosmx_clust = clust_name,
#             sc_clust = sc_clust,
#             corr = markers_sc_corr %>% 
#                 subset(contrast %in% sc_clust) %>% 
#                 left_join(cosmx_clust, by = "feature", suffix = c("_sc", "_cosmx")) %>% 
#                 with(cor(logFC_sc, logFC_cosmx)) 
#             )
        
        
#     }) %>% rbindlist
        
# }) %>% 
#     rbindlist %>% 
#     spread(sc_clust, corr) %>% 
#     column_to_rownames("cosmx_clust") %>% 
#     as.matrix() %>% 
#     t() 

# sc_corr_raw %>% Heatmap()

# find correlation of genes per cluster

In [None]:
# Per-cluster correlation between sc and CosMx marker logFC.
# NOTE(review): this join keys on `cluster`, whereas the other comparisons of
# these two tables in this notebook key on `contrast` -- confirm that both
# tables really have a `cluster` column.
.t <- integrated.myeloid$effects_marginal_sc_ann_clean %>%
  inner_join(
    integrated.myeloid$effects_marginal_cosmx_clean,
    by = c("feature", "cluster"),
    suffix = c("_sc", "_cosmx")
  ) %>%
  group_by(cluster) %>%
  summarise(corr.val = cor(logFC_cosmx, logFC_sc))

In [None]:
# Preview the first rows of the per-cluster correlation table.
.t %>% head()

In [None]:
# Dot plot of the sc-vs-CosMx logFC correlation for each cluster. The y axis
# runs from the smaller of 0 and the lowest correlation up to 1; x labels are
# rotated 90 degrees.
# NOTE(review): element_textbox_simple() is not provided by any package
# attached at the top of this notebook (it looks like ggtext) -- confirm it
# is loaded elsewhere.
fig.size(8, 8)
ggplot(.t) +
  geom_point(aes(cluster, corr.val)) +
  ggtitle("Correlations b/w sc and cosmx markers in each cluster") +
  theme(plot.title = element_textbox_simple()) +
  ylim(min(0, .t$corr.val), 1) +
  guides(x = guide_axis(angle = 90)) +
  NULL

## cosmx clean ann marker list

In [None]:
# Preview the cleaned CosMx marker table.
head(integrated.myeloid$effects_marginal_cosmx_clean)

In [None]:
# Wide table of the top CosMx markers: one column per contrast, one row per
# within-contrast rank (rank 1 = largest logFC), restricted to markers with
# logFC > 0.5 and pvalue < 0.05 and to the top 20 by logFC per contrast.
# NOTE(review): rank() uses its default tie handling here, so tied logFC
# values within a contrast would share a rank.
integrated.myeloid$effects_marginal_cosmx_clean %>%
  group_by(contrast) %>%
  filter(logFC > 0.5, pvalue < 0.05) %>%
  top_n(n = 20, wt = logFC) %>%
  mutate(rank = rank(-logFC)) %>%
  ungroup() %>%
  # mutate(feature_new = paste(feature, logFC)) %>%
  dplyr::select(contrast, feature, rank) %>%
  spread(contrast, feature, fill = NA)

## cosmx marker list

In [None]:
# Same wide top-marker table for the coarse CosMx markers; each cell holds
# the feature name followed by its logFC (pasted with a space).
# NOTE(review): rank() uses its default tie handling here, so tied logFC
# values within a contrast would share a rank.
integrated.myeloid$effects_marginal_cosmx_coarse %>%
  group_by(contrast) %>%
  filter(logFC > 0.5, pvalue < 0.05) %>%
  top_n(n = 20, wt = logFC) %>%
  mutate(rank = rank(-logFC)) %>%
  ungroup() %>%
  mutate(feature = paste(feature, logFC)) %>%
  dplyr::select(contrast, feature, rank) %>%
  spread(contrast, feature, fill = NA)

# Viz

### extract the full gene counts from main df

In [None]:
# Cell IDs of the integrated cells whose `source` is "cosmx".
cellids.finetype.cosmx <- with(
  subset(integrated.myeloid$metadata, source %in% "cosmx"),
  cellID
)

In [None]:
# Number of CosMx cells in the integrated object.
cellids.finetype.cosmx %>% length()

In [None]:
# Rows of the full CosMx metadata that belong to the integrated CosMx cells.
genecounts.finetype.cosmx <- subset(
  cosmx.syno$metadata,
  cellID %in% cellids.finetype.cosmx
)

In [None]:
# UMAP panels of the integrated CosMx cells drawn from the full CosMx
# metadata columns nCounts and nGene (project helper plot_dim_red_cont;
# presumably the 4th argument is the column used for colouring -- confirm
# against the helper's definition).
p_counts <- plot_dim_red_cont(
  integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ],
  subset(cosmx.syno$metadata, cellID %in% cellids.finetype.cosmx),
  "cellID",
  "nCounts",
  "celltype.coarse",
  "nCounts of CosMx cells(from all 960 genes)",
  "UMAP"
)

p_gene <- plot_dim_red_cont(
  integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ],
  subset(cosmx.syno$metadata, cellID %in% cellids.finetype.cosmx),
  "cellID",
  "nGene",
  "celltype.coarse",
  "nGenes of CosMx cells(from all 960 genes)",
  "UMAP"
)


In [None]:
# Show the nCounts and nGene UMAP panels together (`|` is presumably the
# patchwork side-by-side operator -- depends on what plot_dim_red_cont returns).
fig.size(5, 15)
p_counts | p_gene

### now where are the unmapped clusters?

In [None]:
# Number of embedding rows selected for the CosMx cells.
nrow(integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ])

In [None]:
# UMAP of the integrated CosMx cells by type_harmony, with labels drawn on the
# plot. The second argument is a constant vector of 1s, one per embedding row
# (its meaning is defined by the project helper plot_dim_red).
p_finetyped <- plot_dim_red(
  integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ],
  rep(1, nrow(integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ])),
  subset(integrated.myeloid$metadata, cellID %in% cellids.finetype.cosmx),
  "cellID",
  "type_harmony",
  "UMAP after Harmony of cosmxfinetype",
  "UMAP",
  plot_labels = TRUE
)

In [None]:
# Display the labelled fine-type UMAP with the legend hidden.
fig.size(6, 6)
p_finetyped + theme(legend.position = "none") 

In [None]:
# Same fine-type UMAP as p_finetyped but with plot_labels = FALSE, for use
# next to the count panels.
p_finetyped_nolabels <- plot_dim_red(
  integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ],
  rep(1, nrow(integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ])),
  subset(integrated.myeloid$metadata, cellID %in% cellids.finetype.cosmx),
  "cellID",
  "type_harmony",
  "UMAP of cosmx finetype",
  "UMAP",
  plot_labels = FALSE
)

In [None]:
# Show the nCounts panel next to the unlabelled fine-type UMAP.
fig.size(5, 15)
p_counts | p_finetyped_nolabels 

### Bar plots: median nCounts and nGene per fine-typed cluster (only median nGene is plotted)

In [None]:
# Median nCounts / nGene (from the full CosMx metadata) per type_harmony
# cluster; type_harmony is attached by joining on cellID. Only medianGenes is
# drawn; medianCounts is computed but not plotted.
subset(cosmx.syno$metadata, cellID %in% cellids.finetype.cosmx) %>%
  left_join(
    subset(integrated.myeloid$metadata, cellID %in% cellids.finetype.cosmx) %>%
      dplyr::select(cellID, type_harmony),
    by = "cellID"
  ) %>%
  group_by(type_harmony) %>%
  summarise(
    medianCounts = median(nCounts),
    medianGenes = median(nGene)
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(medianGenes, type_harmony))

## Gene counts in UMAP space – restricted to the fine-type-specific genes (the selected genes?)

In [None]:
# Per-cell totals over the genes present in integrated.myeloid$counts, for
# the CosMx cells only: nCounts is the column sum, nGene the number of entries
# > 0 per column. Cell IDs (the column names) become a cellID column, which is
# then used to attach type_harmony and SampleID.
gc_select <- integrated.myeloid$counts[
  , colnames(integrated.myeloid$counts) %in% cellids.finetype.cosmx
] %>%
  {
    data.frame(
      nCounts = colSums(.),
      nGene = colSums(. > 0)
    )
  } %>%
  rownames_to_column("cellID") %>%
  left_join(
    integrated.myeloid$metadata %>%
      dplyr::select(cellID, type_harmony, SampleID),
    by = "cellID"
  )

In [None]:
# Preview the per-cell count summary.
gc_select %>% head()

In [None]:
# UMAP panels of the CosMx cells using the nCounts / nGene columns of
# gc_select, i.e. totals computed over integrated.myeloid$counts only.
p_counts_selected <- plot_dim_red_cont(
  integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ],
  gc_select,
  "cellID",
  "nCounts",
  "type_harmony",
  "nCounts (from only relevant genes)",
  "UMAP"
)

p_genes_selected <- plot_dim_red_cont(
  integrated.myeloid$Humap$embedding[cellids.finetype.cosmx, ],
  gc_select,
  "cellID",
  "nGene",
  "type_harmony",
  "nGenes (from only relevant genes)",
  "UMAP"
)

In [None]:
# Show the selected-gene nCounts and nGene UMAP panels together.
fig.size(5, 12)
p_counts_selected | p_genes_selected

In [None]:
# Show the selected-gene nCounts panel next to the unlabelled fine-type UMAP.
fig.size(5, 15)
p_counts_selected | p_finetyped_nolabels 

In [None]:
# Per-cell totals over integrated.myeloid$counts for the CosMx cells:
# column sums, and the number of entries > 0 per column. Both vectors follow
# the column order of the count matrix.
ncounts_cosmx_genes <- integrated.myeloid$counts[
  , colnames(integrated.myeloid$counts) %in% cellids.finetype.cosmx
] %>%
  colSums()

ngenes_cosmx_genes <- (
  integrated.myeloid$counts[
    , colnames(integrated.myeloid$counts) %in% cellids.finetype.cosmx
  ] > 0
) %>%
  colSums()

In [None]:
# Median per-cell totals (computed over integrated.myeloid$counts) for each
# type_harmony cluster, drawn as horizontal bars of medianGenes.
#
# ncounts_cosmx_genes / ngenes_cosmx_genes are ordered like the columns of the
# count matrix, while the rows here follow cosmx.syno$metadata. Binding them
# on by position (cbind) can therefore pair a cell with another cell's totals,
# so each value is looked up by cell ID instead. as.character() guards against
# cellID being a factor, which would otherwise index by integer code.
cosmx.syno$metadata %>%
  subset(cellID %in% cellids.finetype.cosmx) %>%
  left_join(
    integrated.myeloid$metadata %>%
      subset(cellID %in% cellids.finetype.cosmx) %>%
      dplyr::select(cellID, type_harmony),
    by = "cellID"
  ) %>%
  mutate(
    nCounts_genes = unname(ncounts_cosmx_genes[as.character(cellID)]),
    nGenes_genes = unname(ngenes_cosmx_genes[as.character(cellID)])
  ) %>%
  group_by(type_harmony) %>%
  summarise(
    medianCounts = median(nCounts_genes),
    medianGenes = median(nGenes_genes)
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(medianGenes, type_harmony))

## Isolate TPH and recluster

In [None]:
# Metadata rows of the cells labelled "CCL5+CXCR6+MAF+ TPH" in type_harmony.
CXCR6tph <- subset(
  integrated.myeloid$metadata,
  type_harmony %in% c("CCL5+CXCR6+MAF+ TPH")
)

In [None]:
# Use the cell IDs as row names of the subset, then keep them as a plain
# vector for indexing the count matrix and metadata.
rownames(CXCR6tph) <- CXCR6tph[["cellID"]]
cellids_CXCR6tph <- rownames(CXCR6tph)

In [None]:
# Rebuild CXCR6tph as a list holding the counts and metadata of the selected
# cells, both in the order of cellids_CXCR6tph.
CXCR6tph <- list()
CXCR6tph$counts <- integrated.myeloid$counts[, cellids_CXCR6tph]
# Select metadata rows by the cellID column rather than by row name: the rest
# of this notebook always keys metadata on cellID, and row-name indexing
# silently yields NA rows if the row names are not the cell IDs.
CXCR6tph$metadata <- integrated.myeloid$metadata[
  match(cellids_CXCR6tph, integrated.myeloid$metadata$cellID),
]

In [None]:
# Print the object bound to QC_harmony_pipeline_normval (presumably a helper
# function from the sourced utilities) to inspect it; nothing is run here.
QC_harmony_pipeline_normval

In [None]:
# Inspect the fgraph element of the Humap slot.
# NOTE(review): no nearby cell assigns CXCR6tph$Humap after CXCR6tph is
# re-created as a list -- confirm the pipeline was run before this cell.
CXCR6tph$Humap$fgraph

In [None]:
# Attach the parallel package and report how many CPU cores it detects.
library(parallel)
detectCores()

In [None]:
# Inspect the snn element of the Humap slot, which is the input to the
# clustering call that follows.
# NOTE(review): depends on CXCR6tph$Humap having been populated -- confirm.
CXCR6tph$Humap$snn

In [None]:
# Cluster the TPH subset on its snn graph at three resolutions, on a single
# core and without progress output; the seed is fixed for reproducibility.
set.seed(9)
CXCR6tph$Humap$clusters <- RunModularityClustering(
  CXCR6tph$Humap$snn,
  resolution = c(0.5, 1.5, 2.5),
  n_cores = 1,
  print.output = FALSE
)

In [None]:
# Look at the diagonal of the fgraph matrix.
# NOTE(review): depends on CXCR6tph$Humap having been populated -- confirm.
diag(CXCR6tph$Humap$fgraph)