# Coarse Cell Typing

In [None]:
options(warn = -1, verbose=FALSE)
#!/usr/bin/env Rscript 
library(dplyr)
library(Seurat)
library(httr)
library(readr)
library(pheatmap)
library(RColorBrewer)
library(ggplot2)
library(cowplot)
library(patchwork)
library(unixtools)
library(ggrepel)
library(repr)
library(ggmin)
library(harmony)
library(SeuratWrappers)
library(Nebulosa)
library(ggthemes)
library(purrr)
library(radiant.data)
library(presto)
library(pryr)
# NOTE(review): ssl_verifypeer = 0L switches off TLS peer-certificate verification
# for httr requests made in this session — confirm this is still required.
set_config(config(ssl_verifypeer = 0L))
# Cap the memory of this R process (units presumably MiB — TODO confirm against
# the ulimit package documentation).
ulimit::memory_limit(200000)
# Machine-specific temp and working directories. Every relative path used below
# ("./R/utils.R", "../data_Glasgow/...", "./cache/...") resolves against this setwd().
set.tempdir("/datastore/lucy/tmp/")
setwd("/datastore/lucy/CosMx")

In [None]:
source("./R/utils.R")
start_upR(clusterfiles = TRUE)

In [None]:
QC_harmony_pipeline

In [None]:
RunModularityClustering

In [None]:
RunModularityClusteringCpp

In [None]:
plot_dim_red

# Read-in data

In [None]:
# Bundle the raw inputs in one list: `counts` (read from RDS) and the per-cell
# `metadata` table (read from CSV with fread).
obj <- list(
    counts = readRDS("../data_Glasgow/Glasgow.RDS"),
    metadata = fread("../data_Glasgow/Glasgow.csv")
)

In [None]:
cellgeoms<-readRDS("../data_Glasgow/Glasgow_cellgeoms_QC.RDS")

## QC based on cell size

In [None]:
# Keep only the metadata rows whose cellID is present in the cell-geometry table.
# NOTE(review): obj$counts is not subset here — confirm its columns already match
# these cells (the cellID/colnames check further down relies on that).
obj$metadata<-obj$metadata %>% subset(cellID %in% cellgeoms$cellID)

In [None]:
dimlist(obj)

In [None]:
head(obj$metadata)

In [None]:
unique(obj$metadata$Disease)

In [None]:
# Confirm that metadata rows and count-matrix columns describe the same cells in
# the same order. The length guard matters: `==` on its own recycles the shorter
# vector, and warnings are disabled at the top of this notebook, so a size
# mismatch could otherwise go unnoticed.
length(obj$metadata$cellID) == length(colnames(obj$counts)) &&
    all(obj$metadata$cellID == colnames(obj$counts))

# Gene-by-cell count matrix

In [None]:
dimlist(obj)

## QC - the loaded data were already QC-filtered:
+ ngenes >= 20 & ncounts >= 30

## Remove negative probes (NegPrb)

In [None]:
# Names of all probes except those whose name contains "NegPrb".
posPrbs <- grep("NegPrb", rownames(obj$counts), value = TRUE, invert = TRUE)

In [None]:
length(posPrbs)

In [None]:
dim(obj$counts[posPrbs, ])

In [None]:
obj$counts<-obj$counts[posPrbs, ]

## ngenes per cell

In [None]:
fig.size(5, 10)
plot_genes_counts(obj$counts, gene_int = 20, count_int = 30)

In [None]:
range(colSums(obj$counts > 0))

In [None]:
range(colSums(obj$counts))

In [None]:
obj$counts[1:5, 1:5]

# Harmonize

In [None]:
norm_value<-median(colSums(obj$counts))

In [None]:
norm_value

In [None]:
obj$metadata<-obj$metadata %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE)

In [None]:
head(obj$metadata)

In [None]:
# Settings forwarded to the pipeline: batch variables, clustering resolutions,
# and the Harmony theta / sigma values.
batch<-c('SampleID', 'SampleFOV')
cluster_res<-c(1.5, 2.5, 3.1)
theta_harmony<-c(0, 0) 
sigma_harmony<-0.25
# Run the project helper QC_harmony_pipeline and report how long it takes.
# NOTE(review): `obj` and `norm_value` are the only positional arguments; they
# bind, in that order, to the formals not matched by name — confirm against the
# function signature.
system.time({
    objH<-QC_harmony_pipeline(
        obj, 
        ngenes_threshold = 20, 
        ncounts_threshold = 30, 
        norm_value, 
        do_cluster_after = TRUE,
        resolution_clustering = cluster_res, 
        clustering_ncores = 8,
        vars_use = batch,
        theta = theta_harmony,
        sigma = sigma_harmony,
        max.iter.harmony = 15,
        max.iter.cluster = 100,
        return_object = TRUE
    )

    # Store the Harmony settings on the result so the cached object records them.
    objH$sigma_harmony<-sigma_harmony
    objH$vars_use<-batch
    objH$theta_harmony<-theta_harmony    

    })

# Cache the pipeline result.
saveRDS(objH, "../data_Glasgow/cache/CosMxcoarseGrainharmonyObj.RDS")

## load

In [None]:
# objH<-readRDS( "../data_Glasgow/cache/CosMxcoarseGrainharmonyObj.RDS")

In [None]:
# Build a per-cell table of embedding coordinates plus cluster assignments and
# join the metadata onto it by cellID.
# NOTE(review): this reads `obj$Humap`, but in the code shown only `objH` is
# assigned by QC_harmony_pipeline — confirm whether `objH` was intended. Later
# cells add the cluster columns again with cbind(objH$Humap$clusters), so check
# for duplicated Clust* columns before switching this cell to `objH`.
obj$metadata<-obj$Humap$embedding %>% 
    as.data.frame %>% 
    cbind(obj$Humap$clusters) %>% 
    rownames_to_column('cellID') %>% 
    left_join(obj$metadata, by = 'cellID')

In [None]:
head(obj$metadata)

## Figures

### PCA

In [None]:
# Scatter of the PCA embedding stored in objH$pca$embeddings, coloured by SampleID.
pca_before<-plot_dim_red(
    dim_red_embeddings = objH$pca$embeddings,
    clusters = NULL,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "SampleID",
    plot_title = "PCA before Harmony",
    dim_red_type = "PCA",
    size_points = 0.2
)
# Same plot for objH$H (presumably the Harmony-corrected embedding — confirm
# against QC_harmony_pipeline).
pca_after<-plot_dim_red(
    dim_red_embeddings = objH$H,
    clusters = NULL,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "SampleID",
    plot_title = "PCA after Harmony",
    dim_red_type = "PCA",
    size_points = 0.2
)
fig.size(5, 10)
# `+` binds tighter than `|`, so each plot gets its theme before the two are
# combined with `|`; legends are hidden on both.
pca_before + theme(legend.position = "none") | 
pca_after + theme(legend.position = "none")

### UMAP

In [None]:
umap_before<-plot_dim_red(
    dim_red_embeddings = objH$umap$embedding,
    clusters = NULL,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "SampleID",
    plot_title = "UMAP before Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2
)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = NULL,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "SampleID",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2
)

In [None]:
fig.size(5, 15)
umap_before | 
umap_after 

In [None]:
umap_before<-plot_dim_red(
    dim_red_embeddings = objH$umap$embedding,
    clusters = NULL,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "SampleFOV",
    plot_title = "UMAP before Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2
)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = NULL,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "SampleFOV",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2
)

In [None]:
fig.size(5, 15)

umap_after 

In [None]:
fig.size(5, 8)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = objH$Humap$clusters,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "Clust1.5",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2
)
umap_after

In [None]:
fig.size(7, 12)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = objH$Humap$clusters,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "Clust3.1",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2
)
umap_after

# Find markers

## load

## Clust3.1

In [None]:
fig.size(5, 12)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = objH$Humap$clusters,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "Clust3.1",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2,
    plot_labels = FALSE
)
umap_after

In [None]:
fig.size(10, 20)
umap_after + facet_wrap(~ color_col)

In [None]:
# Model formula for marker detection on the Clust3.1 clustering, built with the
# project helper createFormula.
formula_glmer_input<-createFormula("y", "1 + (1|Clust3.1)", c("(1|SampleFOV/Clust3.1)", "(1|SampleID/Clust3.1)", "offset(logUMI)"))

# The cluster columns are not stored in objH$metadata, so they are bound on here.
# NOTE(review): the formula groups by SampleFOV and SampleID, while the column
# vector passed last lists "SampleID", "FOV" and the cluster column — confirm
# that find_glmer_markers expects FOV rather than SampleFOV.
objH$markers_Clust3.1<-find_glmer_markers(
    objH$counts,
    objH$metadata %>% cbind(objH$Humap$clusters),
    formula_glmer_input, 
    "Clust3.1", 
    c("SampleID", "FOV", "Clust3.1") 
)
# Cache the object together with the new marker table.
saveRDS(objH, "../data_Glasgow/cache/CosMxcoarseGrainharmonyObj_markers.RDS")

### Viz clusters at 3.1

In [None]:
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)

In [None]:
# Wide table of top markers: one column per cluster, one row per rank.
# Rows must pass pvalue < 0.01 and logFC > 0.5; the 20 largest logFC per cluster
# are kept and ranked by descending logFC.
objH$markers_Clust3.1 %>%
    mutate(contrast = as.numeric(contrast)) %>%
    group_by(contrast) %>%
    filter(pvalue < 0.01, logFC > 0.5) %>%
    top_n(n = 20, wt = logFC) %>%
    mutate(rank = rank(-logFC)) %>%
    ungroup() %>%
    dplyr::select(contrast, feature, rank) %>%
    spread(key = contrast, value = feature, fill = NA)

## Clust1.5

In [None]:
# Model formula for marker detection on the Clust1.5 clustering, built with the
# project helper createFormula.
formula_glmer_input<-createFormula("y", "1 + (1|Clust1.5)", c("(1|SampleFOV/Clust1.5)", "(1|SampleID/Clust1.5)", "offset(logUMI)"))

# The cluster columns are not stored in objH$metadata, so they are bound on here.
# NOTE(review): the formula groups by SampleFOV and SampleID, while the column
# vector passed last lists "SampleID", "FOV" and the cluster column — confirm
# that find_glmer_markers expects FOV rather than SampleFOV.
objH$markers_Clust1.5<-find_glmer_markers(
    objH$counts,
    objH$metadata %>% cbind(objH$Humap$clusters),
    formula_glmer_input, 
    "Clust1.5", 
    c("SampleID", "FOV", "Clust1.5") 
)
# Cache the object together with the new marker table (same file as the
# Clust3.1 markers, which this overwrites).
saveRDS(objH, "../data_Glasgow/cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
# NOTE(review): the marker object was saved above under "../data_Glasgow/cache/",
# but it is read back here from "./cache/" — confirm both refer to the same file.
objH <- readRDS("./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

### Viz clusters at 1.5

In [None]:
fig.size(5, 8)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = objH$Humap$clusters,
    metadata = objH$metadata, 
    cell_id_colname = "cellID",
    color_by = "Clust1.5",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2,
    plot_labels = FALSE
)
umap_after

In [None]:
objH$metadata

In [None]:
fig.size(10, 15)
umap_after + facet_wrap(~ color_col)

In [None]:
# Wide table of top markers: one column per cluster, one row per rank.
# Rows must pass pvalue < 0.01 and logFC > 0.5; the 20 largest logFC per cluster
# are kept and ranked by descending logFC.
objH$markers_Clust1.5 %>%
    mutate(contrast = as.numeric(contrast)) %>%
    group_by(contrast) %>%
    filter(pvalue < 0.01, logFC > 0.5) %>%
    top_n(n = 20, wt = logFC) %>%
    mutate(rank = rank(-logFC)) %>%
    ungroup() %>%
    dplyr::select(contrast, feature, rank) %>%
    spread(key = contrast, value = feature, fill = NA)

### annotate at Clust1.5

In [None]:
# number of cells in each cluster
objH$metadata %>% 
    cbind(objH$Humap$clusters) %>% 
    with(table(Clust1.5))

In [None]:
# Attach the cluster assignments, translate each Clust1.5 cluster id into a
# coarse cell-type label, then drop the Clust* columns again.
objH$metadata<-objH$metadata %>% 
    cbind(objH$Humap$clusters) %>% 
    mutate(celltype.coarse = case_when(
        Clust1.5 %in%(c(0)) ~ "T cells",
        Clust1.5 %in%(c(1, 5, 6, 12)) ~ "Myeloid",
        Clust1.5 %in%(c(7)) ~ "DC",
        Clust1.5 %in%(c(2, 8, 9, 11)) ~ "Fibroblasts",
        Clust1.5 %in%(c(3)) ~ "Endothelial",
        Clust1.5 %in%(c(4)) ~ "Plasma",
        Clust1.5 %in%(c(10)) ~ "Mural",
        Clust1.5 %in%(c(13)) ~ "B cells",
        Clust1.5 %in%(c(14)) ~ "Mast",
        Clust1.5 %in%(c(15)) ~ "Adipocytes",
        Clust1.5 %in%(c(16)) ~ "ProliferationT",
        # Clusters without a label keep their id. It is converted to character
        # because case_when() needs every output to share one type; a numeric
        # fallback next to the character labels above fails.
        .default = as.character(Clust1.5)
        )
    ) %>% 
    dplyr::select(- starts_with("Clust"))


In [None]:
unique(objH$metadata$celltype.coarse)

In [None]:
# NOTE(review): reloading here replaces the objH whose metadata was annotated with
# celltype.coarse in the cells above. The next cell needs that column, so the
# cached file must already contain it — confirm, or skip this reload.
objH <- readRDS("./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
# Turn the coarse labels into an ordered factor with a fixed display order.
objH$metadata$celltype.coarse <- factor(
    objH$metadata$celltype.coarse,
    levels = c(
        "Adipocytes", "Fibroblasts", "Mural", "Endothelial",
        "B cells", "Plasma", "T cells", "ProliferationT", "Mast", "Myeloid",
        "DC"
    ),
    ordered = TRUE
)

# One colour per level, listed in the same order as the levels above.
cols <- c(
    "#ffeca9", "gold", "sienna1", "red3", "pink", "deeppink",
    "green3", "green4", "lightblue1", "royalblue1", "royalblue4"
)

In [None]:
fig.size(7, 9)
umap_after<-plot_dim_red(
    dim_red_embeddings = objH$Humap$embedding,
    clusters = objH$Humap$clusters,
    metadata = objH$metadata,
    cell_id_colname = "cellID",
    color_by = "celltype.coarse",
    plot_title = "UMAP after Harmony",
    dim_red_type = "UMAP",
    size_points = 0.2,
    plot_labels = FALSE
)
umap_after + theme_powerpoint() + scale_fill_manual(values=cols)+ scale_color_manual(values=cols)

In [None]:
dim(objH$counts)

In [None]:
# number of cells of each type
table(objH$metadata$celltype.coarse) %>% as.data.frame %>% arrange(-Freq)

In [None]:
head(objH$metadata)

In [None]:
# Copy the ".x"-suffixed SampleID / FOV columns (presumably produced by a join
# with a table that carries the same column names) back to their plain names.
# Each copy is guarded: assigning from a column that does not exist assigns NULL,
# which would delete the existing SampleID / FOV column.
if ("SampleID.x" %in% colnames(objH$metadata)) {
    objH$metadata$SampleID <- objH$metadata$SampleID.x
}
if ("FOV.x" %in% colnames(objH$metadata)) {
    objH$metadata$FOV <- objH$metadata$FOV.x
}

### glmer markers for annotations

In [None]:
# Fit one mixed model per feature and collect the fits into a single result list.
#
# Arguments:
#   formula       model formula; occurrences of `size_varname` in it are replaced
#                 by "EXPOSURE" before fitting.
#   design        per-observation table; gains an EXPOSURE column copied from
#                 design[[size_varname]].
#   response      feature-by-observation matrix, indexed by feature row name.
#   size_varname  name of the column in `design` that holds the size/exposure term.
#   features      features to fit; defaults to all row names of `response`.
#   effects_cov, nsim, family, min_sigma
#                 forwarded unchanged to glmm_uni for every feature.
#   ncore         1 = sequential; 0 or Inf = multisession with future's default
#                 worker count; any other value = multisession with that many workers.
#   verbose       values > 0 print progress messages.
#
# Returns the list built by collapse_lres, extended with betanames_df,
# priornames_df, meta_data, design, response, gene means (genemeans.presto) and
# the call settings (has_offset, family, size_varname, nsim, formula_str).
#
# NOTE(review): fit_model.presto, make_betanames_df, glmm_uni, genemeans.presto,
# VarCorr and isGLMM are not defined in this notebook — they presumably come from
# presto / lme4; confirm they are visible from the global environment.
presto.presto <- function (formula, design, response, size_varname, features = NULL, 
    effects_cov = c(""), ncore = 1, nsim = 100, family = "poisson", 
    min_sigma = 0, verbose = 0L) 
{
    if (is.null(features)) {
        features <- rownames(response)
    }
    if (family %in% c("poisson", "binomial", "nb")) {
        message("CAUTION: if using GLMM, make sure your counts are integers!")
    }
    design$EXPOSURE <- design[[size_varname]]
    # as.character() on a two-sided formula gives c("~", lhs, rhs); rebuild the
    # formula from lhs and rhs after renaming the size variable.
    fstr <- gsub(size_varname, "EXPOSURE", as.character(formula))
    formula <- as.formula(sprintf("%s~%s", fstr[[2]], fstr[[3]]), 
        env = .GlobalEnv)
    if (verbose > 0) {
        message("Set up models")
    }
    # A single fit on the first feature supplies the term names, the offset flag
    # and the design matrices shared by all features.
    model_base <- fit_model.presto(formula, design, response[features[[1]], 
        ], family)
    priornames_df <- as.data.frame(VarCorr(model_base))[, 1:3]
    if (isGLMM(model_base)) {
        priornames_df <- rbind(priornames_df, tibble(grp = "Residual", 
            var1 = NA, var2 = NA))
    }
    has_offset <- !all(map_lgl(model_base@resp$offset, identical, 
        0))
    betanames_df <- make_betanames_df(model_base, has_offset)
    features <- intersect(features, rownames(response))
    # Every future symbol is namespace-qualified: this notebook never attaches
    # `future`, so bare `sequential` / `availableCores` would not resolve when the
    # function runs from the global environment.
    if (ncore == 1) {
        future::plan(future::sequential)
    }
    else if (ncore %in% c(0, Inf)) {
        ncore <- future::availableCores()
        future::plan(future::multisession)
    }
    else {
        # Pass the worker count as an argument of plan() instead of routing it
        # through a global variable.
        future::plan(future::multisession, workers = ncore)
    }
    if (verbose > 0) {
        message("Learn the models")
    }
    lres <- furrr::future_map(features, glmm_uni, formula, design, 
        response, effects_cov, family, nsim, has_offset, min_sigma)
    names(lres) <- features
    # Keep only the fits whose status is exactly 0L.
    lres <- lres[which(purrr::map_lgl(as.integer(map_int(lres, 
        "status")), identical, 0L))]
    if (verbose > 0) {
        message("Aggregate the results")
    }
    res <- collapse_lres(lres)
    if (verbose > 0) {
        message("Cleap up names")
    }
    covmat_names <- tibble(grpvar_orig = rownames(res$covmat)) %>% 
        left_join(subset(betanames_df, term %in% c("(Intercept)", 
            "Fixed"))) %>% dplyr::mutate(newname = case_when(is.na(grpvar) ~ 
        grpvar_orig, TRUE ~ as.character(glue::glue("{grpvar}.{grp}.{term}")))) %>% 
        with(newname)
    dimnames(res$covmat) <- list(covmat_names, covmat_names, 
        colnames(res$beta))
    res$betanames_df <- betanames_df
    res$priornames_df <- priornames_df
    res$meta_data <- design
    # Stack the offset (when present), the fixed-effect and the random-effect
    # design matrices row-wise.
    if (has_offset) {
        res$design <- list(EXPOSURE = model_base@resp$offset, 
            t(model_base@pp$X), model_base@pp$Zt) %>% purrr::reduce(Matrix::rbind2)
    }
    else {
        res$design <- list(t(model_base@pp$X), model_base@pp$Zt) %>% 
            purrr::reduce(Matrix::rbind2)
    }
    row.names(res$design) <- res$betanames_df$grp
    res$response <- response[names(lres), ]
    if (verbose > 0) {
        message("Compute gene means")
    }
    res <- genemeans.presto(res, xpm = 1e+06)
    res$has_offset <- has_offset
    res$family <- family
    res$size_varname <- size_varname
    res$nsim <- nsim
    res$formula_str <- as.character(formula)
    return(res)
}

In [None]:
#These are internal functions i.e. unexported so we must access with :::
collapse_vecs <- presto:::collapse_vecs
collapse_mats <- presto:::collapse_mats
collapse_lres <- presto:::collapse_lres

In [None]:
# Model formula for marker detection on the coarse cell-type labels, built with
# the project helper createFormula.
formula_glmer_input<-createFormula("y", "1 + (1|celltype.coarse)", c("(1|SampleFOV/celltype.coarse)", "(1|SampleID/celltype.coarse)", "offset(logUMI)"))

# celltype.coarse is already a metadata column, so no cluster table is bound on.
# NOTE(review): the formula groups by SampleFOV and SampleID, while the column
# vector passed last lists "SampleID", "FOV" and "celltype.coarse" — confirm that
# find_glmer_markers expects FOV rather than SampleFOV.
objH$markers_celltype_coarse<-find_glmer_markers(
    objH$counts,
    objH$metadata,
    formula_glmer_input, 
    "celltype.coarse", 
    c("SampleID", "FOV", "celltype.coarse") 
)


In [None]:
# Wide table of top markers: one column per coarse cell type, one row per rank.
# Rows must pass pvalue < 0.01 and logFC > 0.5; the 20 largest logFC per cell
# type are kept and ranked by descending logFC.
objH$markers_celltype_coarse %>%
    group_by(contrast) %>%
    filter(pvalue < 0.01, logFC > 0.5) %>%
    top_n(n = 20, wt = logFC) %>%
    mutate(rank = rank(-logFC)) %>%
    ungroup() %>%
    dplyr::select(contrast, feature, rank) %>%
    spread(key = contrast, value = feature, fill = NA)

### heatmap of markers

In [None]:
# Pick the significant genes to show in the heatmap and fix their column order.
# Steps of the chain below, in order:
#   1. keep rows with pvalue < 0.05;
#   2. per gene, keep only the row with the highest zscore, so each gene is
#      assigned to exactly one contrast;
#   3. per contrast, take the 20 rows with the highest logFC (padded with NA rows
#      when a contrast has fewer than 20) and drop the padding again;
#   4. sort ascending by logFC within each contrast and record the group size (len);
#   5. reorder each contrast: the first round(len/3) rows stay in ascending order,
#      the remaining rows follow in descending order, so the maximum logFC sits
#      directly after the first third and values fall off on either side of it.
# NOTE(review): seq(1, round(len/3), 1) errors when round(len/3) is 0, i.e. for a
# contrast left with a single gene — confirm that cannot occur.
genes_use_df<-data.table(objH$markers_celltype_coarse)[
    pvalue < 0.05][
    , .SD[order(-zscore)][1, ], by = feature][
    , .SD[order(-logFC)][1:20, ], by = contrast][
    , .SD[!is.na(feature)]][
    , .SD[order(logFC)], by = contrast][
    , ":=" (len = .N, rank = frank(logFC)), by = contrast][
    , .SD[c(seq(1, round(.SD$len[1]/3), 1), seq(.SD$len[1], round(.SD$len[1]/3)+1, -1)), ], by = contrast][
    , .(feature, logFC, zscore, contrast, pvalue) # the genes in this list will be unique because I assigned one z-score to each gene
]

message("Checking that the genes are unique: ", nrow(genes_use_df) == length(unique(genes_use_df$feature)))
message("Number of genes in heatmap: ", nrow(genes_use_df))

# Select the 5 genes with the highest logFC in each contrast as heatmap labels.
# Each gene already belongs to a single contrast in genes_use_df.
# NOTE(review): a contrast with fewer than 5 genes yields NA rows here — confirm
# that downstream labelling tolerates them.
genes_to_mark_celltypes<-genes_use_df[, .SD[order(-logFC)][1:5, ], by = contrast][
    , .(contrast, feature)
]
# Additionally label a fixed set of genes when they are present in genes_use_df;
# genes already among the top 5 will then appear twice.
genes_to_mark_celltypes<-genes_to_mark_celltypes %>% 
    # subset(! contrast %in% c("Neutrophils", "T cells")) %>% 
    rbind(genes_use_df[feature %in% c("MPO", "CYSTM1", "EFNA4"), .(contrast, feature)]) %>% 
    rbind(genes_use_df[feature %in% c("CD3G", "CD3E", "IL32"), .(contrast, feature)])

.mat<-objH$markers_celltype_coarse %>% 
    subset(feature %in% genes_use_df$feature) %>% 
    dplyr::select(cluster, logFC, feature) %>% 
    tidyr::spread(cluster, logFC) %>% 
    tibble::column_to_rownames('feature') %>% 
    as.matrix() %>% t() 
.mat<-.mat[, genes_use_df$feature]

message(
    "Are all genes that are being marked also present in the matrix?: ", 
    length(setdiff(genes_to_mark_celltypes$feature, colnames(.mat))) == 0
)

genes_idx<-data.frame(idx = which(colnames(.mat) %in% genes_to_mark_celltypes$feature)) %>% 
    mutate(feature = colnames(.mat)[idx])

genes_to_mark_celltypes<-genes_to_mark_celltypes %>% left_join(genes_idx, by = "feature")


names(cols)<-char(levels(objH$metadata$celltype.coarse))
message("Colors being used: ")
print(cols)
message("Check if any cell types above are not getting a color because of a smaller palette")


fig.size(5, 15)
ha<-columnAnnotation(
    col_ann = anno_mark(
        at = genes_to_mark_celltypes$idx,
        side = "bottom",
        labels = genes_to_mark_celltypes$feature)
    )

ha2<-columnAnnotation(show_legend = FALSE,
    celltypes = genes_use_df$contrast,
    col = list(celltypes = cols)
    )

# ha2<-columnAnnotation(
#     celltypes = anno_block(
#         gp = gpar(fill = col_pal), 
#         labels = genes_use_df$contrast %>% unique

# ))
split<-genes_use_df$contrast
colnames(.mat)<-NULL
.mat %>% 
    Heatmap(
        name = 'logFC',
        col = circlize::colorRamp2(c(0, 0.1, 3), c('white', 'white', muted('blue'))),
        # column_names_gp = grid::gpar(fontsize = 10),
        row_dend_side = "right",
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        top_annotation = ha2, 
        # row_km = 12,
        bottom_annotation = ha,
        column_split = split
    )

In [None]:
char(levels(objH$metadata$celltype.coarse))


In [None]:
fig.size(5, 25)
.mat<-objH$markers_celltype_coarse %>% 
    subset(feature %in% genes_use_df$feature) %>% 
    dplyr::select(cluster, logFC, feature) %>% 
    tidyr::spread(cluster, logFC) %>% 
    tibble::column_to_rownames('feature') %>% 
    as.matrix() %>% t() 
.mat<-.mat[, genes_use_df$feature]
.mat %>% 
    Heatmap(
        name = 'logFC',
        col = circlize::colorRamp2(c(0, 0.1, 3), c('white', 'white', muted('blue'))),
        column_names_gp = grid::gpar(fontsize = 6),
        row_dend_side = "right",
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        top_annotation = ha2, 
        # row_km = 12,
        # column_title = "Heatmap of markers for coarse-grain cell types", 
        column_split = split
    )


In [None]:
names(objH)

## cache

In [None]:
saveRDS(objH, "./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
objH<-readRDS("./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
objH$markers_celltype_coarse

## glmer heatmap with zscores instead of logFC

In [None]:
# Pick the significant genes to show in the heatmap and fix their column order.
# Steps of the chain below, in order:
#   1. keep rows with pvalue < 0.05;
#   2. per gene, keep only the row with the highest zscore, so each gene is
#      assigned to exactly one contrast;
#   3. per contrast, take the 20 rows with the highest logFC (padded with NA rows
#      when a contrast has fewer than 20) and drop the padding again;
#   4. sort ascending by logFC within each contrast and record the group size (len);
#   5. reorder each contrast: the first round(len/3) rows stay in ascending order,
#      the remaining rows follow in descending order, so the maximum logFC sits
#      directly after the first third and values fall off on either side of it.
# NOTE(review): seq(1, round(len/3), 1) errors when round(len/3) is 0, i.e. for a
# contrast left with a single gene — confirm that cannot occur.
genes_use_df<-data.table(objH$markers_celltype_coarse)[
    pvalue < 0.05][
    , .SD[order(-zscore)][1, ], by = feature][
    , .SD[order(-logFC)][1:20, ], by = contrast][
    , .SD[!is.na(feature)]][
    , .SD[order(logFC)], by = contrast][
    , ":=" (len = .N, rank = frank(logFC)), by = contrast][
    , .SD[c(seq(1, round(.SD$len[1]/3), 1), seq(.SD$len[1], round(.SD$len[1]/3)+1, -1)), ], by = contrast][
    , .(feature, logFC, zscore, contrast, pvalue) # the genes in this list will be unique because I assigned one z-score to each gene
]

message("Checking that the genes are unique: ", nrow(genes_use_df) == length(unique(genes_use_df$feature)))
message("Number of genes in heatmap: ", nrow(genes_use_df))

# select the top 3 genes with highest logFC in each cluster and assign each gene to a cell type based on it logFC - the gene belongs to the celltype with higher logFC
genes_to_mark_celltypes<-genes_use_df[, .SD[order(-logFC)][1:3, ], by = contrast][
    , .(contrast, feature)
]

.mat<-objH$markers_celltype_coarse %>% 
    subset(feature %in% genes_use_df$feature) %>% 
    dplyr::select(cluster, zscore, feature) %>% 
    tidyr::spread(cluster, zscore) %>% 
    tibble::column_to_rownames('feature') %>% 
    as.matrix() %>% t() 
.mat<-.mat[, genes_use_df$feature]

message(
    "Are all genes that are being marked also present in the matrix?: ", 
    length(setdiff(genes_to_mark_celltypes$feature, colnames(.mat))) == 0
)

genes_idx<-data.frame(idx = which(colnames(.mat) %in% genes_to_mark_celltypes$feature)) %>% 
    mutate(feature = colnames(.mat)[idx])

genes_to_mark_celltypes<-genes_to_mark_celltypes %>% left_join(genes_idx, by = "feature")

message("Colors being used: ")
message("Check if any cell types above are not getting a color because of a smaller palette")


fig.size(5, 15)
ha<-columnAnnotation(
    col_ann = anno_mark(
        at = genes_to_mark_celltypes$idx,
        labels = genes_to_mark_celltypes$feature,
        side = "bottom"
        )
    )

ha2<-columnAnnotation(
    celltypes = genes_use_df$contrast,
    col = list(celltypes = cols)
    )

# ha2<-columnAnnotation(
#     celltypes = anno_block(
#         gp = gpar(fill = col_pal), 
#         labels = genes_use_df$contrast %>% unique

# ))
split<-genes_use_df$contrast
colnames(.mat)<-NULL
.mat %>% 
    Heatmap(
        name = "zscore",
        col = circlize::colorRamp2(c(-20, 1, 20), c('white', 'white', muted('blue'))),
        # column_names_gp = grid::gpar(fontsize = 10),
        row_dend_side = "right",
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        top_annotation = ha2, 
        # row_km = 12,
        # column_title = "Heatmap of markers for coarse-grain cell types", 
        bottom_annotation = ha,
        column_split = split
    )

# Markers in UMAP space

In [None]:
unique(objH$metadata$celltype.coarse)

In [None]:
genes_viz<-genes_to_mark_celltypes$feature

In [None]:
cellids<-objH$metadata %>% subset(!grepl("Doublet", celltype.coarse)) %>% with(cellID)
counts_metadata<-objH$logcpx[
    genes_viz, cellids] %>% 
    t() %>% 
    as.matrix %>% 
    as.data.frame %>% 
    rownames_to_column("cellID") %>% 
    gather(target, geneval, -cellID) %>% 
    left_join(
        genes_to_mark_celltypes %>% dplyr::select(contrast, feature), 
        by = c("target" = "feature")
    ) %>%  
    mutate(celltypes = paste0(target, " (", contrast, ")")) %>% 
    identity()

In [None]:
head(counts_metadata)

In [None]:
markers_umap

In [None]:
fig.size(10, 10)
p<-markers_umap(
    dim_red_embeddings = objH$Humap$embedding[cellids, ],
    metadata = counts_metadata, 
    cell_id_colname = "cellID", 
    color_by = "geneval", 
    plot_title = "Markers in UMAP space",
    dim_red_type = "UMAP"
) 

## order the facets based on cell types

In [None]:
fig.size(12, 15)
# Facet by contrast and gene so that panels of the same contrast sit together.
# NOTE(review): the labeller is keyed on `celltypes`, which is not one of the
# facet variables used here (contrast, target), so the label wrapping presumably
# has no effect — confirm the intended facet variable.
p + facet_wrap(~ contrast + target, labeller = labeller(celltypes = (label_wrap_gen(10))))

In [None]:
genes <- as.data.frame(rownames(objH$counts))

In [None]:
options(repr.matrix.max.rows=1000, repr.matrix.max.cols=200)
genes

In [None]:
genes_viz<-c("IL15","IL2RA","IL2RB","IL2RG","IL15RA")


In [None]:
objH$metadata

In [None]:
dimlist(objH)

In [None]:
objH$logcpx[
    genes_viz, colnames(objH$logcpx) %in% objH$metadata$cellID
    ] %>% 
    t() %>% 
    as.matrix %>% 
    as.data.frame %>% 
    rownames_to_column("cellID")

In [None]:
counts_metadata<-objH$logcpx[
    genes_viz, colnames(objH$logcpx) %in% objH$metadata$cellID
    ] %>% 
    t() %>% 
    as.matrix %>% 
    as.data.frame %>% 
    rownames_to_column("cellID") %>% 
    gather(target, geneval, -cellID) %>% 
    identity() %>% arrange(geneval) %>% left_join(objH$metadata %>% dplyr::select(cellID, celltype.coarse), by = "cellID")

In [None]:
counts_metadata

In [None]:
fig.size(10, 20)
# Violin of the plotted value per coarse cell type, one panel per gene, with the
# mean marked as a red point. `fun` is used instead of `fun.y`, which ggplot2
# deprecated in version 3.3.0.
ggplot(counts_metadata, aes(x = celltype.coarse, y = geneval)) +
    geom_violin() +
    facet_grid(~target) +
    theme(axis.text.x = element_text(angle = 90)) +
    stat_summary(fun = mean, geom = "point", size = 2, color = "red")

In [None]:
# Plot a numeric per-cell value on a 2-D embedding, one facet per `target`.
#
# Arguments:
#   dim_red_embeddings  matrix-like with cell ids as row names; its first two
#                       columns are used as plot coordinates.
#   metadata            long table holding `cell_id_colname`, `target` and the
#                       column named by `color_by`. Columns whose name starts
#                       with "V" are dropped before the join, so `color_by` must
#                       not start with "V".
#   cell_id_colname     name of the cell-id column used for the join.
#   color_by            name of the numeric column that colours the points.
#   label_by            not used; kept so existing calls keep working.
#   plot_title          plot title.
#   dim_red_type        axis-label prefix, e.g. "UMAP" gives UMAP_1 / UMAP_2.
#   legend_posn         legend position passed to theme().
#   shape_points        point shape passed to geom_point().
#   size_points         not used; kept so existing calls keep working.
#
# Returns the ggplot object.
markers_umap <- function (dim_red_embeddings, metadata, cell_id_colname, color_by, 
    label_by, plot_title, dim_red_type, legend_posn = "right", 
    shape_points = ".", size_points = 0.1) 
{
    plt_df <- dim_red_embeddings[, 1:2] %>%
        as.data.frame %>%
        purrr::set_names("V1", "V2") %>%
        rownames_to_column(cell_id_colname) %>%
        left_join(
            metadata %>% dplyr::select(-starts_with("V")),
            by = cell_id_colname, multiple = "all"
        ) %>%
        # Shuffle, then sort ascending: rows with the highest values come last,
        # and rows with equal values stay in shuffled order.
        sample_frac(1) %>%
        arrange(.data[[color_by]])
    # The colour scale is made symmetric around 0 using the largest value.
    range_col <- range(plt_df[[color_by]])
    p <- plt_df %>%
        ggplot() +
        geom_point(aes(V1, V2, color = .data[[color_by]]), shape = shape_points) +
        ggtitle(plot_title) +
        scale_color_gradient2_tableau(limits = c(-range_col[2], range_col[2])) +
        # Keep the legend titled with the column name.
        labs(color = color_by) +
        xlab(paste0(dim_red_type, "_1")) +
        ylab(paste0(dim_red_type, "_2")) +
        theme(plot.title = element_textbox_simple()) +
        theme(legend.position = legend_posn) +
        facet_wrap(~target) +
        NULL
    p
}

In [None]:
fig.size(10, 10)
p_adip<-markers_umap(
    dim_red_embeddings = objH$Humap$embedding,
    metadata = counts_metadata, 
    cell_id_colname = "cellID", 
    color_by = "geneval", 
    plot_title = "Markers in UMAP space",
    dim_red_type = "UMAP"
) 

In [None]:
fig.size(7, 15)
p_adip

# Coarse cell types in space

In [None]:
cellgeoms<-readRDS("/datastore/lucy/CosMx/cache/raw/Glasgow_cellgeoms_QC.RDS")

In [None]:
cellgeoms<-cellgeoms %>%
    left_join(objH$metadata %>% dplyr::select(cellID, celltype.coarse), by = "cellID")

In [None]:
cellgeoms <- cellgeoms %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE)

In [None]:
# Attach the per-FOV condition table, keeping only rows present in both tables.
# No `by` is given, so the merge presumably joins on every column name the two
# tables share — confirm the shared columns are the intended keys.
# NOTE(review): sampleFOVcondition is only read from CSV in a later cell of this
# notebook ("Cell fractions across samples and FOVs"); when running top to bottom
# it has to be loaded before this merge.
cellgeoms <- merge(cellgeoms, sampleFOVcondition, all=FALSE)

In [None]:
cellgeoms$SampleFOV <- paste0(cellgeoms$Condition,"_",cellgeoms$SampleFOV)

In [None]:
unique(cellgeoms$celltype.coarse)

In [None]:
head(cellgeoms %>% subset(is.na(celltype.coarse)))

In [None]:
# Name the palette by coarse cell type. The column is `celltype.coarse`; no
# `cell.coarse` column is created anywhere in the code shown, and levels() of a
# missing column is NULL, which strips the names from `cols`.
names(cols) <- levels(cellgeoms$celltype.coarse)

In [None]:
library(ggdark)
options(repr.plot.width=6, repr.plot.height=6)
# One map per SampleFOV: cell shapes filled by coarse cell type on a dark theme;
# cells without a cell type are drawn in black.
purrr::map(
    unique(cellgeoms$SampleFOV),
    function(sample_fov_label) {
        ggplot(subset(cellgeoms, SampleFOV %in% sample_fov_label)) +
            geom_sf(
                aes(geometry = shape, fill = celltype.coarse),
                na.rm = TRUE, linewidth = 0.1
            ) +
            scale_fill_manual(values = cols, na.value = "black", na.translate = TRUE) +
            ggtitle(sample_fov_label) +
            dark_theme_classic()
    }
)

# Cell fractions across samples and FOVs

In [None]:
sampleFOVcondition <- read.table("/datastore/lucy/CosMx/cache/SampleFOV-condition.csv", header=TRUE, sep=",")

In [None]:
objH$metadata <- objH$metadata %>% inner_join(sampleFOVcondition, by = "SampleFOV")

In [None]:
fig.size(8, 4)
.vals<-unique(objH$metadata$celltype.coarse)
.vals<-.vals[!is.na(.vals)]

objH$metadata %>% 
    with(table(celltype.coarse, Condition)) %>% 
    prop.table(margin = 2) %>% 
    as.data.frame %>% 
    ggplot() + 
        geom_col(aes(x = Condition, y = Freq, fill = celltype.coarse)) + 
        scale_fill_manual(values = cols) +
        guides(x =  guide_axis(angle = 90)) + theme_powerpoint() + theme(axis.text=element_text(size=20), axis.title=element_text(size=30))


In [None]:
fig.size(5, 5)

cellgeoms %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE) %>% 
    with(table(SampleID, SampleFOV, celltype.coarse)) %>% 
    prop.table(margin = 1) %>% 
    as.data.frame %>% 
    ggplot() + 
        geom_col(aes(SampleID, Freq, fill = celltype.coarse)) + 
        scale_fill_manual(values = cols) + 
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

## cell fractions across FOVs in each of the three slides

In [None]:
fig.size(10, 10)
# Stacked bars of coarse cell-type fractions per FOV for SampleID
# "Run5654_399G0_32G77". prop.table(margin = 2) normalises over the second table
# dimension (SampleFOV), so the bars of each FOV sum to 1.
cellgeoms %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE) %>% 
    subset(SampleID %in% "Run5654_399G0_32G77") %>% 
    with(table(SampleID, SampleFOV, celltype.coarse)) %>% 
    prop.table(margin = 2) %>% 
    as.data.frame %>% 
    ggplot() + 
        geom_col(aes(SampleFOV, Freq, fill = celltype.coarse)) + 
        scale_fill_manual(values = cols) + 
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
        ggtitle("Fraction of cells in each FOV")

In [None]:
fig.size(10, 10)
# Stacked bars of coarse cell-type fractions per FOV for SampleID
# "Run5666_G2697_G1532". prop.table(margin = 2) normalises over the second table
# dimension (SampleFOV), so the bars of each FOV sum to 1.
cellgeoms %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE) %>% 
    subset(SampleID %in% "Run5666_G2697_G1532") %>% 
    with(table(SampleID, SampleFOV, celltype.coarse)) %>% 
    prop.table(margin = 2) %>% 
    as.data.frame %>% 
    ggplot() + 
        geom_col(aes(SampleFOV, Freq, fill = celltype.coarse)) + 
        scale_fill_manual(values = cols) + 
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
        ggtitle("Fraction of cells in each FOV")

In [None]:
fig.size(10, 10)
# Stacked bars of coarse cell-type fractions per FOV for SampleID
# "Run5666_G7G72_G22G5". prop.table(margin = 2) normalises over the second table
# dimension (SampleFOV), so the bars of each FOV sum to 1.
cellgeoms %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE) %>% 
    subset(SampleID %in% "Run5666_G7G72_G22G5") %>% 
    with(table(SampleID, SampleFOV, celltype.coarse)) %>% 
    prop.table(margin = 2) %>% 
    as.data.frame %>% 
    ggplot() + 
        geom_col(aes(SampleFOV, Freq, fill = celltype.coarse)) + 
        scale_fill_manual(values = cols) + 
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
        ggtitle("Fraction of cells in each FOV")

## Add in finetyping results

In [None]:
integrated.tcell <- readRDS("./cache/integrated.tcell.rds")
integrated.fibro <- readRDS("./cache/integrated.fibro.RDS")
integrated.myeloid <- readRDS("./cache/integrated.myeloid.RDS")

In [None]:
integrated.fibro$metadata$type_harmony <- integrated.fibro$metadata$type_harmony_clean

In [None]:
integrated.tcell$metadata$type_harmony <- paste0("Tcell_",integrated.tcell$metadata$type_harmony)
integrated.myeloid$metadata$type_harmony <- paste0("Myeloid_",integrated.myeloid$metadata$type_harmony)
integrated.fibro$metadata$type_harmony <- paste0("Fibro_",integrated.fibro$metadata$type_harmony)

In [None]:
head(integrated.tcell$metadata)
head(integrated.fibro$metadata)
head(integrated.myeloid$metadata)

In [None]:
head(objH$metadata)

In [None]:
# Stack the cellID -> type_harmony assignments of the three fine-typing objects
# and attach them to the coarse metadata; cells found in none of them get NA.
# NOTE(review): a cellID present in more than one fine-typing object would
# duplicate its metadata row in this left join — confirm the three sets are disjoint.
fine.meta <- objH$metadata %>% left_join(
    rbind(integrated.tcell$metadata %>% dplyr::select(cellID, type_harmony),
          integrated.myeloid$metadata %>% dplyr::select(cellID, type_harmony),
          integrated.fibro$metadata %>% dplyr::select(cellID, type_harmony)
    ), 
    by = "cellID"
)

In [None]:
head(fine.meta)

In [None]:
objH$metadata <- fine.meta

In [None]:
# Replace NA values for cell types that were not fine-typed with the coarse label.
# celltype.coarse is converted to character first: it is turned into an ordered
# factor earlier in this notebook, and ifelse() applied to a factor yields the
# integer level codes instead of the labels.
objH$metadata$type_harmony <- ifelse(
    is.na(objH$metadata$type_harmony),
    as.character(objH$metadata$celltype.coarse),
    objH$metadata$type_harmony
)

In [None]:
saveRDS(objH, "./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
objH <- readRDS("./cache/CosMxcoarseGrainharmonyObj_markers.RDS")

In [None]:
objH

In [None]:
objH$metadata

# Fine cell types in space

In [None]:
library(sf)

In [None]:
cellgeoms<-readRDS("/datastore/lucy/CosMx/cache/raw/Glasgow_cellgeoms_QC.RDS")

In [None]:
head(cellgeoms)

In [None]:
cellgeoms<-cellgeoms %>%
    left_join(objH$metadata %>% dplyr::select(cellID, type_harmony, celltype.coarse), by = "cellID")

In [None]:
sampleFOVcondition <- read.table("/datastore/lucy/CosMx/cache/SampleFOV-condition.csv", header=TRUE, sep=",")

In [None]:
cellgeoms <- cellgeoms %>% 
    unite("SampleFOV", c("SampleID", "FOV"), remove = FALSE)

In [None]:
cellgeoms <- merge(cellgeoms, sampleFOVcondition, all=FALSE)

In [None]:
cellgeoms$SampleFOV <- paste0(cellgeoms$Condition,"_",cellgeoms$SampleFOV)

In [None]:
# List the fine cell types present. Only `type_harmony` and `celltype.coarse` are
# joined onto cellgeoms above; `type_harmony_clean` is not among its columns.
unique(cellgeoms$type_harmony)

In [None]:
clusters.of.interest <- c("Myeloid_CD63+ DC3","Myeloid_CLEC10A+ STM","Myeloid_CLEC9A+ DC1","Myeloid_DC2","Myeloid_CCR7+ migDC",
                  "Tcell_CCL5+CXCR6+MAF+ TPH","Tcell_MAF+ TPH","Tcell_RORA+CD4+ TEM","Tcell_HSP+CD4+ TEM",
                 "Tcell_ANXA1+CD4+ TEM","Tcell_CCR7+LTB+CD4+ TCM","Tcell_CCR7+SELL+CD4+ Naive","Tcell_FOXP3+ Treg")

In [None]:
# Keep the fine type only for the clusters of interest; everything else becomes NA.
# `%in%` tests membership in the whole set. `==` would compare element by element
# against a recycled copy of clusters.of.interest, and would return NA for rows
# whose type_harmony is NA.
cellgeoms$type_dct <- ifelse(
    cellgeoms$type_harmony %in% clusters.of.interest,
    cellgeoms$type_harmony,
    NA
)

In [None]:
num.clust <- length(unique(cellgeoms$type_dct))

In [None]:
# cols <- tableau_color_pal('Tableau 20')(num.clust)
# Build one rainbow colour per non-NA type and name the palette by type. NA is
# left out on purpose: the plot draws NA cells with na.value, so counting NA as a
# category would spend one hue on an entry that is never used.
type_dct_levels <- unique(cellgeoms$type_dct[!is.na(cellgeoms$type_dct)])
cols <- rainbow(n = length(type_dct_levels))
names(cols) <- type_dct_levels

In [None]:
library(ggdark)
options(repr.plot.width=10, repr.plot.height=10)
# One map per SampleFOV: cell shapes filled by the selected fine cell types on a
# dark theme; cells whose type_dct is NA are drawn in black.
purrr::map(
    unique(cellgeoms$SampleFOV),
    function(sample_fov_label) {
        ggplot(subset(cellgeoms, SampleFOV %in% sample_fov_label)) +
            geom_sf(
                aes(geometry = shape, fill = type_dct),
                na.rm = TRUE, linewidth = 0.1
            ) +
            scale_fill_manual(values = cols, na.value = "black", na.translate = TRUE) +
            ggtitle(sample_fov_label) +
            dark_theme_classic()
    }
)