In [None]:
suppressMessages({
    library(Seurat)
    library(dplyr)
    library(ggplot2)
    library(patchwork)
    library(rlang)
    library(UpSetR)
    library(Seurat)
    library(UCell)
    library(ggExtra)
    library(fgsea)
    library(DOSE)
    library(org.Hs.eg.db)
    library(tidyverse)
    library(SeuratObject)
    library(DropletUtils)
    library(Ckmeans.1d.dp)
#   library("leiden")
    library(igraph)
    library(tidyverse)
    library(patchwork)
    library(viridis)
    library(Seurat)
    library(scCustomize)
    library(qs)
    library(ggpubr)
    library(clustree)
    library("ggraph")
    library("Nebulosa")
    library(ggalluvial)
    library("MAST")
    library("pheatmap")
    library("ape")
    library("glmGamPoi")
    library(EnhancedVolcano)
    library(wesanderson)
    library(ArchR)
    library(ComplexHeatmap)
})

In [None]:
# Build the long-format data frame behind a violin plot coloured by the
# fraction of expressing cells. Despite the name, no plot is drawn here.
#
# Arguments:
#   obj               object with a `meta.data` slot and an `assays` slot whose
#                     entries expose a `data` matrix (features x barcodes).
#   feature           name of the feature (row of the data matrix) to extract.
#   assay             name of the assay to read from.
#   cell_group_col    metadata column holding the group label of every cell.
#   groups_to_compare vector of group labels to include (duplicates ignored).
#
# Returns a data.frame with one row per cell and columns:
#   value    expression of `feature` in that cell,
#   variable group label of the cell,
#   frac     fraction of cells in the group with value > 0.
# Returns NULL when `groups_to_compare` is empty.
violin1 <- function(
    obj,
    feature,
    assay="SCT",
    cell_group_col="CellClassCD4CD8.perCluster",
    groups_to_compare # char vector of cluster IDs
)
{
    md <- obj@meta.data
    if (!cell_group_col %in% colnames(md)) {
        stop(
            sprintf("Column '%s' not found in obj@meta.data", cell_group_col),
            call. = FALSE
        )
    }
    group_labels <- md[[cell_group_col]]

    # Iterating a factor with lapply() would yield factors, not labels.
    if (is.factor(groups_to_compare)) {
        groups_to_compare <- as.character(groups_to_compare)
    }
    groups_to_compare <- unique(groups_to_compare)

    build_df <- function(name, bcs)
    {
        xv <- as.numeric(obj@assays[[assay]]@data[feature, bcs])
#         xv <- obj@meta.data[bcs, feature], use this for ModuleScores
        n_cells <- length(xv)
        # rep() keeps every column the same length, so a group without any
        # cells yields a zero-row frame instead of a data.frame() error.
        data.frame(
            value = xv,
            variable = rep(name, n_cells),
            frac = rep(sum(xv > 0, na.rm = TRUE) / n_cells, n_cells)
        )
    }

    # Compare the column directly instead of parsing a string expression:
    # this works for non-syntactic column names and cannot be hijacked by a
    # metadata column that happens to be called `g`. which() drops NA labels.
    frames <- lapply(groups_to_compare, function(g) {
        grps.bcs <- rownames(md)[which(group_labels == g)]
        build_df(name = g, bcs = grps.bcs)
    })

    Reduce(rbind, frames)
}

In [None]:
# Build the long-format data frame behind a violin plot coloured by mean
# expression. Despite the name, no plot is drawn here.
#
# Arguments:
#   obj               object with a `meta.data` slot and an `assays` slot whose
#                     entries expose a `data` matrix (features x barcodes).
#   feature           name of the feature (row of the data matrix) to extract.
#   assay             name of the assay to read from.
#   cell_group_col    metadata column holding the group label of every cell.
#   groups_to_compare vector of group labels to include (duplicates ignored).
#
# Returns a data.frame with one row per cell and columns:
#   value    expression of `feature` in that cell,
#   variable group label of the cell,
#   mean_exp mean of `value` over all cells of the group.
# Returns NULL when `groups_to_compare` is empty.
violin2 <- function(
    obj,
    feature,
    assay="RNA",
    cell_group_col="seurat_clusters",
    groups_to_compare # char vector of cluster IDs
)
{
    md <- obj@meta.data
    if (!cell_group_col %in% colnames(md)) {
        stop(
            sprintf("Column '%s' not found in obj@meta.data", cell_group_col),
            call. = FALSE
        )
    }
    group_labels <- md[[cell_group_col]]

    # Iterating a factor with lapply() would yield factors, not labels.
    if (is.factor(groups_to_compare)) {
        groups_to_compare <- as.character(groups_to_compare)
    }
    groups_to_compare <- unique(groups_to_compare)

    build_df <- function(name, bcs)
    {
        xv <- as.numeric(obj@assays[[assay]]@data[feature, bcs])
#         xv <- obj@meta.data[bcs, feature], use this for ModuleScores
        n_cells <- length(xv)
        # rep() keeps every column the same length, so a group without any
        # cells yields a zero-row frame instead of a data.frame() error.
        data.frame(
            value = xv,
            variable = rep(name, n_cells),
            mean_exp = rep(mean(xv), n_cells)
        )
    }

    # Compare the column directly instead of parsing a string expression:
    # this works for non-syntactic column names and cannot be hijacked by a
    # metadata column that happens to be called `g`. which() drops NA labels.
    frames <- lapply(groups_to_compare, function(g) {
        grps.bcs <- rownames(md)[which(group_labels == g)]
        build_df(name = g, bcs = grps.bcs)
    })

    Reduce(rbind, frames)
}

In [None]:
# Save a result described by the list `args`.
#
# Fields read from `args`:
#   type   "table" or "plot".
#   title  file name (without extension) of the PDF to write.
#   width, height  PDF page size, passed to pdf().
#   FUN    the object that is print()-ed onto the PDF device.
#
# The PDF is written to "<IMG_OUT>/<title>.pdf"; IMG_OUT is a variable that
# must be defined outside this function.
#
# Returns -1 for type "table" (saving tables is not implemented), the value of
# dev.off() for type "plot", and NULL invisibly for any other type.
save_result_generic <- function(args)
{
    if (args[["type"]] == "table") {
        return(-1)
    }

    if (args[["type"]] == "plot") {
        pdf(sprintf("%s/%s.pdf", IMG_OUT, args[["title"]]),
        width=args[["width"]], height=args[["height"]])
        # Close the device before re-raising, otherwise a failed print()
        # leaves the PDF device open and later plots land in this file.
        tryCatch(
            print(args[["FUN"]]), # expression to plot: DimPlot() or such
            error = function(e) {
                dev.off()
                stop(e)
            }
        )
        dev.off()
    }
}

In [None]:
# Read a tab-separated GMT-style file into a named list of character vectors.
#
# Each non-empty line is split on tabs; the first field becomes the name of
# the list element.
#
# Arguments:
#   gmt.file    path to the file.
#   trim_first  if FALSE (default) only the name field is removed from each
#               entry; if TRUE the first two fields (name and the field that
#               follows it) are removed.
#
# Returns a named list with one character vector per non-empty line.
parse_marker_gmt <- function(gmt.file, trim_first=FALSE)
{
	lines <- readLines(gmt.file)
	# A blank line splits to character(0) and would leave an entry with no
	# name, so blank lines are skipped.
	lines <- lines[nzchar(lines)]

	fields <- strsplit(lines, "\t")
	names(fields) <- vapply(fields, function(x) x[[1]], character(1))

	n_drop <- if (trim_first) 2 else 1
	lapply(fields, tail, -n_drop)
}

In [None]:
# Bar chart of how the values of one metadata column are distributed within
# the groups of another metadata column.
#
# Arguments:
#   seuobj               object with a `meta.data` data frame slot.
#   group_by             name of the metadata column shown on the x axis.
#   check_proportion_of  name of the metadata column used for the bar fill.
#   palette              optional vector of fill colours for
#                        scale_fill_manual(); NULL keeps the default colours.
#   method               "prop": bars show within-group proportions
#                        (position "fill"); "freq": bars show cell counts
#                        (position "stack").
#
# Rows whose group or member value is NA are not counted. The denominator of
# a proportion is the number of cells in the group, including cells whose
# member value is NA.
#
# Returns a ggplot object.
display_proportion <- function(seuobj, group_by, check_proportion_of, palette=NULL, method="prop")
{
    method <- match.arg(method, c("prop", "freq"))

    M <- seuobj@meta.data
    missing_cols <- setdiff(c(group_by, check_proportion_of), colnames(M))
    if (length(missing_cols) > 0) {
        stop(
            sprintf("Column(s) not found in meta.data: %s",
                    paste(missing_cols, collapse = ", ")),
            call. = FALSE
        )
    }

    group_vals  <- as.character(M[[group_by]])
    member_vals <- as.character(M[[check_proportion_of]])

    unique_groups <- unique(group_vals[!is.na(group_vals)])
    unique_member <- unique(member_vals[!is.na(member_vals)])

    # Count every (member, group) pair in one pass. table() ignores NA, which
    # avoids the NA rows that logical indexing with NA would otherwise add to
    # the counts.
    group_fac  <- factor(group_vals, levels = unique_groups)
    member_fac <- factor(member_vals, levels = unique_member)
    pair_n     <- as.integer(table(member_fac, group_fac))
    group_n    <- rep(as.integer(table(group_fac)), each = length(unique_member))

    # Same row order as nested loops: groups outer, members inner.
    combos <- expand.grid(
        member = unique_member, group = unique_groups,
        stringsAsFactors = FALSE, KEEP.OUT.ATTRS = FALSE
    )

    value <- if (method == "prop") round(pair_n / group_n, 2) else pair_n

    cfreq <- data.frame(combos$group, combos$member, as.numeric(value),
                        stringsAsFactors = FALSE)
    colnames(cfreq) <- c(group_by, check_proportion_of, "Proportion")

    bar_position <- if (method == "prop") "fill" else "stack"
    y_label      <- if (method == "prop") "Proportion" else "Frequency"

    # .data[[...]] replaces the deprecated aes_string() and also works for
    # column names that are not syntactic R names.
    P <- ggplot(cfreq, aes(
            fill = .data[[check_proportion_of]],
            y = .data[["Proportion"]],
            x = .data[[group_by]]
        )) +
        geom_bar(position = bar_position, stat = "identity") +
        # scale_x_discrete(limits=factor(0:16)) + # add here to sort clusters manually
        theme_minimal() +
        theme(text = element_text(size=20), axis.text.x = element_text(size=20, angle=90)) +
        ylab(y_label) + xlab(group_by) + labs(fill = check_proportion_of)

    # The palette is now the only difference between the two code paths, so
    # theme and axis labels are applied with and without a palette.
    if (!is.null(palette)) {
        P <- P + scale_fill_manual(values = palette)
    }

    return(P)
}

In [None]:
# Draw a two-feature scatter plot with gate lines and return the barcodes
# that fall into each of the four quadrants defined by the gates.
#
# Arguments:
#   obj                 object with an `assays` slot; subset with
#                       obj[, bc_subset] when bc_subset is non-empty.
#   bc_subset           optional barcodes to restrict the object to.
#   assay               assay to read; "RNA" reads the `scale.data` matrix,
#                       any other assay reads the `data` matrix.
#   features            two feature names; the first is plotted on x, the
#                       second on y.
#   autogate            if TRUE the gates are computed by calling autogate().
#   gating              gates indexed by feature name (gating[[X]],
#                       gating[[Y]]); when empty, autogate is forced to TRUE.
#   ret_quad_bcs        not read anywhere in this function body.
#   highlight_barcodes  optional barcodes drawn again in `highlight_col`.
#   highlight_col       colour of the highlighted points.
#
# Side effects: prints the plot (with marginal densigrams) and, when
# bc_subset is given, the dimensions of the subset object.
#
# Returns a list with elements "gating", "Q1", "Q2", "Q3", "Q4", where Q1..Q4
# are barcode vectors clockwise from top left. Comparisons are strict, so a
# cell lying exactly on a gate line is in none of the quadrants.
plot_gating <- function(
    obj,
    bc_subset=c(),
    assay="ADT", 
    features=c(),
    autogate=FALSE, # will ignore the gating vector
    gating  = c(), # x and y,
    ret_quad_bcs = FALSE, # return barcodes falling in each quadrant 
    # clockwise from top left
    highlight_barcodes = c(),
    highlight_col="black"
)
{
    # Without user-supplied gates the only option is automatic gating.
    if (length(gating) == 0) {
        autogate <- TRUE
    }
    
    if (length(bc_subset)!=0) {
        obj <- obj[, bc_subset]
        cat(paste(dim(obj)))
    }
    
    # Get names
    X <- features[1]
    Y <- features[2]
    
    # n: one row per cell, one column per selected feature.
    if (assay == "RNA") {
        n <- as.data.frame(t(obj@assays[[assay]]@scale.data[c(X,Y),]))
        cn <- colnames(n)
    } else {
        n <- as.data.frame(t(obj@assays[[assay]]@data[c(X,Y),]))
        cn <- colnames(n)
    }
    
    # NOTE(review): autogate() is not defined in this chunk; the call resolves
    # to a function of that name because R skips the non-function argument
    # `autogate` when looking up a function. Its result is later indexed as
    # gating[[X]] / gating[[Y]] -- confirm it is named by feature.
    if (autogate) {
        gating <- autogate(t(n), k=2)
    }
    
    output <- list()
    
    # The two branches differ only in the extra layer of highlighted points
    # (and a second geom_density_2d() layer at the end of the else branch).
    if (length(highlight_barcodes) == 0) {
        
        # NOTE(review): aes_string() parses cn[1]/cn[2] as R expressions, so
        # feature names that are not syntactic may not work -- confirm.
        p <- ggplot(n, aes_string(x=cn[1], y=cn[2])) +
            geom_point(aes(col="grey")) + theme_bw()+ geom_density_2d() + 
            theme(legend.position="none", text=element_text(size=20),
            axis.text.x=element_text(size=20)) + 
            geom_hline(yintercept=gating[[Y]],col="blue") +
            geom_vline(xintercept=gating[[X]],col="blue")
        print(ggMarginal(p, type="densigram", groupFill=TRUE))
        
    } else {
        p <- ggplot(n, aes_string(x=cn[1], y=cn[2])) +
            geom_point(aes(col="grey")) + theme_bw() + geom_density_2d() +
            geom_point(data=n[highlight_barcodes,], aes_string(x=cn[1], y=cn[2]), colour=highlight_col) +
            theme(legend.position="none", text=element_text(size=20),
            axis.text.x=element_text(size=20)) +
            geom_hline(yintercept=gating[[Y]],col="blue") +
            geom_vline(xintercept=gating[[X]],col="blue") + geom_density_2d()
        print(ggMarginal(p, type="densigram", groupFill=TRUE))
    }
    
    output[["gating"]] <- gating
    
    # Quadrants, clockwise from top left: Q1 = low X / high Y,
    # Q2 = high X / high Y, Q3 = high X / low Y, Q4 = low X / low Y.
    Q1 <- n %>% filter((.[[X]] < gating[[X]]) & (.[[Y]] > gating[[Y]])) %>% rownames()
    Q2 <- n %>% filter((.[[X]] > gating[[X]]) & (.[[Y]] > gating[[Y]])) %>% rownames()
    Q3 <- n %>% filter((.[[X]] > gating[[X]]) & (.[[Y]] < gating[[Y]])) %>% rownames()
    Q4 <- n %>% filter((.[[X]] < gating[[X]]) & (.[[Y]] < gating[[Y]])) %>% rownames()
    
    # get barcodes
    output[["Q1"]] <- Q1
    output[["Q2"]] <- Q2
    output[["Q3"]] <- Q3
    output[["Q4"]] <- Q4

    return(output)                         
}

In [None]:
# Violin plot (with an inner box plot) of one metadata column per group,
# with each violin filled by the group mean of that column.
#
# Arguments:
#   mdata.df       data frame of per-cell metadata.
#   group_by       name of the column that defines the groups (x axis).
#   column_select  name of the numeric column to plot (y axis); also used as
#                  the plot title.
#
# Returns a ggplot object. The fill gradient is centred on the overall mean
# of `column_select`.
plot_violin_metadata <- function(mdata.df,group_by, column_select)
{
	missing_cols <- setdiff(c(group_by, column_select), colnames(mdata.df))
	if (length(missing_cols) > 0) {
		stop(
			sprintf("Column(s) not found in mdata.df: %s",
				paste(missing_cols, collapse = ", ")),
			call. = FALSE
		)
	}

	# Attach the per-group mean to every row. The .data pronoun looks columns
	# up by string, so names that are not valid R expressions work too, and
	# no merge on implicitly shared columns is needed.
	WZ <- mdata.df %>%
		dplyr::group_by(.data[[group_by]]) %>%
		dplyr::mutate(AvgModuleScore = mean(.data[[column_select]])) %>%
		dplyr::ungroup() %>%
		as.data.frame()

	return(ggplot(WZ, aes(x = .data[[group_by]], y = .data[[column_select]])) +
		geom_violin(aes(fill = AvgModuleScore)) +
		geom_boxplot(width=0.1, outlier.shape=NA) +
		xlab(group_by) + ylab("Mean module score") + ggtitle(column_select) +
		scale_fill_gradient2(
			low="#00AAD4",
			high="#FF2A2A",
			midpoint=mean(WZ[[column_select]])
			) + theme_classic() + theme(
				axis.title = element_text(size=15),
				axis.text.x = element_text(size=15),
				axis.text.y = element_text(size=15)
				))
}

In [None]:
# Join clonotype calls onto cell metadata and either tabulate clonotype
# frequencies per Donor x Origin combination or draw UpSet plots.
#
# Arguments:
#   obj            object with a `meta.data` slot containing the columns
#                  DoubletStatus, Donor and Origin (and seurat_clusters when
#                  plots=TRUE).
#   vdj_anno_path  CSV file with at least the columns `barcode` and
#                  `raw_clonotype_id`.
#   plots          if TRUE, write UpSet plots and return nothing; if FALSE,
#                  build and return the tables.
#
# Only cells whose DoubletStatus is "Singlet" or "Negative" are kept. Cells
# without a clonotype get frequency 0 and expanded "No"; they are excluded
# from the UpSet sets and from the frequency table.
#
# plots=TRUE : writes ../IMG/upset_cluster.png, ../IMG/upset_origin.png and
#              ../IMG/upset_donor.png; returns NULL invisibly.
# plots=FALSE: returns (invisibly) a list with
#   clonotype_by_barcode  per-cell metadata plus raw_clonotype_id, frequency
#                         (cells sharing the clonotype) and expanded (Yes/No),
#   clonotype_table       character data frame; the rows "Donor" and "Origin"
#                         describe the columns, followed by one row per
#                         clonotype with its total and per-combination counts.
build_clonotype_table <- function(obj, vdj_anno_path, plots=FALSE)
{
    demux.singlet <- obj@meta.data %>% as.data.frame() %>%
                        rownames_to_column("barcode") %>%
                        filter(DoubletStatus %in% c("Singlet", "Negative"))

    ctype <- read.csv(vdj_anno_path) %>%
        dplyr::select(barcode, raw_clonotype_id) %>%
        distinct()

    # add_count() treats NA as one big group, which would flag every cell
    # without a clonotype as "expanded"; reset their frequency to 0 first.
    dmx.ctype <- merge(demux.singlet, ctype, by="barcode", all.x=TRUE) %>%
        add_count(raw_clonotype_id, name="frequency") %>%
        mutate(
            frequency = ifelse(is.na(raw_clonotype_id), 0L, frequency),
            expanded = ifelse(frequency > 1, "Yes", "No")
        )

    if (plots)
    {
        warning("Plotting is enabled. Set plots=FALSE to disable.")
        ct.cls <- dmx.ctype %>%
            dplyr::select(seurat_clusters, raw_clonotype_id, Origin, Donor)
        ct.cls$seurat_clusters <- as.character(ct.cls$seurat_clusters)

        # Named list: one vector of clonotype IDs per unique value of
        # `column`, skipping cells with a missing label or clonotype.
        clonotype_sets <- function(column) {
            labels <- as.character(ct.cls[[column]])
            usable <- !is.na(labels) & !is.na(ct.cls$raw_clonotype_id)
            groups <- unique(labels[usable])
            setNames(
                lapply(groups, function(grp) {
                    ct.cls$raw_clonotype_id[usable & labels == grp]
                }),
                groups
            )
        }

        # Write one UpSet plot to a PNG; the device is closed even on error.
        save_upset <- function(path, sets, width, height) {
            png(path, width=width, height=height)
            on.exit(dev.off(), add = TRUE)
            print(UpSetR::upset(
                UpSetR::fromList(sets),
                order.by = "freq",
                text.scale = c(2, 2, 2, 2, 2, 2),
                point.size=3,
                line.size=1.5,
                nsets=17,
                number.angles=0
            ))
            invisible(path)
        }

        save_upset("../IMG/upset_cluster.png",
                   clonotype_sets("seurat_clusters"), width=1000, height=900)
        save_upset("../IMG/upset_origin.png",
                   clonotype_sets("Origin"), width=700, height=500)
        save_upset("../IMG/upset_donor.png",
                   clonotype_sets("Donor"), width=700, height=600)

        return(invisible(NULL))
    }

    # Clonotype IDs sorted by their numeric suffix.
    ctn <- dmx.ctype$raw_clonotype_id %>%
           gsub("clonotype", "", .) %>%
           as.numeric() %>% sort() %>% unique()
    clonotypes <- sprintf("clonotype%d", ctn)

    donors <- dmx.ctype$Donor %>% unique()
    origin <- dmx.ctype$Origin %>% unique()

    # expand.grid() varies Donor fastest.
    dv <- expand.grid(donors, origin)
    colnames(dv) <- c("Donor", "Origin")
    dv$Donor <- as.character(dv$Donor)
    dv$Origin <- as.character(dv$Origin)

    # Map every cell to its clonotype row and Donor x Origin column, then
    # count all pairs in a single table() call.
    donor_idx  <- match(as.character(dmx.ctype$Donor), as.character(donors))
    origin_idx <- match(as.character(dmx.ctype$Origin), as.character(origin))
    combo_idx  <- (origin_idx - 1L) * length(donors) + donor_idx
    ct_idx     <- match(dmx.ctype$raw_clonotype_id, clonotypes)
    counted    <- !is.na(dmx.ctype$Donor) & !is.na(dmx.ctype$Origin) &
                  !is.na(ct_idx)

    counts <- table(
        factor(ct_idx[counted], levels = seq_along(clonotypes)),
        factor(combo_idx[counted], levels = seq_len(nrow(dv)))
    )
    ftab <- as.data.frame(matrix(
        as.integer(counts),
        nrow = length(clonotypes),
        dimnames = list(clonotypes, paste(dv$Donor, dv$Origin, sep="_"))
    ))

    maccounts <- as.data.frame(as.numeric(rowSums(ftab)))
    colnames(maccounts) <- "Total"

    # Prepend two header rows (Donor, Origin) describing each count column;
    # this turns every column of the final table into character.
    D <- cbind(maccounts, ftab)
    A <- as.data.frame(t(dv))
    B <- cbind(c("NA", "NA"), A)
    colnames(B) <- c("Total", colnames(ftab))
    E <- rbind(B, D)

    results <- list (
        "clonotype_by_barcode" = dmx.ctype,
        "clonotype_table" = E
    )
    invisible(results)
}

In [None]:
# Write the plots in list `L` to a series of PDF files, several per file.
#
# Arguments:
#   L          list of plots accepted by ggarrange(plotlist = ...).
#   blocksize  each file holds the plots i .. i + blocksize, i.e. up to
#              blocksize + 1 plots.
#   nrow, ncol grid layout passed to ggarrange().
#   title      prefix of the output file names.
#
# Files are named "<IMG_OUT>/<title>_<first>_<last>.pdf", where first/last are
# the indices of the plots in the file; IMG_OUT must be defined outside this
# function. Nothing is written for an empty list. Returns NULL invisibly.
print_plots_in_list1 <- function(L, blocksize=5, nrow=6, ncol=1,title="KEGG_VIOLIN")
{
    stopifnot(is.numeric(blocksize), length(blocksize) == 1, blocksize >= 0)

    LEN <- length(L)
    i <- 1
    # Stop at the end of the list and clamp the last block, so no file is
    # written for indices that do not exist.
    while (i <= LEN)
    {
        j <- min(i + blocksize, LEN)
        P <- L[i:j]
        pdf(
            sprintf("%s/%s_%d_%d.pdf", IMG_OUT, title, i, j),
            width=15, height=30
        )
        # finally: close the device even when arranging or printing fails.
        tryCatch(
            print(ggarrange(plotlist=P, nrow=nrow, ncol=ncol)),
            finally = dev.off()
        )
        i <- j + 1
    }
    invisible(NULL)
}

In [None]:
# Heatmap of row-scaled mean expression per cell group for a set of DE genes.
#
# Arguments:
#   obj                object with a `meta.data` slot and an assay named
#                      "SCTnewcounts" that has a `scale.data` matrix.
#   filtered_de_table  data frame with a `gene` column; genes absent from the
#                      scale.data matrix are ignored.
#   subset_barcodes    optional barcodes; the object is subset to them first.
#   cell.groups        metadata column holding the group label of every cell.
#   clusters.heatmap   group labels to show, in column order of the matrix.
#
# For every group the scale.data values of each gene are averaged over the
# cells of the group; each gene (row) is then centred and scaled across the
# groups.
#
# Returns a ComplexHeatmap Heatmap object.
HMAP_sct <- function(
    obj, filtered_de_table, subset_barcodes=NULL, cell.groups="seurat_clusters",
    clusters.heatmap=c(
        "Day10_no.cytotox", "Day10_low.cytotox", "Day10_high.cytotox",
        "Day20_no.cytotox", "Day20_low.cytotox", "Day20_high.cytotox"
    )
)
{
    if (!is.null(subset_barcodes)) {
        obj <- obj[, subset_barcodes]
    }

    md <- obj@meta.data
    if (!cell.groups %in% colnames(md)) {
        stop(
            sprintf("Column '%s' not found in obj@meta.data", cell.groups),
            call. = FALSE
        )
    }
    group_labels     <- md[[cell.groups]]
    clusters.heatmap <- as.character(clusters.heatmap)

    scaled        <- obj@assays$SCTnewcounts@scale.data
    genes.heatmap <- intersect(unique(filtered_de_table$gene), rownames(scaled))

    heatmap.list <- lapply(clusters.heatmap, function(j) {
        # Direct comparison instead of a parsed string expression; which()
        # drops cells whose label is NA.
        cls.cells <- rownames(md)[which(group_labels == j)]
        # drop = FALSE keeps a matrix even for a single cell or gene, which
        # rowMeans2() requires.
        rowMeans2(scaled[genes.heatmap, cls.cells, drop = FALSE], useNames = FALSE)
    })

    mat.hmap <- matrix(
        unlist(heatmap.list),
        ncol=length(clusters.heatmap), nrow=length(genes.heatmap)
    )
    # scale() works on columns, hence the double transpose to scale per gene.
    mat.hmap <- t(scale(t(mat.hmap)))
    rownames(mat.hmap) <- genes.heatmap
    colnames(mat.hmap) <- clusters.heatmap

    # colorRamp2() lives in circlize, which is not attached by this file.
    col_fun_sct <- circlize::colorRamp2(
        c(-2, -1, 0, 1, 2),
        paletteContinuous(set = "horizonExtra", n = 5, reverse = FALSE)
    )

    Heatmap(mat.hmap, cluster_columns = TRUE, cluster_rows = TRUE,
       col = col_fun_sct,
       column_names_rot = 90, column_names_centered = TRUE,
       heatmap_legend_param = list(legend_direction = "vertical")
        )
}