In [1]:
source("../Scripts/summarize_functions.r")
source("../Scripts/functions.R")
source("../Scripts/visulizations.r")
library(dplyr)
library(purrr)
library(ggplot2)
 library("RColorBrewer")
source("../Scripts/weighted_bootstrapping.r")


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
# Cell type labels used throughout the notebook.
celltypes <- c(
  "Cytotoxic T cell",
  "CD4+ T cell",
  "CD14+ monocyte",
  "B cell",
  "Megakaryocyte",
  "Natural killer cell",
  "CD16+ monocyte",
  "Dendritic cell",
  "Plasmacytoid dendritic cell"
)

# Annotation methods that are compared.
methods <- c("Seurat", "SingleR", "CellID", "SingleCellNet", "ItClust")

# One number per cell type, named by cell type (same order as `celltypes`).
# NOTE(review): presumably the number of cells per type - confirm.
sizes <- setNames(
  c(3090, 2418, 1373, 1022, 703, 623, 273, 126, 38),
  celltypes
)

# Query metadata plus the location / name prefix of the prediction files.
query <- read.csv("../Data/Fulldata/PBMC_Query/meta.csv")
folder <- "../Data/Predictions/"
name <- "PBMC10x"

In [3]:
# Load the ItClust predictions and normalise their column names with the
# helper functions from the sourced scripts.
itclust <- get_results_method(paste(folder, "ItClust", sep = "/"), name, "ItClust")
itclust <- adjust_names(itclust, "ItClust")
# Outer join with the query metadata on "id" so that ids present on only one
# side are kept. `TRUE` is spelled out: `T` is an ordinary variable that can
# be reassigned, which would silently change the join type.
itclust <- merge(itclust, query, by = "id", all = TRUE)
# Every NA cell of the merged frame (in any column) becomes "unassigned".
itclust[is.na(itclust)] <- "unassigned"


[1] "Start ItClust ..."


In [4]:
# Load the predictions of the remaining methods (same call order as before,
# so the progress messages appear in the same sequence).
seurat <- get_results_method(paste(folder, "Seurat", sep = "/"), name, "Seurat")
scn <- get_results_method(paste(folder, "SingleCellNet", sep = "/"), name, "SingleCellNet")
singleR <- get_results_method(paste(folder, "SingleR", sep = "/"), name, "SingleR")
cellid <- get_results_method(paste(folder, "CellID", sep = "/"), name, "CellID")

# Fold the per-method tables into one wide table, full-joining on "id" from
# left to right.
data <- Reduce(
  function(joined, nxt) full_join(joined, nxt, by = "id"),
  list(cellid, seurat, scn, singleR, itclust)
)

# Index the rows by cell id and show how many rows the join produced.
rownames(data) <- data$id
nrow(data)

[1] "Start Seurat ..."
[1] "Start SingleCellNet ..."
[1] "Start SingleR ..."
[1] "Start CellID ..."


In [5]:
# Keep only the rows whose class is known, then show the remaining row count.
# NOTE(review): `$class` is kept on purpose - on a plain data.frame it may be
# resolving to the `class_` column through partial matching; confirm.
data <- data[which(!is.na(data$class)), ]
nrow(data)

In [6]:
# Reshape the wide prediction table into long format with make_long() (from
# the sourced scripts); the last argument lists the metadata columns handed
# to that helper.
long <- make_long(
  data, celltypes, methods, sizes,
  c("id", "class_", "nGene", "nUMI", "Cluster", "Experiment", "Method",
    "percent.mito")
)
# Missing predictions are labelled "unassigned".
long$predicted <- replace(long$predicted, is.na(long$predicted), "unassigned")

“[1m[22mExpected 5 pieces. Additional pieces discarded in 39285879 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].”


In [7]:
# Metrics for the full reference (size 3090, set 0): one get_measures() row
# per (class, approach) pair, stacked into a single data frame.
full <- local({
  # The gene filter does not depend on the class or the approach, so it is
  # evaluated once here instead of once per get_measures() call. local()
  # keeps the (large) intermediate subset out of the global environment.
  gene_subset <- long[long$Genes %in% c(1000, 0), ]
  approaches <- unique(long$Approach)
  do.call(rbind, lapply(unique(long$class), function(type) {
    do.call(rbind, lapply(approaches, function(method) {
      get_measures(gene_subset, type, "PBMC10x", method, 3090, 0)
    }))
  }))
})

write.csv(full, "../Results/Files/values_full.csv")
                                                                    

In [None]:
# Restrict the long table to the listed reference sizes.
all <- long[long$Size %in% c(38, 100, 250, 500, 1000, 1500, 2000, 3000), ]

# One get_measures() row per (class, approach, size, set) combination, with
# the set varying fastest and the class slowest.
# NOTE(review): sizes and sets are enumerated from `long`, not from the
# filtered table, so combinations removed by the filter above are still
# passed to get_measures() - confirm this is intended.
all <- do.call(rbind, lapply(unique(long$class), function(type) {
  per_method <- lapply(unique(long$Approach), function(method) {
    per_size <- lapply(unique(long$Size), function(size) {
      per_set <- lapply(unique(long$Set), function(set) {
        get_measures(all, type, "PBMC10x", method, size, set)
      })
      do.call(rbind, per_set)
    })
    do.call(rbind, per_size)
  })
  do.call(rbind, per_method)
}))

print(head(all))
write.csv(all, "../Results/Files/values_all.csv")

In [None]:
#' One-vs-rest classification metrics for a single cell type
#'
#' Restricts `data` to the rows of one experimental configuration and
#' computes precision, recall, F1 and per-class accuracy for `type`.
#'
#' NOTE(review): earlier cells of this notebook call get_measures() with six
#' arguments (no `genes`). Those calls fail against this definition because
#' `genes` has no default; presumably they rely on a version from the sourced
#' helper scripts - confirm before re-running them after this cell.
#'
#' @param data Long-format data frame with the columns `Reference`, `Genes`,
#'   `Approach`, `Size`, `Set`, `predicted` and `class`.
#' @param type Cell type treated as the positive class.
#' @param ref Value of `Reference` to keep.
#' @param method Value of `Approach` to keep.
#' @param size Value of `Size` to keep.
#' @param set Value of `Set` to keep.
#' @param genes Value of `Genes` to keep.
#' @return A one-row data frame with the columns class, reference, method,
#'   size, set, genes, precision, recall, f1 and accuracy. Ratios whose
#'   denominator is zero are NaN.
get_measures <- function(data, type, ref, method, size, set, genes) {
  # which() ignores comparisons that evaluate to NA. Indexing with the raw
  # logical vector would insert an all-NA row for each of them, and those
  # rows would then be counted in every cell of the confusion matrix.
  keep <- which(
    data$Reference == ref & data$Genes == genes &
      data$Approach == method & data$Size == size & data$Set == set
  )
  data <- data[keep, ]
  # Progress trace: the configuration and the number of matching rows.
  print(paste(type, ref, method, size, set, genes, nrow(data)))

  # %in% never returns NA, so a missing prediction or class counts as
  # "not `type`" instead of leaking into the counts.
  is_predicted <- data$predicted %in% type
  is_class <- data$class %in% type

  tp <- sum(is_predicted & is_class)
  fp <- sum(is_predicted & !is_class)
  fn <- sum(!is_predicted & is_class)
  tn <- sum(!is_predicted & !is_class)

  precision <- tp / (tp + fp)
  recall <- tp / (tp + fn)
  f1 <- 2 * (precision * recall) / (precision + recall)
  # Share of the cells of class `type` that were predicted as `type`
  # (tp + fn equals the number of such cells, so this matches recall).
  accuracy <- tp / sum(is_class)

  data.frame(
    "class" = type, "reference" = ref, "method" = method, "size" = size,
    "set" = set, "genes" = genes, "precision" = precision,
    "recall" = recall, "f1" = f1, "accuracy" = accuracy
  )
}

# Metrics for the full reference (size 3090, set 0) at three gene-set sizes.
# One get_measures() row per (genes, class, approach) combination; ItClust is
# left out of the approaches.
geneset <- do.call(rbind, lapply(c(1000, 200, 2000), function(genes) {
  per_type <- lapply(unique(long$class), function(type) {
    approaches <- unique(long$Approach[long$Approach != "ItClust"])
    per_method <- lapply(approaches, function(method) {
      get_measures(long, type, "PBMC10x", method, 3090, 0, genes)
    })
    do.call(rbind, per_method)
  })
  do.call(rbind, per_type)
}))

write.csv(geneset, "../Results/Files/values_geneset.csv")
                                                                      
                                                                      

In [None]:
# Copy of the full-reference metrics restricted to class, method and the four
# metric columns; the metric columns get a "full_" prefix.
full1 <- setNames(
  full[, c("class", "method", "precision", "recall", "f1", "accuracy")],
  c("class", "method", "full_precision", "full_recall", "full_f1",
    "full_accuracy")
)


In [None]:
# Re-read the query metadata (same file as at the top of the notebook; the
# doubled slash in the path is kept as written).
query <- read.csv(file = "../Data/Fulldata/PBMC_Query//meta.csv")


In [None]:
# Build the "mono" summary with get_summary(), which is not defined in this
# notebook (presumably it comes from one of the sourced scripts).
# Positional arguments, in order: the predictions folder, the PBMC10x
# reference metadata CSV, the results folder, the query metadata, the label
# "mono", the cell types, the methods, the sequence 1..20 and the
# full-reference metrics `full1`; `pattern` is set to "PBMC10x".
# NOTE(review): the role of each positional argument is inferred from the
# values passed here - confirm against get_summary()'s signature.
mono <- get_summary("../Data/Predictions/",
                    "../Data/Fulldata/PBMC10x_Reference/meta.csv",
                    "../Results/Files/", query, "mono", celltypes, methods, seq(1,20,1),
                    full1,  pattern="PBMC10x")


In [None]:
# Show which references occur in the full-reference metrics.
# `full1` keeps only class, method and the four "full_*" metric columns, so
# `full1$reference` is always NULL; the column has to be read from `full`.
# NOTE(review): assumes `full` still holds the metrics table at this point
# and that it has a `reference` column - confirm.
unique(full$reference)

In [None]:

# Build the "mosaic" summary with get_summary(), which is not defined in this
# notebook (presumably it comes from one of the sourced scripts).
# Same positional layout as the "mono" call, but with the PBMCMosaic
# reference metadata CSV, the label "mosaic" and `pattern` set to
# "PBMCMosaic".
# NOTE(review): the role of each positional argument is inferred from the
# values passed here - confirm against get_summary()'s signature.
mosaic <- get_summary("../Data/Predictions/",
                    "../Data/Fulldata/PBMCMosaic_Reference//meta.csv",
                    "../Results/Files/", query,"mosaic", celltypes, methods, seq(1,20,1),
                      full1, pattern="PBMCMosaic")

In [None]:
# From each summary keep the rows of its own reference and only the id,
# method and accuracy columns.
mono1 <- mono[mono$reference == "PBMC10x", c("id", "method", "accuracy")]
mosaic1 <- mosaic[
  mosaic$reference == "PBMCMosaic",
  c("id", "method", "accuracy")
]

# Row counts of both tables, for a quick visual check.
nrow(mono1)
nrow(mosaic1)

In [None]:

# Build a per-cell table of full-reference predictions and whether each one
# matches the true class.
# NOTE: this reuses the name `full`, replacing the metrics table that was
# assigned to `full` earlier in the notebook.

# Keep only the columns of `data` whose name contains "3090".
full <- data[, stringr::str_detect(colnames(data), "3090")]

# Re-attach the true class, the cell id and the `Method` column (as `tech`).
full$class_ <- data$class_
full$id <- data$id
full$tech <- data$Method
# Long format: one row per (cell, kept column). The original column name goes
# to `variable` (melt's default) and its value to `predicted`.
full <- reshape2::melt(full,id=c("class_", "id", "tech"), value.name = "predicted")
# TRUE where the prediction equals the true class ...
full$score <-  full$class_ == full$predicted
# ... then written as a number: assigning 1 turns the logical column numeric,
# so matches are 1 and mismatches 0.
full$score[full$score == TRUE] <- 1

# Split the former column names on "_" into at most five pieces.
full[c('reference', 'method', "size", "set", "genes")] <- stringr::str_split_fixed(full$variable, '_', 5)
unique(full$method)

# Inspection only: first rows of the ItClust predictions.
head(full[full$method == "ItClust",])
# Keep size 3090, set 0 and a `genes` piece of "1000.txt" or "0".
# NOTE(review): presumably "0" marks methods without a gene-set suffix
# (e.g. ItClust) - confirm.
full <- full[full$size == 3090 & full$set==0 & full$genes %in% c("1000.txt", "0"),]
unique(full$method)

# Final column selection and order.
full <- full[, c("id", "class_", "method", "score", "predicted", "tech")]


In [None]:
# Missing accuracies count as 0 in both summaries.
mono1$accuracy <- replace(mono1$accuracy, is.na(mono1$accuracy), 0)
mosaic1$accuracy <- replace(mosaic1$accuracy, is.na(mosaic1$accuracy), 0)

In [None]:
# Inspection only: show the distinct values of full$method (nothing is
# assigned).
unique(full$method)

In [None]:
# Wide accuracy tables: one column per method, prefixed with the name of the
# reference the summary came from.
mosaic_umap <- mosaic1 %>%
  tidyr::pivot_wider(
    names_from = method,
    values_from = accuracy,
    names_prefix = "mosaic_"
  )
mono_umap <- mono1 %>%
  tidyr::pivot_wider(
    names_from = method,
    values_from = accuracy,
    names_prefix = "mono_"
  )

# Outer join of both accuracy tables on the cell id, shown for inspection.
pred <- merge(mosaic_umap, mono_umap, by = "id", all = TRUE)
head(pred)

# Full-reference results in wide form: first the 0/1 scores ("full_" prefix),
# then the predicted labels ("fullPred_" prefix). The column that is not being
# spread is dropped before pivoting.
full1_umap <- full[, colnames(full) != "predicted"] %>%
  tidyr::pivot_wider(
    names_from = method,
    values_from = score,
    names_prefix = "full_"
  )
nrow(full1_umap)
full2_umap <- full[, colnames(full) != "score"] %>%
  tidyr::pivot_wider(
    names_from = method,
    values_from = predicted,
    names_prefix = "fullPred_"
  )
nrow(full2_umap)

# class_ and tech stay in full2_umap only, so they are removed here.
full1_umap$tech <- NULL
full1_umap$class_ <- NULL

In [None]:

# Outer-join the four wide tables on the cell id, left to right, and show the
# first rows of the result.
umap <- merge(mosaic_umap, mono_umap, by = "id", all = TRUE)
umap <- merge(umap, full1_umap, by = "id", all = TRUE)
umap <- merge(umap, full2_umap, by = "id", all = TRUE)
head(umap)


In [None]:

# Write the combined table as comma-separated text.
# NOTE(review): with write.table's defaults the row names are written but the
# header line has no entry for them - confirm that downstream readers expect
# this layout.
write.table(x = umap, file = "../Results/Files/umap_data.csv", sep = ",")