In [None]:
library(dplyr)
library(purrr)
getName <- function(folder, position){
  name <- unlist(stringr::str_split(folder,"/"))
  name <- name[length(name)]
  name <- unlist(stringr::str_split(name,"_"))[position]
  return(name)
}
getVector <- function(method, folder){
    if(method %in% c("Seurat", "SCN", "CellID", "SingleR")){
  if(stringr::str_detect(folder, "predict")) stop("Wrong file")
  name <- paste(sep="_", getName(folder,1), method ,getName(folder,2),
                stringr::str_replace(getName(folder,3), ".txt", "") )
  df <- read.csv(folder, sep="\t")
  df <- df[,c("id", "predicted")] #.match
  colnames(df) <- c("id", name)
    
} else if (method == "ItClust"){

  name <- paste(sep="_",  getName(folder,1), "ItClust",getName(folder,2),
                getName(folder,3))
  print(name)
  df <- read.csv(paste(folder, "results.txt", sep="/"), sep="\t")
  df <- df[,c("class_", "predicted_celltype", "cell_id")]
  colnames(df) <- c("class", "predicted", "id")
  df[,name] <- df$predicted # df$class == 
  df <- df[, c("id",  name)]
}
    rownames(df) <- df$id
    return(df)
}

get_results_method <- function(resultfolder, id, method){
    print(paste("Start", method, "..."))
    files <- list.files(resultfolder, pattern=id, full.names = T)
    print(head(files))
    print(length(files))
    if (method == "Seurat") files <- files[stringr::str_detect(files,
                                                               "predict", negate=T)]
    data <- lapply(files, function(file) getVector(method, file))
    summary <- data %>% reduce(full_join, by = "id")
    return(summary)
}
                   
translate <- function(col){
  
    col[col == "B"] <- "B cell"
    col[col == "Cytotoxic T"] <- "Cytotoxic T cell"
    col[col == "CD4+ T"] <- "CD4+ T cell"
    col[col == "Dendritic"] <- "Dendritic cell"
    col[col == "Natural killer"] <- "Natural killer cell"
    col[col == "Plasmacytoid dendritic"] <- "Plasmacytoid dendritic cell"
    return(col)
} 
                   
adjust_names <- function(data, name="ItClust"){
    print(ncol(data))
    cols <- colnames(data)[stringr::str_detect(colnames(data), name)] 
    x<- do.call(cbind,lapply(cols, function(col) translate(data[,col])))
    print(ncol(x))
    colnames(x) <- cols  
    id <- data[,!(colnames(data) %in% cols)]

    data <- cbind(x,id)
    print(ncol(data))
    return(as.data.frame(data))
}
                             
transform_PBMC_results <- function(data_file, celltypes, methods, sizes,
                                   cols= c("id","nGene", "nUMI", "percent.mito", "Cluster", "class_", "Experiment", "Method")){
    data <- read.csv(data_file)
    x <- reshape2::melt(data,  id.vars =cols,value.name = "Prediction")
    
    x[c('Reference', 'Approach', "Size", "Set")] <- stringr::str_split_fixed(x$variable, '_', 4)
    print(head(x))
    x$Match <- x$Prediction == x$class_ 

    x$refSize <- sizes[ match(x$class_, celltypes ) ]
    x$Approach <- factor(x$Approach, levels=methods)
    x$class <- factor(x$class_, levels=celltypes)
    x <- x[,c("id", "Prediction", "Reference", "Approach", "Size", "Set", "class", "refSize", "Match")]
    x$Size <- as.numeric(x$Size)
    x <- x[!is.na(x$Match),]
    return(x)
}                            

In [3]:
folder <- "/fast/AG_Haghverdi/Carla_Moelbert/Celltype_annotation/Data/Predictions/"
metafile <- "/fast/AG_Haghverdi/Carla_Moelbert/Celltype_annotation/Data/Fulldata/PBMC_Query/meta.csv"

In [40]:
seurat <- get_results_method(paste(sep="/",folder,"Seurat/done_PBMC10x"), "PBMC10x", "Seurat")
scn <- get_results_method(paste(sep="/",folder,"SCN/done_PBMC10x" ), "PBMC10x", "SCN")
itclust <- get_results_method(paste(sep="/",folder,"ItClust" ), "PBMC10x", "ItClust")
itclust <- adjust_names(itclust)
singleR <- get_results_method(paste(sep="/",folder,"SingleR" ), "PBMC10x", "SingleR")
cellid <- get_results_method(paste(sep="/",folder,"CellID" ), "PBMC10x", "CellID")
data <- list(cellid, seurat, scn, singleR, itclust) %>% reduce(full_join, by = "id")

meta <- read.csv(metafile)
data <- merge(data,meta)  
rownames(data) <- data$id
head(data)
write.table(data, "../Results/Files/results_PBMC10x.csv", sep=",", col.names=T, row.names=T, quote=F, append=F)

[1] "Start Seurat ..."
[1] "Start SCN ..."
[1] "Start ItClust ..."
[1] 1526
[1] 1525
[1] 1526
[1] "Start SingleR ..."
[1] "Start CellID ..."


Unnamed: 0_level_0,id,PBMC10x_CellID_100_1,PBMC10x_CellID_100_10,PBMC10x_CellID_100_100,PBMC10x_CellID_100_101,PBMC10x_CellID_100_102,PBMC10x_CellID_100_103,PBMC10x_CellID_100_104,PBMC10x_CellID_100_105,PBMC10x_CellID_100_106,⋯,PBMC10x_ItClust_75_7,PBMC10x_ItClust_75_8,PBMC10x_ItClust_75_9,nGene,nUMI,percent.mito,Cluster,class_,Experiment,Method
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<int>,<chr>,<chr>,<chr>
pbmc2_10X_V2_AAACCTGAGATGGGTC,pbmc2_10X_V2_AAACCTGAGATGGGTC,B cell,B cell,B cell,B cell,B cell,B cell,B cell,B cell,B cell,⋯,B cell,B cell,B cell,1044,2360,0.04194915,2,B cell,pbmc2,10x
pbmc2_10X_V2_AAACCTGAGCGTAATA,pbmc2_10X_V2_AAACCTGAGCGTAATA,B cell,B cell,B cell,B cell,B cell,B cell,B cell,B cell,CD14+ monocyte,⋯,B cell,B cell,B cell,803,1888,0.04131356,2,B cell,pbmc2,10x
pbmc2_10X_V2_AAACCTGAGCTAGGCA,pbmc2_10X_V2_AAACCTGAGCTAGGCA,Natural killer cell,Natural killer cell,Natural killer cell,Natural killer cell,Natural killer cell,Cytotoxic T cell,Cytotoxic T cell,Cytotoxic T cell,Natural killer cell,⋯,Cytotoxic T cell,Cytotoxic T cell,Cytotoxic T cell,1372,3456,0.03530093,1,Cytotoxic T cell,pbmc2,10x
pbmc2_10X_V2_AAACCTGAGGGTCTCC,pbmc2_10X_V2_AAACCTGAGGGTCTCC,Dendritic cell,CD14+ monocyte,Dendritic cell,Dendritic cell,Dendritic cell,Dendritic cell,Dendritic cell,Dendritic cell,Dendritic cell,⋯,Dendritic cell,Dendritic cell,Dendritic cell,1519,3802,0.04208311,6,Dendritic cell,pbmc2,10x
pbmc2_10X_V2_AAACCTGGTCCGAACC,pbmc2_10X_V2_AAACCTGGTCCGAACC,CD4+ T cell,CD4+ T cell,CD4+ T cell,CD4+ T cell,CD4+ T cell,CD4+ T cell,CD4+ T cell,CD4+ T cell,CD4+ T cell,⋯,Natural killer cell,Natural killer cell,Natural killer cell,1451,3826,0.03711448,0,CD4+ T cell,pbmc2,10x
pbmc2_10X_V2_AAACCTGTCGTCCGTT,pbmc2_10X_V2_AAACCTGTCGTCCGTT,Cytotoxic T cell,Cytotoxic T cell,Plasmacytoid dendritic cell,Natural killer cell,Cytotoxic T cell,CD4+ T cell,CD4+ T cell,Cytotoxic T cell,Cytotoxic T cell,⋯,CD4+ T cell,CD4+ T cell,CD4+ T cell,931,2345,0.0652452,0,CD4+ T cell,pbmc2,10x


In [4]:
seurat <- get_results_method(paste(sep="/",folder,"Seurat"), "PBMCMosaic", "Seurat")
scn <- get_results_method(paste(sep="/",folder,"SCN" ), "PBMCMosaic", "SCN")
itclust <- get_results_method(paste(sep="/",folder,"ItClust" ), "PBMCMosaic", "ItClust")
itclust <- adjust_names(itclust)

singleR <- get_results_method(paste(sep="/",folder,"SingleR" ), "PBMCMosaic", "SingleR")
cellid <- get_results_method(paste(sep="/",folder,"CellID" ), "PBMCMosaic", "CellID")
data <- list(cellid, seurat, scn, singleR, itclust) %>% reduce(full_join, by = "id")
meta <- read.csv(metafile)
data <- merge(data,meta)  
rownames(data) <- data$id
head(data)
write.table(data, "../Results/Files/results_PBMCMosaic.csv", sep=",",
            col.names=T, row.names=T, quote=F, append=F)

[1] "Start Seurat ..."
[1] "Start SCN ..."
[1] "Start ItClust ..."


“cannot open file '/fast/AG_Haghverdi/Carla_Moelbert/Celltype_annotation/Data/Predictions//ItClust/PBMCMosaicBalanced_102_1/results.txt': No such file or directory”


ERROR: Error in file(file, "rt"): cannot open the connection


In [17]:
#seurat <- get_results_method(paste(sep="/",folder,"Seurat"), "Kidney", "Seurat")
#scn <- get_results_method(paste(sep="/",folder,"SCN" ), "Kidney", "SCN")
#itclust <- get_results_method(paste(sep="/",folder,"ItClust" ), "Kidney", "ItClust")
#singleR <- get_results_method(paste(sep="/",folder,"SingleR" ), "Kidney", "SingleR")
#cellid <- get_results_method(paste(sep="/",folder,"CellID" ), "Kidney", "CellID")
data <- list(cellid, seurat, scn, singleR, itclust) %>% reduce(full_join, by = "id")
meta <- read.csv(metafile)
data <- merge(data,meta)  
rownames(data) <- data$id
head(data)
write.table(data, "../Results/Files/results_Kidney.csv", sep=",", col.names=T, row.names=T, quote=F, append=F)

Unnamed: 0_level_0,id,KidneyMouse_CellID_228_10,KidneyMouse_CellID_228_100,KidneyMouse_CellID_228_101,KidneyMouse_CellID_228_102,KidneyMouse_CellID_228_103,KidneyMouse_CellID_228_104,KidneyMouse_CellID_228_105,KidneyMouse_CellID_228_106,KidneyMouse_CellID_228_107,⋯,KidneyMouse_ItClust_228_91,KidneyMouse_ItClust_228_92,KidneyMouse_ItClust_228_93,KidneyMouse_ItClust_228_94,KidneyMouse_ItClust_228_95,KidneyMouse_ItClust_228_96,KidneyMouse_ItClust_228_97,KidneyMouse_ItClust_228_98,KidneyMouse_ItClust_228_99,KidneyMouse_ItClust_26482_0
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,AAACCTGAGGCGCTCT.1_H15T_normal,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,⋯,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage
2,AAACCTGAGGTTCCTA.1_H15T_normal,T_Cells,T_Cells,Macrophage,T_Cells,T_Cells,T_Cells,T_Cells,T_Cells,Macrophage,⋯,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage
3,AAACCTGAGTCGATAA.1_H15T_normal,Loop of Henle,Distal_Tubules,Distal_Tubules,Loop of Henle,Loop of Henle,Distal_Tubules,Distal_Tubules,Distal_Tubules,Distal_Tubules,⋯,Loop of Henle,Loop of Henle,Distal_Tubules,Distal_Tubules,Loop of Henle,Loop of Henle,Loop of Henle,Loop of Henle,Distal_Tubules,PT
4,AAACCTGCACATCCGG.1_H15T_normal,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,Macrophage,⋯,NK_Cells,NK_Cells,NK_Cells,NK_Cells,NK_Cells,NK_Cells,NK_Cells,NK_Cells,NK_Cells,NK_Cells
5,AAACCTGCATGTTGAC.1_H15T_normal,CD_IC,PT,PT,PT,PT,PT,PT,PT,PT,⋯,Distal_Tubules,Distal_Tubules,Loop of Henle,Distal_Tubules,Distal_Tubules,Loop of Henle,Distal_Tubules,Loop of Henle,Distal_Tubules,PT
6,AAACCTGGTAGGACAC.1_H15T_normal,T_Cells,T_Cells,T_Cells,Macrophage,T_Cells,Macrophage,Macrophage,T_Cells,T_Cells,⋯,B_Cells,B_Cells,T_Cells,T_Cells,B_Cells,T_Cells,B_Cells,B_Cells,T_Cells,Distal_Tubules


In [None]:
celltypes = c("Cytotoxic T cell", "CD4+ T cell", "CD14+ monocyte", "B cell", "Megakaryocyte", "Natural killer cell",
              "CD16+ monocyte", "Dendritic cell", "Plasmacytoid dendritic cell")
methods <- c("Seurat",  "SingleR","CellID", "SCN", "ItClust")

sizes <- c(3090, 2418, 1373, 1022, 703, 623, 273, 126, 23)
names(sizes) <- celltypes

In [2]:


data <- transform_PBMC_results("../Results//Files//results_PBMC10x.csv", celltypes, methods, sizes)
write.table(data, "../Results/Files/results_PBMC10x_long.csv", sep=",", col.names=T, row.names=T, quote=F, append=F)

                             id nGene nUMI percent.mito Cluster
1 pbmc2_10X_V2_AAACCTGAGATGGGTC  1044 2360   0.04194915       2
2 pbmc2_10X_V2_AAACCTGAGCGTAATA   803 1888   0.04131356       2
3 pbmc2_10X_V2_AAACCTGAGCTAGGCA  1372 3456   0.03530093       1
4 pbmc2_10X_V2_AAACCTGAGGGTCTCC  1519 3802   0.04208311       6
5 pbmc2_10X_V2_AAACCTGGTCCGAACC  1451 3826   0.03711448       0
6 pbmc2_10X_V2_AAACCTGTCGTCCGTT   931 2345   0.06524520       0
            class_ Experiment Method             variable          Prediction
1           B cell      pbmc2    10x PBMC10x_CellID_100_1              B cell
2           B cell      pbmc2    10x PBMC10x_CellID_100_1              B cell
3 Cytotoxic T cell      pbmc2    10x PBMC10x_CellID_100_1 Natural killer cell
4   Dendritic cell      pbmc2    10x PBMC10x_CellID_100_1      Dendritic cell
5      CD4+ T cell      pbmc2    10x PBMC10x_CellID_100_1         CD4+ T cell
6      CD4+ T cell      pbmc2    10x PBMC10x_CellID_100_1    Cytotoxic T cell
  Refe

In [None]:
data <- transform_PBMC_results("../Results//Files//results_PBMCMosaic.csv", celltypes, methods, sizes)
write.table(data, "../Results/Files/results_PBMCMosaic_long.csv", sep=",", col.names=T, row.names=T, quote=F, append=F)