# load data

In [None]:
# Collect the full paths of all files in the Zenodo data directory whose name
# matches the pattern 'obj', then show the paths and how many were found.
objList <- list.files(
  path = '/project/sex_cancer/data/data_zenodo',
  pattern = 'obj',
  full.names = TRUE
)
objList
length(x = objList)

In [None]:
# Read every RDS file listed in objList into a list.
seuratList <- lapply(objList, readRDS)
# Name each element after its file: drop the directory, the literal "obj."
# prefix and the ".rds" extension. The patterns are anchored and the dots are
# escaped so only the real prefix/suffix is removed (an unescaped '.' matches
# any character, so '.rds' could also strip text from inside a dataset name).
names(seuratList) <- objList %>%
  basename() %>%
  sub('^obj\\.', '', .) %>%
  sub('\\.rds$', '', .)

# extract intersecting genes

In [None]:
# Row (gene) names of every object in seuratList.
geneList <- lapply(seuratList, rownames)

# Unique genes over all objects. ext_list() is defined outside this section;
# presumably it flattens its input into a plain vector -- TODO confirm.
geneList_all <- unique(ext_list(geneList))
length(geneList_all) ## 65547 genes

# Genes whose name occurs exactly 13 times across the pooled gene lists.
# NOTE(review): 13 is presumably the number of datasets in seuratList, which
# would make this the intersection of all gene lists -- confirm.
geneList_freq13 <- geneList %>%
  unlist() %>%
  table() %>%
  as.data.frame() %>%
  subset(Freq == 13) %>%
  .[, 1] %>%
  ext_list()
length(geneList_freq13) ## 13414 genes

# tumor cells integration

In [None]:
# Keep a copy of the list names (one per loaded object) and display them.
seuratList_name <- names(x = seuratList)
seuratList_name

## filter cells & genes (retain only tumor cells)

In [None]:
# For every object keep only cells with gCT == 'Tumor' and
# SampleType == 'tumor', and restrict the features to geneList_freq13.
seuratList <- lapply(seuratList, function(obj) {
  obj %>%
    subset(gCT == 'Tumor') %>%
    subset(SampleType == 'tumor') %>%
    # `features` is spelled out in full; the previous `feature =` only worked
    # through partial argument matching.
    subset(features = geneList_freq13)
})
names(seuratList) <- seuratList_name
seuratList
# Total number of retained cells over all objects.
sum(vapply(seuratList, ncol, integer(1)))
# Combine the filtered objects into one object.
seurat_TumorCell <- merge(seuratList[[1]], seuratList[-1])

## normalize, run PCA, integrate with harmony, and run UMAP/tSNE

In [None]:
# Log-normalize, pick 1000 variable features (vst), scale while regressing out
# nCount_RNA, and run PCA on the merged tumor-cell object.
# TRUE/FALSE are written out in full: `F` is an ordinary variable that can be
# reassigned, `FALSE` is a reserved constant.
seurat_TumorCell <- seurat_TumorCell %>%
  NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000, verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 1000, verbose = FALSE) %>%
  ScaleData(vars.to.regress = c("nCount_RNA"), verbose = FALSE) %>%
  RunPCA(verbose = FALSE)

# PCA embedding coloured by Cohort (before harmony is run).
options(repr.plot.height = 3, repr.plot.width = 6)
DimPlot(object = seurat_TumorCell, reduction = "pca", group.by = "Cohort", cols = paletteer_d("ggsci::default_igv"))

## run harmony
library(harmony)
seurat_TumorCell <- seurat_TumorCell %>%
  RunHarmony(group.by.vars = "Cohort", plot_convergence = TRUE)
## cluster
# PC_selection_harmony() is defined outside this section; the smallest value
# of its `PCselect` element is used as the number of harmony dimensions.
nPC <- min(PC_selection_harmony(seurat_TumorCell)$PCselect)
seurat_TumorCell <- seurat_TumorCell %>%
  RunUMAP(reduction = "harmony", dims = 1:nPC, umap.method = "uwot") %>%
  RunTSNE(reduction = "harmony", dims = 1:nPC)
colnames(seurat_TumorCell@meta.data)

In [None]:
# Four embeddings of the tumor cells joined with `|`, coloured by gCT, mCT,
# Cohort and Sex. Every panel now names the reduction explicitly (the Sex
# panel previously relied on the default reduction) and uses FALSE instead of
# the reassignable `F`.
# NOTE(review): the Sex panel uses pt.size = 1 while the others use .1 --
# kept as in the original; confirm this is intended.
options(repr.plot.height = 5, repr.plot.width = 22)
select <- 'umap'
DimPlot_scCustom(seurat_TumorCell, pt.size = .1, group.by = "gCT", reduction = select, label = FALSE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(seurat_TumorCell, pt.size = .1, group.by = "mCT", reduction = select, label = TRUE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(seurat_TumorCell, pt.size = .1, group.by = "Cohort", reduction = select, label = FALSE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(seurat_TumorCell, pt.size = 1, group.by = "Sex", reduction = select, label = TRUE, label.size = 4, colors_use = pal_igv("default")(51))

# malignancy score calculation

In [None]:
# Split the merged tumor-cell object into a list with one object per Cohort
# value, then display the list.
obj <- SplitObject(seurat_TumorCell, split.by = "Cohort")
obj

In [None]:
# code source: https://github.com/czythu/scCancer/blob/master/vignettes/malignantCellIden.Rmd
#' Score cells with the XGBoost malignancy model shipped with scCancer
#'
#' Loads `sc_xgboost.model` and `genes-scRNA-tcga-sorted.txt` from the "txt"
#' folder of the installed scCancer package, builds a cells x genes matrix from
#' the object's RNA `scale.data` (model genes missing from the data stay 0),
#' and predicts one score per cell.
#'
#' @param object An object exposing `object@assays$RNA@scale.data` and
#'   supporting `object$name <- value` (the callers pass Seurat objects).
#' @return `object` with two added per-cell fields: `malignant.score` (the raw
#'   prediction) and `malignant.label` ("malignant" when the score is > 0.5,
#'   otherwise "nonMalignant").
scCancer_malignancy <- function(object){
    txt.dir <- system.file("txt", package = "scCancer")
    model.path <- paste0(txt.dir, "/sc_xgboost.model")
    genes.path <- paste0(txt.dir, "/genes-scRNA-tcga-sorted.txt")
    model.ref <- xgb.load(model.path)

    # First column of the gene file = features expected by the model.
    features <- read.table(genes.path)[[1]]
    # Transposed scale.data, so that rows are cells and columns are genes.
    testdata <- t(as.matrix(object@assays$RNA@scale.data))

    # Zero-filled matrix in the model's feature order; copy over the columns
    # that exist in the data. match() returns the first matching column, which
    # is what name-based indexing did in the original per-feature loop.
    temp <- matrix(data = 0, nrow = nrow(testdata), ncol = length(features),
                   dimnames = list(rownames(testdata), features))
    idx <- match(features, colnames(testdata))
    found <- !is.na(idx)
    if (any(found)) {
        temp[, found] <- testdata[, idx[found], drop = FALSE]
    }

    # Prediction
    predict.score <- predict(model.ref, xgb.DMatrix(temp))
    # Label from the numeric score. The original overwrote the numeric vector
    # in place: the first assignment coerced it to character, so the second
    # test (`<= 0.5`) became a string comparison and scores printed in
    # scientific notation (e.g. "1e-05") were never relabelled.
    predict.label <- ifelse(predict.score > 0.5, "malignant", "nonMalignant")

    # Store the results on the object.
    object$malignant.label <- predict.label
    object$malignant.score <- predict.score
    return(object)
}

In [None]:
# Score each per-cohort object, merge the scored objects back into one, and
# rename the two new metadata columns.
obj <- lapply(obj, scCancer_malignancy)
obj <- merge(x = obj[[1]], y = obj[-1])
obj@meta.data <- dplyr::rename(
  obj@meta.data,
  c('Malignant_label' = 'malignant.label', 'Malignant_score' = 'malignant.score')
)

## save

In [None]:
# Append the malignancy label/score columns from `obj` to the metadata of
# seurat_TumorCell. Rows are looked up by cell name so they line up with the
# cells of seurat_TumorCell. The full object is then written to disk.
seurat_TumorCell@meta.data <- cbind(
  seurat_TumorCell@meta.data,
  obj@meta.data[colnames(seurat_TumorCell), c('Malignant_label', 'Malignant_score')]
)[colnames(seurat_TumorCell), ]
saveRDS(seurat_TumorCell, file = 'obj.TumorCell.all.rds')

# Diet tumor component
Downsample samples with more than 1000 cells to 1000 randomly selected cells.

In [None]:
# Work on a copy of the merged tumor-cell object and list the distinct
# SampleID values, then show how many there are.
obj <- seurat_TumorCell
sampleList <- unique(x = obj$SampleID)
length(x = sampleList)

In [None]:
# Build the metadata table of the cells to keep: samples with more than
# `max_cells` cells are randomly downsampled to `max_cells`; smaller samples
# are kept whole.
max_cells <- 1000
# Fix the RNG seed so the random downsampling (and the object saved from it)
# is reproducible between runs.
set.seed(1234)
sampleDiet <- lapply(sampleList, function(x){
                    sampleMeta <- obj@meta.data %>% subset(SampleID == x)
                    Ncell <- nrow(sampleMeta)
                    if (Ncell > max_cells) {
                        # drop = FALSE keeps a data frame even for one-column metadata
                        sampleMeta <- sampleMeta[sample(Ncell, max_cells), , drop = FALSE]
                    }
                    sampleMeta
                }) %>% do.call(rbind, .)
dim(sampleDiet)
sampleDiet %>% head(n = 2)

## filter
# Keep only the selected cells; row names of sampleDiet are the cell names.
obj.diet <- obj %>% subset(cells = rownames(sampleDiet))

In [None]:
# Subset the object to the downsampled cells and display it.
obj.diet <- obj %>% subset(cells = rownames(sampleDiet))
obj.diet

# Five embeddings of the downsampled object joined with `|`, coloured by gCT,
# mCT, Cohort, Malignant_label and Sex. Every panel now names the reduction
# explicitly (the Sex panel previously relied on the default reduction) and
# uses FALSE instead of the reassignable `F`.
# NOTE(review): the Sex panel uses pt.size = 1 while the others use .1 --
# kept as in the original; confirm this is intended.
options(repr.plot.height = 5, repr.plot.width = 30)
select <- 'umap'
DimPlot_scCustom(obj.diet, pt.size = .1, group.by = "gCT", reduction = select, label = FALSE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(obj.diet, pt.size = .1, group.by = "mCT", reduction = select, label = TRUE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(obj.diet, pt.size = .1, group.by = "Cohort", reduction = select, label = TRUE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(obj.diet, pt.size = .1, group.by = "Malignant_label", reduction = select, label = FALSE, label.size = 4, colors_use = pal_igv("default")(51))|
DimPlot_scCustom(obj.diet, pt.size = 1, group.by = "Sex", reduction = select, label = TRUE, label.size = 4, colors_use = pal_igv("default")(51))

In [None]:
# Write the downsampled tumor-cell object to disk.
saveRDS(object = obj.diet, file = 'obj.TumorCell.diet.rds')