# In [None]:
### Integration.R
library(dplyr)
library(Seurat)
library(ggplot2)
library(DropletUtils)
# library(future)
# library(future.apply)
library(gplots)
library(purrr)
library(cowplot)
library(infercnv)




# Load the per-patient Seurat objects into object.list for integration.
# minFeature / minCount are used here only to build the input file names;
# presumably they are the QC thresholds applied upstream -- TODO confirm.
minFeature <- 300
minCount <- 600
downsampled <- FALSE
# NOTE(review): both branches currently resolve to the same directory, so
# `downsampled` has no effect yet. Point the first branch at the downsampled
# data location if one exists.
if (downsampled) {
    base_dir <- "data/"
} else {
    base_dir <- "data/"
}

pat_list <- c('Sarcoma167GEX', 'Sarcoma322GEX', 'Sarcoma559GEX', 'Sarcoma708GEX',
              'SarcomaS408GEX', 'SarcomaS410GEX', 'SarcomaS914GEX', 'SarcomaS956GEX')

# One .rds per patient, built from base_dir (previously the path hard-coded
# 'data/' and ignored base_dir):
#   <base_dir><patient>/data_<patient>_genes_<minFeature>_UMI_<minCount>_annotated_for_infercnv.rds
rds_paths <- paste0(base_dir, pat_list, '/data_', pat_list, '_genes_', minFeature,
                    '_UMI_', minCount, '_annotated_for_infercnv.rds')

# Fail fast, listing every missing input, before any large object is loaded.
missing_rds <- rds_paths[!file.exists(rds_paths)]
if (length(missing_rds) > 0) {
    stop("Missing input file(s): ", paste(missing_rds, collapse = ", "), call. = FALSE)
}

# Unnamed list, in the same order as pat_list (later loops index both by position).
object.list <- lapply(rds_paths, readRDS)

                      
# Copy each object's "celltype_" metadata column to "celltype_singleR", the
# column name that the immune / T-cell flagging further down reads.
# NOTE(review): the source column name ends in a bare underscore -- confirm it
# matches the column written by the upstream annotation step.
# seq_along() (rather than 1:length()) keeps the loop a no-op on an empty list.
for (i in seq_along(object.list)) {
    object.list[[i]][["celltype_singleR"]] <- object.list[[i]][["celltype_"]]
}
                      
                      
# Pick the features shared across samples, then scale and run PCA on every
# object restricted to those features.
features <- SelectIntegrationFeatures(object.list = object.list)
object.list <- lapply(object.list, function(obj) {
    obj <- ScaleData(obj, features = features, verbose = FALSE)
    RunPCA(obj, features = features, verbose = FALSE)
})
                      
# Flag immune vs non-immune cells and T cells vs other cells in every object
# (based on the celltype_singleR metadata column), then attach the inferCNV
# results for that patient. Presumably these flags feed the later
# identification of malignant cells -- TODO confirm.

# Cell-type labels counted as immune. Loop-invariant, so defined once.
immune <- c('CD4+ T-cells', 'CD4+ Tcm', 'CD4+ Tem', 'CD8+ T-cells', 'CD8+ Tcm',
            'CD8+ Tem', 'Class-switched memory B-cells', 'DC', 'Eosinophils',
            'Macrophages', 'Macrophages M1', 'Macrophages M2', 'Memory B-cells',
            'Monocytes', 'naive B-cells', 'Neutrophils', 'NK cells', 'Plasma cells', 'Tregs')

# Labels counted as T cells. Only these two labels are listed, so subtypes such
# as 'CD4+ Tcm' or 'Tregs' end up in 'other'.
tc <- c('CD4+ T-cells', 'CD8+ T-cells')

for (i in seq_along(object.list)) {
    # NOTE(review): "RNA" is not a metadata column created in this script, so
    # this presumably sets every cell's identity to the literal "RNA". If the
    # intent was to select the RNA assay, DefaultAssay() is the usual call --
    # confirm before changing.
    Idents(object.list[[i]]) <- "RNA"

    pat <- pat_list[[i]]

    # Read the annotation once; both flags are derived from the same column.
    celltypes <- object.list[[i]]@meta.data$celltype_singleR
    object.list[[i]][['immune']] <- ifelse(celltypes %in% immune, 'immune', 'non-immune')
    object.list[[i]][['Tcell']] <- ifelse(celltypes %in% tc, 'T-cell', 'other')

    # inferCNV output directory: "local_infercnv" immediately followed by the
    # patient ID (no separator between the two).
    object.list[[i]] <- add_to_seurat(object.list[[i]],
                                      paste0("IndividualInferCNVScripts/local_infercnv", pat))
}

# Continue here if inferCNV was run on AWS


# Add per-cell CNV summary metrics. For each metric family, every metadata
# column whose name contains "<prefix>_chr" is collected and its row-wise mean
# and sum are stored as "<prefix>_avg" and "<prefix>_sum".
cnv_prefixes <- c('proportion_scaled_cnv', 'proportion_cnv', 'has_cnv')

for (i in seq_along(object.list)) {
    md <- object.list[[i]]@meta.data

    for (prefix in cnv_prefixes) {
        cnv_cols <- grep(paste0(prefix, '_chr'), names(md), value = TRUE)
        if (length(cnv_cols) == 0) {
            # Without matching columns the mean is NaN and the sum is 0 for every
            # cell; surface that instead of letting it pass silently.
            warning("No metadata columns matching '", prefix, "_chr' in object ", i,
                    "; ", prefix, "_avg will be NaN and ", prefix, "_sum will be 0",
                    call. = FALSE)
        }
        # drop = FALSE keeps a data.frame even when a single column matches, so
        # rowMeans()/rowSums() still receive a two-dimensional input.
        cnvs <- md[, cnv_cols, drop = FALSE]
        md[[paste0(prefix, '_avg')]] <- rowMeans(cnvs)
        md[[paste0(prefix, '_sum')]] <- rowSums(cnvs)
    }

    object.list[[i]]@meta.data <- md
}
                         
# Find integration anchors using CCA (no `reduction` argument is passed, so the
# FindIntegrationAnchors default applies). RPCA was not chosen on the reasoning
# that biological states are likely to match across samples -- the original
# author marked this rationale as uncertain ("???"), so treat it as unconfirmed.
# `anchor.features` is also left at its default; the `features` vector computed
# earlier in the script is not forwarded here.
anchors <- FindIntegrationAnchors(object.list = object.list, dims = 1:50)

# Integrate data sets and make the integrated assay the default for what follows
seu <- IntegrateData(anchorset = anchors, dims = 1:50)
DefaultAssay(seu) <- "integrated"

# Normal workflow on the integrated assay: scale, PCA, neighbours, clusters, UMAP
seu <- ScaleData(object = seu)
seu <- RunPCA(object = seu)
seu <- FindNeighbors(seu, dims = 1:30)
seu <- FindClusters(seu)
seu <- RunUMAP(object = seu, dims = 1:30)

# Create the output directory if needed so that the save cannot fail on a
# missing path after the expensive integration above has completed.
out_path <- "data/sarcoma_all/data_sarcoma_all_merged_obj.rds"
dir.create(dirname(out_path), recursive = TRUE, showWarnings = FALSE)
saveRDS(seu, out_path)
                         