## ⭐ cross-cell prediction

In [2]:
library(Seurat)
library(ggplot2)
library(Matrix)
library(Signac)
library(stats4)
library(dplyr)
library(SeuratDisk)
library(feather)

In [3]:
# Output directory for results and input directory holding the CSV exports.
save_path <- "./Results/"
data_dir <- "./data/"

# Training labels (ATAC): cell metadata, feature metadata and the count
# matrix. The first CSV column supplies the row names of every table.
obs <- read.csv(paste0(data_dir, "atac1_obs.csv"), row.names = 1)
var <- read.csv(paste0(data_dir, "atac1_var.csv"), row.names = 1)
counts <- read.csv(paste0(data_dir, "atac1_counts.csv"), row.names = 1)
counts <- as.matrix(counts)

In [4]:
# Peek at the row names of each table, then show how many row names the
# count matrix and the feature table carry.
head(row.names(obs))
head(row.names(var))
head(row.names(counts))
length(row.names(counts))
length(row.names(var))

In [5]:
# Label both axes of the count matrix in one step: rows take the row names
# of `obs`, columns take the row names of `var`.
dimnames(counts) <- list(rownames(obs), rownames(var))
head(dimnames(counts)[[1]])
head(dimnames(counts)[[2]])

In [6]:
# Build the training ATAC Seurat object. `counts` is transposed before it is
# handed to Seurat, and both filtering thresholds are set to 0.
TRAIN_ATAC_obj <- CreateSeuratObject(
  counts = t(counts),
  assay = "ATAC",
  project = "ATAC",
  min.cells = 0,
  min.features = 0
)
TRAIN_ATAC_obj

An object of class Seurat 
36282 features across 4391 samples within 1 assay 
Active assay: ATAC (36282 features, 0 variable features)
 2 layers present: counts, data

In [7]:
# Attach the cell metadata through the Seurat API instead of overwriting the
# `meta.data` slot. A raw slot assignment throws away the per-cell columns
# that CreateSeuratObject() computed (orig.ident, nCount_ATAC, nFeature_ATAC)
# and skips the matching of metadata rows to cell names.
# The guard keeps a metadata table without columns from reaching AddMetaData().
if (ncol(obs) > 0) {
  TRAIN_ATAC_obj <- AddMetaData(TRAIN_ATAC_obj, metadata = obs)
}

In [8]:
# Load the testing data (label): ATAC counts plus cell and feature metadata.
counts <- as.matrix(
  read.csv(paste0(data_dir, "atac2_counts.csv"), row.names = 1)
) # matrix
obs <- read.csv(paste0(data_dir, "atac2_obs.csv"), row.names = 1) # cell metadata
var <- read.csv(paste0(data_dir, "atac2_var.csv"), row.names = 1) # feature metadata
# Rows of `counts` are labelled from `obs`, columns from `var`.
rownames(counts) <- rownames(obs)
colnames(counts) <- rownames(var)
head(rownames(counts))
head(colnames(counts))
# Create Seurat object (the matrix is transposed before being passed in).
TEST_ATAC_obj <- CreateSeuratObject(
  counts = t(counts),
  assay = "ATAC",
  project = "ATAC",
  min.cells = 0,
  min.features = 0
)
# Add the metadata with AddMetaData() rather than replacing the `meta.data`
# slot, so the columns computed by CreateSeuratObject() (orig.ident,
# nCount_ATAC, nFeature_ATAC) are kept and rows are matched by cell name.
if (ncol(obs) > 0) {
  TEST_ATAC_obj <- AddMetaData(TEST_ATAC_obj, metadata = obs)
}
TEST_ATAC_obj

An object of class Seurat 
36282 features across 487 samples within 1 assay 
Active assay: ATAC (36282 features, 0 variable features)
 2 layers present: counts, data

In [9]:
# Load the training data (input): RNA counts plus cell and gene metadata.
counts <- as.matrix(
  read.csv(paste0(data_dir, "rna1_counts.csv"), row.names = 1)
) # matrix
obs <- read.csv(paste0(data_dir, "rna1_obs.csv"), row.names = 1) # cell metadata
var <- read.csv(paste0(data_dir, "rna1_var.csv"), row.names = 1) # gene metadata
# Rows of `counts` are labelled from `obs`, columns from `var`.
rownames(counts) <- rownames(obs)
colnames(counts) <- rownames(var)
head(rownames(counts))
head(colnames(counts))
# Create Seurat object (the matrix is transposed before being passed in).
TRAIN_RNA_obj <- CreateSeuratObject(
  counts = t(counts),
  assay = "RNA",
  project = "RNA",
  min.cells = 0,
  min.features = 0
)
# Add the metadata with AddMetaData() rather than replacing the `meta.data`
# slot, so the columns computed by CreateSeuratObject() (orig.ident,
# nCount_RNA, nFeature_RNA) are kept and rows are matched by cell name.
if (ncol(obs) > 0) {
  TRAIN_RNA_obj <- AddMetaData(TRAIN_RNA_obj, metadata = obs)
}
TRAIN_RNA_obj

An object of class Seurat 
10721 features across 4391 samples within 1 assay 
Active assay: RNA (10721 features, 0 variable features)
 2 layers present: counts, data

In [10]:
# Load the testing data (input): RNA counts plus cell and gene metadata.
counts <- as.matrix(
  read.csv(paste0(data_dir, "rna2_counts.csv"), row.names = 1)
) # matrix
obs <- read.csv(paste0(data_dir, "rna2_obs.csv"), row.names = 1) # cell metadata
var <- read.csv(paste0(data_dir, "rna2_var.csv"), row.names = 1) # gene metadata
# Rows of `counts` are labelled from `obs`, columns from `var`.
rownames(counts) <- rownames(obs)
colnames(counts) <- rownames(var)
head(rownames(counts))
head(colnames(counts))
# Create Seurat object (the matrix is transposed before being passed in).
TEST_RNA_obj <- CreateSeuratObject(
  counts = t(counts),
  assay = "RNA",
  project = "RNA",
  min.cells = 0,
  min.features = 0
)
# Add the metadata with AddMetaData() rather than replacing the `meta.data`
# slot, so the columns computed by CreateSeuratObject() (orig.ident,
# nCount_RNA, nFeature_RNA) are kept and rows are matched by cell name.
if (ncol(obs) > 0) {
  TEST_RNA_obj <- AddMetaData(TEST_RNA_obj, metadata = obs)
}
TEST_RNA_obj

An object of class Seurat 
10721 features across 487 samples within 1 assay 
Active assay: RNA (10721 features, 0 variable features)
 2 layers present: counts, data

### Find anchors and predict data

In [11]:
# Log-normalise both RNA objects. Variable features are selected on the
# training object only and reused below as the anchor feature set.
TRAIN_RNA_obj <- NormalizeData(
  TRAIN_RNA_obj,
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)
TRAIN_RNA_obj <- FindVariableFeatures(
  TRAIN_RNA_obj,
  selection.method = "vst",
  nfeatures = 4000
)
TEST_RNA_obj <- NormalizeData(
  TEST_RNA_obj,
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)

# Read the variable features through the accessor instead of the
# `@var.features` slot, which only exists on one assay class.
features <- VariableFeatures(TRAIN_RNA_obj, assay = "RNA")

# Number of CCA dimensions used for anchor finding and for weighting.
DN <- 30

# Warnings are silenced only around these calls rather than with a
# session-wide `options(warn = -1)` that is never restored. The assignments
# inside the braces are evaluated in the calling environment, so the
# variables remain available afterwards.
suppressWarnings({
  # Anchors between training (reference) and testing (query) RNA profiles.
  anchors <- FindTransferAnchors(
    reference = TRAIN_RNA_obj,
    query = TEST_RNA_obj,
    reduction = "cca",
    features = features,
    reference.assay = "RNA",
    query.assay = "RNA",
    k.filter = NA,
    dims = 1:DN,
    verbose = FALSE
  )
  # Training ATAC values to transfer. No normalisation of TRAIN_ATAC_obj is
  # visible in this file, so "data" presumably mirrors the raw counts --
  # TODO confirm this is the intended layer.
  # (`verbose` is not an argument of GetAssayData() and was dropped.)
  refdata <- GetAssayData(
    object = TRAIN_ATAC_obj,
    assay = "ATAC",
    slot = "data"
  )
  # Predict ATAC values for the test cells from the anchor weights.
  imputation <- TransferData(
    anchorset = anchors,
    refdata = refdata,
    weight.reduction = "cca",
    dims = 1:DN,
    k.weight = 10,
    verbose = FALSE
  )
  # Densify explicitly so the conversion to a data frame does not depend on
  # an `as.data.frame()` method for Matrix classes being registered.
  Imp_New_peaks <- as.data.frame(
    as.matrix(GetAssayData(imputation, slot = "data"))
  )
})

In [12]:
# Observed ATAC counts of the test cells as a dense feature-by-cell table.
# The accessor replaces direct `@assays$ATAC@counts` slot access, and the
# explicit as.matrix() avoids relying on an as.data.frame() method for
# Matrix classes.
test_mod <- as.data.frame(
  as.matrix(GetAssayData(TEST_ATAC_obj, assay = "ATAC", slot = "counts"))
)

# Restrict truth and prediction to the features present in both, in the same
# order. `drop = FALSE` keeps a data frame even with a single column.
shared <- intersect(rownames(test_mod), rownames(Imp_New_peaks))
Imp_New_peaks <- Imp_New_peaks[shared, , drop = FALSE]
test_mod <- test_mod[shared, , drop = FALSE]

# The two tables are later compared column by column, so report (without
# stopping) when their cell columns are not the same names in the same order.
if (!identical(colnames(test_mod), colnames(Imp_New_peaks))) {
  message(
    "Cell columns of the observed and predicted tables differ; ",
    "check the cell order before comparing them."
  )
}
dim(test_mod)
dim(Imp_New_peaks)

# Store the feature names in an `index` column so they are written to the
# output files along with the values.
test_mod$index <- rownames(test_mod)
Imp_New_peaks$index <- rownames(Imp_New_peaks)

In [13]:
# Create the output directory if it is missing so the writes below do not
# depend on it already existing; an existing directory is left untouched.
dir.create(save_path, showWarnings = FALSE, recursive = TRUE)

# Persist the observed and the predicted ATAC tables.
write_feather(test_mod, paste0(save_path, "Seurat_true.feather"))
write_feather(Imp_New_peaks, paste0(save_path, "Seurat_pred.feather"))