In [None]:
library(BiocManager)
library(BSgenome.Hsapiens.UCSC.hg38)
library(ArchR)
library(ggplot2)
library(TFBSTools)
library(Seurat)
library(ggplot2)
library(dplyr)
library(reticulate)
# Load the hg38 gene and genome annotation datasets (looked up in the attached
# packages) and keep them under shorter aliases.
data("geneAnnoHg38", "genomeAnnoHg38")
genomeAnno <- genomeAnnoHg38
geneAnno <- geneAnnoHg38

# Set the default number of threads ArchR uses.
addArchRThreads(threads = 24)


# Copy every function defined in the ArchR namespace -- unexported internals
# included -- into the current (top-level) environment so that each one can be
# called without the `ArchR:::` prefix.
#
# The functions are fetched with get()/assign() rather than by building and
# evaluating code strings: names that are not syntactically valid identifiers
# (for example `%op%`-style operators) cannot be parsed from pasted text and
# used to be skipped silently.
archr_ns <- asNamespace("ArchR")
target_env <- environment()
fn <- unclass(lsf.str(envir = archr_ns, all.names = TRUE))
for (fn_name in fn) {
  tryCatch(
    assign(
      fn_name,
      get(fn_name, envir = archr_ns, inherits = FALSE),
      envir = target_env
    ),
    error = function(e) {
      # Keep going on failure (best effort), but report what was skipped.
      message(
        "Could not expose ArchR:::", fn_name, ": ", conditionMessage(e)
      )
    }
  )
}

In [None]:
# Load the ArchR project that contains all cancer cells.
proj <- loadArchRProject(path = "ArchR project with all cancer cells")

# Print the loaded project.
proj


In [None]:
# Cancer-type label; the value below is a placeholder to be replaced. It is
# pasted into the names of the per-cancer CSV files that are read later on.
cancertype <- 'Cancer name' # e.g. BRCA, BLCA, KIRC, ...

In [None]:
# Nearest-neighbour composition analysis.
#
# For every query ("disease") cell, find its k nearest reference cells in
# IterativeLSI space and count how many of those neighbours come from
#   * the same sample                                   -> Same_sample
#   * a different sample with the same `cleaned_sample` -> Same_cancer
#   * a different sample with another `cleaned_sample`  -> Different_cancer
# The result is `stats_table`, one row per query cell.

# Retrieve the IterativeLSI reduced dimensions. With returnMatrix = FALSE the
# full reducedDims object is returned; its `matSVD` element is the matrix whose
# rows are indexed by cell name below.
# NOTE(review): an earlier comment spoke of "the top 30 LSI dimensions", but no
# dimension subsetting happens here -- every stored dimension is used. Confirm
# that the project's IterativeLSI was computed with the intended dimensions.
lsi <- getReducedDims(proj, reducedDims = "IterativeLSI", returnMatrix = FALSE)

# Load the names of the cells chosen for the KNN search (column `x` of each
# CSV). stringsAsFactors = FALSE plus as.character() guarantees the names are
# used as names, never as factor integer codes (the default on R < 4.0).
svdDisease_rownames <- read.csv(
  paste0("svdDiseaseCells_ChosenforNN_", cancertype, ".csv"),
  stringsAsFactors = FALSE
)
svdReference_rownames <- read.csv(
  paste0("svdReferenceCells_ChosenforNN_", cancertype, ".csv"),
  stringsAsFactors = FALSE
)
disease_cells <- as.character(svdDisease_rownames$x)
reference_cells <- as.character(svdReference_rownames$x)

# Cell metadata and LSI embedding.
cellcoldata <- getCellColData(proj)
matSVD <- data.frame(lsi$matSVD)

# Fail early with a clear message: indexing by an unknown row name would
# otherwise yield all-NA rows that only surface later as obscure errors or
# wrong counts.
chosen_cells <- c(disease_cells, reference_cells)
missing_lsi <- setdiff(chosen_cells, rownames(matSVD))
if (length(missing_lsi) > 0) {
  stop(
    length(missing_lsi), " chosen cell(s) are absent from the IterativeLSI ",
    "matrix, e.g.: ", paste(head(missing_lsi, 5), collapse = ", "),
    call. = FALSE
  )
}
missing_meta <- setdiff(chosen_cells, rownames(cellcoldata))
if (length(missing_meta) > 0) {
  stop(
    length(missing_meta), " chosen cell(s) are absent from the cell ",
    "metadata, e.g.: ", paste(head(missing_meta, 5), collapse = ", "),
    call. = FALSE
  )
}

# Split the embedding into query and reference cells.
svdDisease <- matSVD[disease_cells, , drop = FALSE]
svdReference <- matSVD[reference_cells, , drop = FALSE]

# K nearest neighbours (k = 5) of every query cell among the reference cells,
# computed with FNN.
input_knn <- 5
library(FNN)
set.seed(1)
knnDisease <- get.knnx(
  data = svdReference,
  query = svdDisease,
  k = input_knn
)

# nn.index is a (query cells x k) matrix of row positions in svdReference.
nn_index <- knnDisease$nn.index
NN_index <- data.frame(nn_index)

# Look up sample and tumour type once per cell instead of once per loop
# iteration. The CSV name vectors are used (not the data-frame row names) so
# that duplicated cell names, which data.frame row names would suffix, still
# resolve to their metadata.
query_sample <- as.character(cellcoldata[disease_cells, "Sample"])
query_tumour <- as.character(cellcoldata[disease_cells, "cleaned_sample"])
ref_sample <- as.character(cellcoldata[reference_cells, "Sample"])
ref_tumour <- as.character(cellcoldata[reference_cells, "cleaned_sample"])

# Metadata of each neighbour, laid out like nn_index.
nn_sample <- matrix(
  ref_sample[nn_index],
  nrow = nrow(nn_index), ncol = ncol(nn_index)
)
nn_tumour <- matrix(
  ref_tumour[nn_index],
  nrow = nrow(nn_index), ncol = ncol(nn_index)
)

# Comparing a matrix with a vector of length nrow recycles down the columns,
# so element [i, j] is compared with the value of query cell i.
same_sample <- nn_sample == query_sample
same_tumour <- nn_tumour == query_tumour

# Count neighbours per category. Missing metadata propagates as NA counts
# rather than being silently counted as a match.
column_names <- c("barcode", "Same_sample", "Same_cancer", "Different_cancer")
stats_table <- data.frame(
  disease_cells,
  as.integer(rowSums(same_sample)),
  as.integer(rowSums(!same_sample & same_tumour)),
  as.integer(rowSums(!same_sample & !same_tumour)),
  stringsAsFactors = FALSE
)
colnames(stats_table) <- column_names




