**NOTE**: Here we test the performance of STged using both the true cell type proportions and the cell type proportions predicted by cell type deconvolution methods. We then compare STged with competing methods.

In [1]:

# Workspace reset, intentionally left disabled: remove(list = ls())

# Point reticulate at the "ccnet" conda environment before any Python-backed
# code is run.
library(reticulate)
use_condaenv("ccnet", required = TRUE)

# Environment name kept in a variable; it is not referenced in the visible
# cells (presumably consumed by the sourced code or later cells -- TODO confirm).
python_env <- "python_env"

In [2]:
# Source code
# Both scripts are evaluated into the global environment. Presumably they
# define the helpers called in later cells (data_process, dis_weight,
# create_group_exp, MUR.STged) -- TODO confirm which script provides which.
# The paths are relative, so the working directory must be the project root.
source("./code/STged.R")
source("./code/benchmark.R")

# Load the generated simulated data
## Patch sizes: 50/100 um2
i = 1 for the 100 um2
i = 2 for the 50 um2

In [3]:
# Simulated MPOA data; `mpoa_list` holds one entry per patch size.
sim_data <- readRDS("./realdata/MAOP/FN7_hash_mpoa_sim-29_ref01.Rds")

# Patch-size selector: i = 1 -> 100 um2, i = 2 -> 50 um2.
i <- 2
patch_size <- c(100, 50)[i]
patch_size

# Simulated patches for the selected size. `-0.29` is the name of the list
# element (presumably the tissue slice position -- TODO confirm).
mpoa <- sim_data$mpoa_list[[i]]$`-0.29`

# Switch: when TRUE, the MERFISH data bundled with the simulation is used as
# the reference (expression transposed, labels taken from Cell_class).
usemer <- FALSE

if (usemer) {
  sc_exp <- t(sim_data$ref_exp)
  sc_label <- sim_data$ref_label$Cell_class
}



Loading required package: hash



hash-2.2.6.3 provided by Decision Patterns




In [4]:
# Load the scRNA-seq reference.
# NOTE: this reuses the name `sim_data`, so the simulated data set read
# earlier is no longer reachable through that variable after this cell.
sim_data <- readRDS(file = "./realdata/MAOP/refscRNA/scRNA_MPOA.RDS")


In [5]:
# Take the reference expression and labels from the scRNA-seq object unless
# the MERFISH reference was requested via `usemer`. Without this guard the
# `usemer` switch had no effect: the sc_exp / sc_label set in the `usemer`
# branch were always overwritten here. If `usemer` was never defined, the
# scRNA-seq reference is used (the previous behaviour).
if (!(exists("usemer") && isTRUE(usemer))) {
  sc_exp <- sim_data$scexp
  sc_label <- sim_data$sclabel
}

# Quick size check: dimensions of the reference and number of labels.
dim(sc_exp)
length(sc_label)

In [6]:
# Class sizes before relabelling.
table(sc_label)
# Rename the "Mural" class to "Pericytes" (presumably to match the cell type
# names used in the spatial data -- TODO confirm); other labels are untouched.
sc_label <- replace(sc_label, sc_label == "Mural", "Pericytes")
# Class sizes after relabelling.
table(sc_label)

sc_label
  Astrocyte Endothelial   Ependymal  Excitatory  Inhibitory   Microglia 
        857         541          69        2000        2000         828 
      Mural OD Immature   OD Mature 
        513        1692        2000 

sc_label
  Astrocyte Endothelial   Ependymal  Excitatory  Inhibitory   Microglia 
        857         541          69        2000        2000         828 
OD Immature   OD Mature   Pericytes 
       1692        2000         513 

## Spot location information

In [7]:
# Original spot identifiers: the row names of the patch expression matrix.
# They are "_"-separated numeric fields, which are used below as coordinates.
spot_raw <- rownames(mpoa$patchGexp)
# seq_along() rather than 1:length(): an empty input yields zero labels instead
# of the bogus pair "spot1", "spot0".
spots <- paste0("spot", seq_along(spot_raw))

# Numeric coordinate matrix, one row per spot.
split_strings <- strsplit(spot_raw, "_")
spot_loc <- do.call(rbind, lapply(split_strings, as.numeric))
# Gene x spot expression matrix; drop = FALSE keeps it a matrix even when
# there is only one spot.
spot_exp <- t(mpoa$patchGexp)[, spot_raw, drop = FALSE]
dim(spot_exp)

# Per-spot cell type table, rows taken in the same order as `spot_raw`
# (presumably cell counts per type -- TODO confirm).
cell_type_freq <- as.matrix(mpoa$cellTypeTable[spot_raw, , drop = FALSE])
# Coerce entries to numeric while keeping the matrix shape and column names.
# The former apply(..., 2, as.numeric) returned a plain vector when only one
# spot was present, which broke the row-wise normalisation below.
cell_type_freq <- matrix(
  as.numeric(cell_type_freq),
  nrow = nrow(cell_type_freq),
  ncol = ncol(cell_type_freq),
  dimnames = list(NULL, colnames(cell_type_freq))
)
# Row-normalise to proportions. A spot whose row sums to zero gives NaN here.
cell_type_prop <- sweep(cell_type_freq, 1, rowSums(cell_type_freq), FUN = "/")

cell_type <- colnames(cell_type_prop)
cell_type

# Replace the coordinate-style identifiers with the short "spotN" labels
# everywhere, so the three objects share the same spot names.
rownames(cell_type_prop) <- spots
rownames(spot_loc) <- spots
colnames(spot_exp) <- spots


## TRUE cell type-specific gene expression for each cell type

In [8]:
# Build TRUE_F: a list with one gene x spot matrix per cell type, holding the
# cell type-specific expression of each spot taken from mpoa$avgPatchGexp.
genes <- rownames(spot_exp)
TRUE_F <- vector("list", length(cell_type))
names(TRUE_F) <- cell_type

# List with one matrix per spot whose columns are named by cell type.
# NOTE(review): entries are accessed by position, so the list is assumed to be
# in the same order as `spots`, and the rows of each matrix in the same order
# as `genes` -- confirm against the simulation code.
spot_cell_type_exp <- mpoa$avgPatchGexp

# Dedicated loop indices (not `i`), so the global patch-size selector `i` is
# not overwritten; seq_along() keeps the loops safe for empty inputs.
for (ct_idx in seq_along(cell_type)) {
  ct_name <- cell_type[ct_idx]

  # Spot x gene matrix, zero-initialised: a spot that does not contain this
  # cell type simply keeps its row of zeros.
  temp_mat <- matrix(
    0,
    nrow = length(spots),
    ncol = length(genes),
    dimnames = list(spots, genes)
  )

  for (spot_idx in seq_along(spots)) {
    temp <- spot_cell_type_exp[[spot_idx]]
    if (ct_name %in% colnames(temp)) {
      temp_mat[spot_idx, ] <- temp[, ct_name]
    }
  }

  # Store as gene x spot to match the orientation of `spot_exp`.
  TRUE_F[[ct_idx]] <- t(temp_mat)
}



# Run the STged step by step

## Step 1: clean data

In [9]:
# Step 1 settings, then the preprocessing call. The returned object `datax`
# supplies sc_exp, sc_label, spot_exp and spot_loc to the later steps.
clean.only <- FALSE
depthscale <- 1e6
datax <- data_process(
  sc_exp = sc_exp,
  sc_label = sc_label,
  spot_exp = spot_exp,
  spot_loc = spot_loc,
  depthscale = depthscale,
  gene_det_in_min_cells_per = 0,
  expression_threshold = 0,
  nUMI = 10,
  verbose = FALSE,
  clean.only = clean.only
)


## Step 2: construct spatial correlation structures

In [10]:
# Step 2: spatial weights computed from the processed spot locations and
# expression. The `dis_weight` element of the result is later passed to
# MUR.STged as W.
cat("Construct spatial correlation", "\n")
L.mat <- dis_weight(
  spot_loc = datax$spot_loc,
  spot_exp = datax$spot_exp,
  k = 6,
  quantile_prob_bandwidth = 1 / 3,
  method = "Hex",
  coord_type = "grid"
)

Construct spatial correlation 


## Step 3: construct reference gene matrix

In [11]:
# Step 3: build the reference expression matrix from the processed reference
# cells and their labels, then show its column names.
cat("Construct reference gene matrix", "\n")

ref_exp <- create_group_exp(
  sc_exp = datax$sc_exp,
  sc_label = datax$sc_label
)
colnames(ref_exp)


Construct reference gene matrix 


In [12]:
# Put the reference rows in the same gene order as the processed spot
# expression matrix.
ref_exp <- ref_exp[rownames(datax$spot_exp), ]
# Print both name vectors so the reference columns can be compared by eye with
# the cell types of the true proportions.
colnames(ref_exp)
cell_type

In [13]:
# True cell type proportions, restricted to (and ordered like) the spots kept
# in the processed expression matrix.
beta.type <- cell_type_prop[colnames(datax$spot_exp), ]

In [14]:

# Sensitivity analysis: run STged for every combination of the two tuning
# parameters and save the estimates together with the grid.
lambda_values1 <- c(0.001, 0.01, 0.2204442, 1, 5)
lambda_values2 <- c(0.001, 0.01, 0.1962507, 1, 5)

# All (lambda1, lambda2) combinations; lambda1 varies fastest.
parameter_grid <- expand.grid(
  lambda1 = lambda_values1,
  lambda2 = lambda_values2
)

# One result slot per grid row. The container used to be sized with
# length(methods), which is unrelated to the grid and only worked because
# `[[<-` silently grew the object.
sens_STged <- vector("list", nrow(parameter_grid))

# seq_len() keeps the loop from running on indices 1 and 0 for an empty grid.
for (ii in seq_len(nrow(parameter_grid))) {

  cat("Run the STged", "\n")

  start_time <- Sys.time()
  stged.est <- MUR.STged(
    srt_exp = datax$spot_exp,
    ref_exp = ref_exp,
    beta.type = beta.type,
    W = L.mat$dis_weight,
    lambda1 = parameter_grid$lambda1[ii],
    lambda2 = parameter_grid$lambda2[ii],
    cutoff = 0.05,
    epsilon = 1e-5
  )
  end_time <- Sys.time()

  # Assign through a one-element list so that a NULL V.hat keeps its slot
  # instead of deleting it and shifting later results off their grid row.
  sens_STged[ii] <- list(stged.est$V.hat)

  # Fix the unit explicitly: a bare difftime picks secs/mins/hours on its own,
  # which made the printed numbers impossible to compare across runs.
  elapsed_secs <- as.numeric(difftime(end_time, start_time, units = "secs"))
  cat("Run time of STged", elapsed_secs, "secs", "\n")

}

# Estimates, one list element per row of `parameter_grid`.
files <- paste0("./results/MPOA/True_spot_decon_", patch_size, "um2_sens.RDS")
saveRDS(sens_STged, file = files)

# The grid itself, so results can be matched back to their lambda values.
saveRDS(
  parameter_grid,
  file = paste0(
    "./results/MPOA/True_cell_exp_tuning_values", patch_size, "um2.RDS"
  )
)

Run the STged 
Select value of lambda1 0.001 
Select value of lambda2 0.001 
Run the main algorithm... 
Run time of STged 8.135943 
Run the STged 
Select value of lambda1 0.01 
Select value of lambda2 0.001 
Run the main algorithm... 
Run time of STged 24.45781 
Run the STged 
Select value of lambda1 0.2204442 
Select value of lambda2 0.001 
Run the main algorithm... 
Run time of STged 25.81207 
Run the STged 
Select value of lambda1 1 
Select value of lambda2 0.001 
Run the main algorithm... 
Run time of STged 9.676772 
Run the STged 
Select value of lambda1 5 
Select value of lambda2 0.001 
Run the main algorithm... 
Run time of STged 23.85715 
Run the STged 
Select value of lambda1 0.001 
Select value of lambda2 0.01 
Run the main algorithm... 
Run time of STged 1.864399 
Run the STged 
Select value of lambda1 0.01 
Select value of lambda2 0.01 
Run the main algorithm... 
Run time of STged 6.435936 
Run the STged 
Select value of lambda1 0.2204442 
Select value of lambda2 0.01 
Run 