In [None]:
# in jupyter notebook R kernel
library(Seurat)
library(stringr)

# Locate the annotated Seurat object for this sample.
# NOTE(review): `dir` and `sample` are not defined in this cell; they must
# already exist in the R session as the input directory and the sample name.
rdsfile <- file.path(
  dir,
  paste0('Annotated_6phase_', sample, '_RNA_T_0.5.rds')
)

# Per-sample pySCENIC working directory (created on demand, parents included).
output <- file.path('/data/work/output/pySCENIC', sample)
dir.create(output, recursive = TRUE, showWarnings = FALSE)

objs <- readRDS(rdsfile)
# Show the first four metadata rows as a quick sanity check.
objs@meta.data[1:4, ]

# Write the raw counts of the RNA3 assay as a dense CSV, transposed so that
# rows of the stored matrix become CSV columns.
objs@assays$RNA3@counts |>
  as.matrix() |>
  t() |>
  write.csv(file = file.path(output, "scenic.data.csv"))

# in jupyter notebook python kernel
import os
import sys

# Inspect the kernel's working directory and its contents.
os.getcwd()
os.listdir(os.getcwd())

import loompy as lp
import numpy as np
import scanpy as sc

# Per-sample working folder; holds the CSV exported from R and receives the loom.
# NOTE(review): `sample` is not defined in this cell; it must already exist in the kernel.
folder = os.path.join("/data/work/output/pySCENIC", sample)
file_name1 = "scenic.data.csv"
file_name2 = "scenic.loom"
file1 = os.path.join(folder, file_name1)
file2 = os.path.join(folder, file_name2)

for path in (file1, file2, folder):
    print(path)

# Read the CSV into an AnnData object, then store its transpose in the loom:
# loom rows are labelled with the AnnData var names ("Gene"), loom columns with
# the obs names ("CellID").
x = sc.read_csv(file1)
row_attrs = {"Gene": np.array(x.var_names)}
col_attrs = {"CellID": np.array(x.obs_names)}
lp.create(file2, x.X.T, row_attrs, col_attrs)

!pyscenic grn --num_workers 30 --output grn.tsv --method grnboost2 scenic.loom /data/work/input/pySCENIC/TF_gene_PlantTFDB.txt

!pyscenic ctx grn.tsv /data/users/lili10/online/input/ITAG4.1.regions_vs_motifs.rankings.feather --annotations_fname /data/work/input/pySCENIC/ITAG4.1_MOTIF_PlantTFDB.tbl --expression_mtx_fname scenic.loom --mode "dask_multiprocessing" --output ctx.csv --num_workers 30 --mask_dropouts

!pyscenic aucell scenic.loom ctx.csv --output aucell.loom --num_workers 30

Switch to the R kernel for the cells below.

# in jupyter notebook R kernel
library(Seurat)
library(SCopeLoomR) 
library(AUCell)
library(SCENIC)
library(dplyr)
library(KernSmooth)
library(RColorBrewer)
library(plotly)
library(BiocParallel)
library(grid)
library(ComplexHeatmap) 
library(data.table)
library(scRNAseq) 
library(patchwork)
library(ggplot2)
library(stringr)
library(circlize)

# Work from the per-sample pySCENIC folder: the loom is opened and all outputs
# below are written with paths relative to it.
setwd(file.path('/data/work/output/pySCENIC', sample))
getwd()

loom <- open_loom('aucell.loom')

# Regulon incidence matrix read from the "Regulons" column attribute.
regulons_incidMat <- SCopeLoomR::get_regulons(
  loom,
  column.attr.name = "Regulons"
)
regulons_incidMat[, 1:5]

# Convert the incidence matrix into a named list of gene vectors.
regulons <- SCENIC::regulonsToGeneLists(regulons_incidMat)
head(regulons, 3)

# Long table with one row per (gene, regulon) pair, saved next to the loom.
regulons_long <- regulons |>
  stack() |>
  setNames(c("Genename_ITAG4.1", "TF"))
write.csv(
  regulons_long,
  paste0('regulons_',sample, '.csv'),
  row.names = FALSE
)

# Regulon AUC values read from the "RegulonsAUC" column attribute.
regulonAUC <- SCopeLoomR::get_regulons_AUC(
  loom,
  column.attr.name = 'RegulonsAUC'
)
regulonAUC

# Regulon thresholds; the binarisation step further down reads the regulon
# from each element's value and the numeric cut-off from its name.
regulonAucThresholds <- SCopeLoomR::get_regulon_thresholds(loom)
head(regulonAucThresholds, 3)

embeddings <- SCopeLoomR::get_embeddings(loom)
embeddings

close_loom(loom)

# Reload the annotated Seurat object for this sample.
# NOTE(review): `dir` and `sample` must already be defined in the session.
rdsfile <- file.path(dir, paste0('Annotated_6phase_',sample, '_RNA_T_0.5.rds'))
objs <- readRDS(rdsfile)

seurat.data <- objs

# Use the `assign.ident` metadata column as the active identity for grouping.
Idents(seurat.data) <- "assign.ident"

objs@meta.data[1:4, ]

# Reorder the AUC columns to follow the Seurat cell order. Fail early with a
# clear message when some Seurat cells have no AUC column: match() returns NA
# for them and would otherwise only surface as an opaque subsetting error.
cell_idx <- match(colnames(seurat.data), colnames(regulonAUC))
if (anyNA(cell_idx)) {
  stop(
    sum(is.na(cell_idx)),
    " cell(s) of the Seurat object have no column in regulonAUC; ",
    "check that cell names match between the RDS and the loom",
    call. = FALSE
  )
}
sub_regulonAUC <- regulonAUC[, cell_idx]

# Displayed sanity check: both objects now list the cells in the same order.
identical(colnames(sub_regulonAUC), colnames(seurat.data))

# Append one metadata column per regulon (cells as rows) so the Seurat
# plotting functions can use regulon names as features.
seurat.data@meta.data <- cbind(
  seurat.data@meta.data,
  t(assay(sub_regulonAUC))
)

# Categorical palette shared by the per-group plots (21 colours).
colors20 <- c(
  '#1f77b4', '#ff7f0e', '#c7c7c7', '#d62728', '#aa40fc',
  '#8c564b', '#e377c2', '#b5bd61', '#279e68', '#aec7e8',
  '#ffbb78', '#bd9e39', '#5254a3', '#6b6ecf', '#ad494a',
  '#9edae5', '#dbdb8d', '#E7298A', '#f7b6d2', '#8c6d31',
  '#BCBD22'
)

# Every regulon gets one panel; panels are laid out two per row.
all_regulons <- rownames(regulons_incidMat)
n_rows <- ceiling(length(all_regulons) / 2)

# Page height grows with the number of panel rows.
pdf("./VlnPlot_all_regulons.pdf", height = 4 * n_rows, width = 24)

# One violin plot per regulon, without points and without a legend.
plots <- lapply(all_regulons, function(regulon) {
  VlnPlot(
    seurat.data,
    features = regulon,
    pt.size = 0,
    cols = colors20
  ) +
    theme(legend.position = "none")
})

combined_plot <- wrap_plots(plots, ncol = 2, byrow = TRUE)

print(combined_plot)
dev.off()

# One feature plot per regulon, two panels per row, on a single PDF page whose
# height grows with the number of panel rows.
pdf("./FeaturePlot_all_regulons.pdf", height = 3.5 * n_rows, width = 10)

# The original call ended in a stray comma, which passed an empty extra
# argument to FeaturePlot(); it is dropped here.
plots <- lapply(all_regulons, function(regulon) {
  FeaturePlot(seurat.data, features = regulon)
})

combined_plot <- wrap_plots(plots, ncol = 2, byrow = TRUE)

print(combined_plot)
dev.off()

# One ridge plot per regulon, two panels per row, on a single PDF page whose
# height grows with the number of panel rows.
pdf("./RidgePlot_all_regulons.pdf", height = 8 * n_rows, width = 12)

# Ridge plots share the group palette; legends are hidden.
plots <- lapply(all_regulons, function(regulon) {
  RidgePlot(
    seurat.data,
    features = regulon,
    cols = colors20
  ) +
    theme(legend.position = "none")
})

combined_plot <- wrap_plots(plots, ncol = 2, byrow = TRUE)

print(combined_plot)
dev.off()

# Group label per cell, taken from `assign.ident`; cells without a label are
# collected under a placeholder group.
cluster_labels <- as.character(seurat.data$assign.ident)
cluster_labels[is.na(cluster_labels)] <- "unkown"
cellClusters <- data.frame(
  row.names = colnames(seurat.data),
  seurat_clusters = cluster_labels
)

# Cell names per group, with empty groups removed.
cellsPerGroup <- split(rownames(cellClusters), cellClusters$seurat_clusters)
cellsPerGroup <- cellsPerGroup[lengths(cellsPerGroup) > 0]

# Keep only the regulon rows selected by onlyNonDuplicatedExtended().
sub_regulonAUC <- sub_regulonAUC[
  onlyNonDuplicatedExtended(rownames(sub_regulonAUC)),
]

# Mean AUC of every regulon within every group (regulons x groups).
# drop = FALSE keeps the subset a matrix for one-cell groups and for a single
# regulon, where rowMeans() would otherwise fail on a plain vector; cbind()
# guarantees a matrix result whatever the number of regulons or groups.
auc_matrix <- getAUC(sub_regulonAUC)
regulonActivity_byGroup <- do.call(
  cbind,
  lapply(cellsPerGroup, function(cells) {
    rowMeans(auc_matrix[, cells, drop = FALSE])
  })
)

# Z-score each regulon across groups.
regulonActivity_byGroup_Scaled <- t(
  scale(t(regulonActivity_byGroup), center = TRUE, scale = TRUE)
)

# Heatmap of scaled mean regulon activity, groups as rows and regulons as
# columns. draw() is called explicitly so the heatmap is rendered into the PDF
# even when this code does not run as an auto-printed top-level expression
# (e.g. inside source(), a function or a loop).
pdf("./Heatmap_regulons.pdf", height = 8, width = 12)

draw(Heatmap(matrix = t(regulonActivity_byGroup_Scaled)))

dev.off()

# Per-cell AUC matrix (regulons x cells) with columns reordered so that cells
# of the same group are contiguous. drop = FALSE keeps a matrix even when only
# one regulon is left.
group_ordered_cells <- as.character(unlist(cellsPerGroup))
ht_auc <- assay(sub_regulonAUC)
ht_auc <- ht_auc[, group_ordered_cells, drop = FALSE]
identical(colnames(ht_auc), group_ordered_cells)

# Z-score each regulon across cells.
ht_auc_scale <- t(scale(t(ht_auc), center = TRUE, scale = TRUE))

# Group label of every column, in column order.
celltypes <- rep(names(cellsPerGroup), lengths(cellsPerGroup))
length(celltypes)
unique_celltypes <- unique(celltypes)

# One colour per group, taken from `colors20` (defined with the violin plots).
# When there are more groups than palette entries the palette is interpolated
# instead of yielding NA colours.
n_celltypes <- length(unique_celltypes)
type_palette <- if (n_celltypes <= length(colors20)) {
  colors20[seq_len(n_celltypes)]
} else {
  grDevices::colorRampPalette(colors20)(n_celltypes)
}
custom_colors <- setNames(type_palette, unique_celltypes)
custom_colors

col_anno <- columnAnnotation(
  celltype = celltypes,
  col = list(celltype = custom_colors)
)

pdf("./Heatmap_regulons_allcells.pdf", height = 4, width = 10)

# Columns keep the group order (no column clustering); draw() renders the
# heatmap explicitly rather than relying on auto-printing.
draw(Heatmap(
  matrix = ht_auc_scale,
  top_annotation = col_anno,
  col = colorRamp2(c(-2, 0, 2), c("#003399", "white", "#990066")),
  cluster_columns = FALSE,
  show_row_names = TRUE,
  show_column_names = FALSE
))

dev.off()

# check.names = FALSE keeps the cell names exactly as they are; the default
# would rewrite names containing "-" or starting with a digit, and the column
# reorder below would then fail to find them.
ht_auc <- data.frame(ht_auc, check.names = FALSE)

# regulonAucThresholds stores the regulon in each element's value and the
# numeric cut-off in its name; turn it into a cut-off vector named by regulon.
auc_cutoffs <- setNames(
  as.numeric(names(regulonAucThresholds)),
  as.character(regulonAucThresholds)
)
# Only regulons still present in the AUC table can be binarised (some rows
# were removed earlier by the onlyNonDuplicatedExtended() filter).
auc_cutoffs <- auc_cutoffs[names(auc_cutoffs) %in% rownames(ht_auc)]

# 1 where a cell's AUC exceeds the regulon's cut-off, 0 otherwise. The cut-off
# vector has one entry per row, so it recycles down each column and row i is
# always compared with cut-off i.
auc_values <- as.matrix(ht_auc[names(auc_cutoffs), , drop = FALSE])
binary_tfs <- data.frame((auc_values > auc_cutoffs) * 1, check.names = FALSE)

# Same group-contiguous column order as the other all-cell heatmaps.
binary_tfs <- binary_tfs[, as.character(unlist(cellsPerGroup)), drop = FALSE]
identical(colnames(binary_tfs), as.character(unlist(cellsPerGroup)))


# Label up to three randomly chosen regulons on the right-hand side. The size
# is capped at the number of rows because sample() errors when asked for more
# items than are available.
mark <- sample(
  rownames(binary_tfs),
  size = min(3, nrow(binary_tfs)),
  replace = FALSE
)
at <- match(mark, rownames(binary_tfs))

tfs_mark <- rowAnnotation(foo = anno_mark(at = at, labels = mark))

# 0.2 inch per regulon row, with a 4 inch minimum page height.
heatmap_height <- max(4, 0.2 * nrow(binary_tfs))

pdf("./Heatmap_Binary_regulons_allcells.pdf", height = heatmap_height, width = 10)

# Columns keep the group order; the input is converted to a matrix up front
# and draw() renders the heatmap explicitly rather than relying on
# auto-printing.
draw(Heatmap(
  matrix = as.matrix(binary_tfs),
  name = "Binary activity of regulon",
  cluster_columns = FALSE,
  col = c("white", "black"),
  top_annotation = col_anno,
  right_annotation = tfs_mark,
  show_row_names = FALSE,
  show_column_names = FALSE
))

dev.off()

# Same binary matrix with all regulon names shown; `cluster_columns` is left
# at its default here, unlike the marked version above. draw() renders the
# heatmap explicitly rather than relying on auto-printing, and the input is
# converted to a matrix up front.
pdf("./Heatmap_Binary_regulons_of_allcells.pdf", height = heatmap_height, width = 10)

draw(Heatmap(
  matrix = as.matrix(binary_tfs),
  name = "Binary activity of regulon",
  col = c("white", "black"),
  top_annotation = col_anno,
  show_row_names = TRUE,
  show_column_names = FALSE
))

dev.off()