# Co-accessibility with Cicero

In [74]:
# Load libraries
suppressMessages(library(hdf5r))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(cicero))

ERROR: Error in library(monocle3): there is no package called 'monocle3'


In [3]:
# Path to the serialized Seurat object (.rds) that this notebook analyses
rds <- "/cellar/users/aklie/data/igvf/beta_cell_networks/multiome_stimulated_sc/rds/dm023_palmitate/dm023_palmitate_endocrine_SC.beta.rds"
out_dir = "/cellar/users/aklie/

In [46]:
# Deserialize the object stored at `rds`, then echo it so the notebook shows
# its summary
adata <- readRDS(file = rds)
adata

An object of class Seurat 
696827 features across 7042 samples within 6 assays 
Active assay: mpeak (215116 features, 215116 variable features)
 5 other assays present: RNA, RNA_raw, ATAC, SCT, peaks
 9 dimensional reductions calculated: lsi, harmony.atac, pca, harmony.sct, umap.wnn, umap.atac, umap.sct, harmony.peaks, umap.peaks

In [47]:
# Pull the raw count matrix out of the 'mpeak' assay.
# This notebook assumes peak names are written as chr-start-end.
DefaultAssay(adata) <- "mpeak"
atac.counts <- GetAssayData(object = adata, slot = "counts")
dim(atac.counts)

In [65]:
# Per-cell annotation table, taken from the object's `meta.data` slot
cellinfo <- methods::slot(adata, "meta.data")

In [66]:
# Build the per-peak annotation table from the count-matrix row names.
# Each name is split on "-" into at most three fields (chr, start, end),
# matching the "chr-start-end" naming this notebook assumes.
peak_ids <- rownames(atac.counts)
peak_parts <- stringr::str_split_fixed(peak_ids, "-", 3)
peakinfo <- data.frame(
    site_name = peak_ids,
    # Anchor the pattern so only a leading "chr" is removed; the previous
    # unanchored gsub() deleted every "chr" substring in the contig name.
    chr = sub("^chr", "", peak_parts[, 1]),
    # Coordinates arrive as text; fields that are not numeric become NA
    # (with a warning), exactly as before.
    bp1 = as.numeric(peak_parts[, 2]),
    bp2 = as.numeric(peak_parts[, 3]),
    row.names = peak_ids,
    stringsAsFactors = FALSE
)

In [67]:
# Label the count matrix: rows take the peak annotation IDs, columns take the
# cell annotation IDs
rownames(atac.counts) <- rownames(peakinfo)
colnames(atac.counts) <- rownames(cellinfo)

In [78]:
# Bundle the counts with the cell and peak annotation tables into a
# CellDataSet. Everything is evaluated inside suppressWarnings(), so warnings
# raised while building the object are silenced.
input_cds <- suppressWarnings(
    newCellDataSet(
        cellData = atac.counts,
        phenoData = methods::new("AnnotatedDataFrame", data = cellinfo),
        featureData = methods::new("AnnotatedDataFrame", data = peakinfo),
        lowerDetectionLimit = 0,
        expressionFamily = negbinomial.size()
    )
)

In [81]:
# Cell embeddings of the 'umap.wnn' reduction; column names are dropped while
# the row names are kept
umap_coords <- Embeddings(object = adata[["umap.wnn"]])
colnames(umap_coords) <- NULL

In [83]:
# Build the Cicero input CDS from the CellDataSet and the embedding
# coordinates, with k = 30
cicero_cds <- make_cicero_cds(
    cds = input_cds,
    reduced_coordinates = umap_coords,
    k = 30
)

Overlap QC metrics:
Cells per bin: 30
Maximum shared cells bin-bin: 26
Mean shared cells bin-bin: 0.125320541692941
Median shared cells bin-bin: 0

"the condition has length > 1 and only the first element will be used"


In [85]:
# Run the Cicero co-accessibility steps on a prepared Cicero CDS.
#
# The distance parameter is estimated on sampled windows, the estimates are
# averaged, the Cicero models are fitted with that mean, and the models are
# assembled into a table of peak-to-peak connections.
#
# Arguments:
#   cicero_cds          Object handed to estimate_distance_parameter() and
#                       generate_cicero_models().
#   umap_coords         Unused; kept so existing three-argument calls work.
#   celltype            Unused; kept so existing three-argument calls work.
#   window              Window size passed to both Cicero calls
#                       (default 1e6, the previously hard-coded value).
#   genomic_coords      Chromosome sizes, passed through as `genomic_coords`.
#                       Defaults to the previously hard-coded hg38 file.
#   maxit, sample_num, distance_constraint
#                       Forwarded to estimate_distance_parameter(); defaults
#                       are the previously hard-coded values.
#
# Returns the value of assemble_connections(), called with silent = FALSE.
run_cicero <- function(cicero_cds, umap_coords = NULL, celltype = NULL,
                       window = 1e6,
                       genomic_coords = "/cellar/shared/carterlab/genomes/hg38/hg38.chrom.sizes",
                       maxit = 100,
                       sample_num = 100,
                       distance_constraint = 500000) {
    # Fail fast with a clear message instead of erroring inside Cicero when
    # the chromosome-sizes file is missing.
    if (is.character(genomic_coords) && length(genomic_coords) == 1 &&
        !file.exists(genomic_coords)) {
        stop("Chromosome sizes file not found: ", genomic_coords, call. = FALSE)
    }

    distance_parameters <- estimate_distance_parameter(
        cicero_cds,
        window = window,
        maxit = maxit,
        sample_num = sample_num,
        distance_constraint = distance_constraint,
        genomic_coords = genomic_coords
    )
    # Collapse the per-sample estimates into the single value used for fitting
    mean_distance_parameter <- mean(unlist(distance_parameters))

    cicero_out <- generate_cicero_models(
        cicero_cds,
        distance_parameter = mean_distance_parameter,
        window = window,
        genomic_coords = genomic_coords
    )
    assemble_connections(cicero_out, silent = FALSE)
}
     

In [86]:
# Compute the co-accessibility connections for this cell population
cicero_conns <- run_cicero(
    cicero_cds = cicero_cds,
    umap_coords = umap_coords,
    celltype = "SC.beta"
)

[1] "Successful cicero models:  5652"
[1] "Other models: "

  Too many elements in range Zero or one element in range 
                          71                         1021 
[1] "Models with errors:  0"


In [89]:
# Peak identifiers: the row names of the CellDataSet's expression matrix
all_peaks <- rownames(exprs(input_cds))

In [92]:
# Directory the result CSV files are written into
output_folder <- "/cellar/users/aklie/projects/igvf/beta_cell_networks/bin/infer_grns/cicero/results/dm023_palmitate"

In [96]:
# Preview the first six connections
head(x = cicero_conns, n = 6L)

Unnamed: 0_level_0,Peak1,Peak2,coaccess
Unnamed: 0_level_1,<chr>,<fct>,<dbl>
1,chr1-1000339-1001027,chr1-585740-586641,-0.001193087
2,chr1-1000339-1001027,chr1-629485-630598,0.075568451
3,chr1-1000339-1001027,chr1-631298-632243,0.001908325
4,chr1-1000339-1001027,chr1-633466-634664,0.115832962
5,chr1-1000339-1001027,chr1-778301-779228,-0.014169742
6,chr1-1000339-1001027,chr1-804460-805380,0.002669663


In [103]:
# Write the peak list and the Cicero connections to CSV files in
# `output_folder`.
all_peaks <- row.names(exprs(input_cds))

# Create the results directory if it is missing so the writes below do not
# fail on a non-existent path (no-op when it already exists).
dir.create(output_folder, recursive = TRUE, showWarnings = FALSE)

# write.csv() ignores `col.names` and warns ("attempt to set 'col.names'
# ignored"), so the argument is dropped. The file contents are unchanged: a
# single column with the header "x". Use write.table(..., sep = ",",
# col.names = FALSE) instead if a header-less file is actually wanted.
write.csv(
    x = all_peaks,
    file = file.path(output_folder, "all_peaks.csv"),
    row.names = FALSE,
    quote = FALSE
)
write.csv(
    x = cicero_conns,
    file = file.path(output_folder, "cicero_connections.csv"),
    row.names = FALSE,
    quote = FALSE
)

"attempt to set 'col.names' ignored"


: 

: 

: 

: 

# Scratch

In [None]:
# Read the tab-separated narrowPeak file without a header row, skipping its
# first line
peaks.file <- "/cellar/users/aklie/data/igvf/beta_cell_networks/multiome_stimulated_sc/peaks/dm023_palmitate_endocrine_SC.beta.narrowPeak"
peaks <- read.table(
    file = peaks.file,
    header = FALSE,
    sep = "\t",
    skip = 1,
    stringsAsFactors = FALSE
)

In [None]:
# Join the first three columns of each row into one identifier string.
# NOTE(review): fields are joined with "_", while the peak names used earlier
# in this notebook are "chr-start-end" -- confirm which separator is intended.
peaks.lst <- paste(
    peaks[["V1"]],
    peaks[["V2"]],
    peaks[["V3"]],
    sep = "_"
)