In [1]:
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csc_matrix,csr_matrix
import re
import sys
import spatialSNV as ss



In [2]:
sample_name = 'CRC-P19-T'
rna = sc.read_h5ad(f'./input/{sample_name}.rna.h5ad')
snv_path = f"./input/{sample_name}_snp100_matrix"
snv_depth_path = f"./input/{sample_name}_snp100_matrix_depth"
snv_depth = sc.read_10x_mtx(snv_depth_path,cache=True)

... writing an h5ad cache file to speedup reading next time


In [3]:
snv = sc.read_10x_mtx(snv_path,cache=True)
snv.obs_names = 'DNB_'+ snv.obs_names

snv_depth = sc.read_10x_mtx(snv_depth_path,cache=True)
snv_depth.obs_names = 'DNB_'+ snv_depth.obs_names

... writing an h5ad cache file to speedup reading next time
... reading from cache file cache/input-CRC-P19-T_snp100_matrix_depth-matrix.h5ad


In [4]:
common = list(set(rna.obs_names).intersection(set(snv_depth.obs_names)))
rna = rna[common, :].copy()
snv_depth = snv_depth[common,:].copy()
snv = snv[common,snv_depth.var_names].copy()
snv.obsm['spatial'] = rna.obsm['spatial']

In [5]:
gtf = "./ref/gencode.v31.chr_patch_hapl_scaff.annotation.gtf.gz"
annovar_ref = "/home/liuyi/02.software/annovar/p12_ref" # bulid by annovar
annovar_spe = "homo"
annovar_ref_name = "gencodev38"

snv = ss.processsnv(
    sample_name,
    snv,
    snv_depth,
    gtf = gtf,
    annovar_ref = annovar_ref,
    annovar_spe = annovar_spe,
    annovar_ref_name = annovar_ref_name,
    thrshold = 20,
    min_cells = 5,
    outdir = f"./out/",
    annovar = "/home/liuyi/02.software/annovar/annovar/table_annovar.pl", # path to annovar
)

filtered out 11997 genes that are detected in less than 1 cells
filtered out 11997 genes that are detected in less than 1 cells
filtered out 1 genes that are detected in less than 5 cells


NOTICE: the --polish argument is set ON automatically (use --nopolish to change this behavior)
-----------------------------------------------------------------
NOTICE: Processing operation=g protocol=gencodev38

NOTICE: Running with system command <annotate_variation.pl -geneanno -buildver homo -dbtype gencodev38 -outfile ./out//CRC-P19-T.gencodev38 -exonsort -nofirstcodondel ./out//CRC-P19-T.avinput /home/liuyi/02.software/annovar/p12_ref>
NOTICE: Output files are written to ./out//CRC-P19-T.gencodev38.variant_function, ./out//CRC-P19-T.gencodev38.exonic_variant_function
NOTICE: Reading gene annotation from /home/liuyi/02.software/annovar/p12_ref/homo_gencodev38.txt ... Done with 247086 transcripts (including 137441 without coding sequence annotation) for 66738 unique genes
NOTICE: Processing next batch with 42172 unique variants in 42172 input lines
NOTICE: Reading FASTA sequences from /home/liuyi/02.software/annovar/p12_ref/homo_gencodev38Mrna.fa ... Done with 4246 sequences

NOTIC

In [6]:
ss.normalize_with_rna(snv,rna)

In [8]:
snv

AnnData object with n_obs × n_vars = 17564 × 42172
    obs: 'TotalDepth', 'UMI_counts', 'snvperumi', 'SNVtypes'
    var: 'gene_ids', 'feature_types', 'SNVDepth', 'SNVCount', 'n_cells', 'Func', 'ExonicFunc', 'gene_name', 'gene_lenth', 'Func_L0'
    obsm: 'spatial'
    layers: 'norm'

In [9]:
snv_gene_adata = ss.bulid_windows(snv,window_size = 100000,basis = 'spatial')

In [10]:
snv_gene_adata.obs['cluster'] = rna.obs['cluster']
snv_gene_adata = snv_gene_adata[snv_gene_adata.obs['cluster']!='Normal'].copy()
sc.pp.filter_genes(snv_gene_adata,min_cells=1)

filtered out 1 genes that are detected in less than 1 cells


In [11]:
radius = ss.get_min_distance(snv_gene_adata,basis = 'spatial')
ss_con = ss.build_connect(snv_gene_adata, radius = 5 * (radius), include_self=False, norm=True, scaling=False, rank2=False,
                          rank_cutoff = 50, n_neighbors = 30, use_raw=False, decay='Gaussian')

  scalers = 1 / scalers


Step1: cell-cell connectivity calculation finished, Wed Dec 25 16:26:06 2024
Step2: ranking SNV expression values finished, Wed Dec 25 16:26:19 2024


  scaler_mode = 1/mode


Step3: cell-SNV connectivity calculation finished, Wed Dec 25 16:26:22 2024
Step5: SNV-SNV connectivity calculation finished, Wed Dec 25 16:26:25 2024


In [12]:
snv_gene_adata,snv_group_adata = ss.build_sg(snv_gene_adata, con = ss_con,resolution = 10, syn = False)

In [13]:
ss.netplot(snv_gene_adata,snv_group = 1,ss_con = ss_con,save = './out/snv_group1.pdf')

In [14]:
snv.write('./out/dome.snv.h5ad')
snv_gene_adata.write('./out/dome.snv_gene_adata.h5ad')
snv_group_adata.write('./out/dome.snv_group_adata.h5ad')