## scCASE.run

In [1]:
import scCASE
import pandas as pd
import episcanpy as epi
import anndata as ad
import sklearn.metrics
import warnings
warnings.filterwarnings("ignore")

**scCASE.run is the main function of scCASE.**

In [2]:
# Directory holding the BM0828 example dataset. The original absolute path is
# kept as the default; change this one value to run on another machine.
DATA_DIR = "/home/sccasimp/data/sccasimp/BM0828"
# Load the single-cell dataset (.h5ad) into an AnnData object.
adata = epi.read(DATA_DIR + "/origdata.h5ad")

In [15]:
# Show the AnnData summary (dimensions plus obs/var annotations) to confirm the load.
adata

AnnData object with n_obs × n_vars = 533 × 320083
    obs: 'cell_type'
    var: 'peak'

### Utilize scCASE to enhance data

In [12]:
# Run method "scCASE" on the in-memory AnnData with no reference (ref_path=None)
# and keep the returned object for the downstream cells.
data_enhanced = scCASE.run(
    data_path=adata,
    ref_path=None,
    method="scCASE",
    data_format="h5ad",
    data_sep=",",
    ref_sep=",",
    type_number_range=range(3, 15),
    output_path="./",
    batchlabel="batch",
    threshold=0.01,
    K="Auto",
    K_ref="Auto",
    save_result=False,
)

Data shape after feature selection:
(99662, 533)
Raw dataset shape:  (533, 99662)
Dataset shape after preprocessing:  (533, 92229)
Estimating by sum of squared distances...
Estimating by Davies-Bouldin score...
Estimating by silhouette coefficient...
Initializing...
Generating similarity matrix...
Updating...
Finished


### Utilize scCASER to enhance data with a reference.

In [None]:
# Run method "scCASER", supplying a reference CSV through ref_path.
# The result is not assigned; as the cell's last expression it is displayed.
scCASE.run(
    data_path=adata,
    ref_path="/home/sccasimp/data/sccasimp/BM0828/bulk.csv",
    method="scCASER",
    data_format="h5ad",
    data_sep=",",
    ref_sep=",",
    type_number_range=range(3, 15),
    output_path="./",
    batchlabel="batch",
    threshold=0.01,
    K="Auto",
    K_ref="Auto",
    save_result=False,
)

Data shape after feature selection:
(99662, 533)
Reference shape after feature selection:
(99662, 17)
Raw dataset shape:  (533, 99662)
Dataset shape after preprocessing:  (533, 92229)
Estimating by sum of squared distances...


### Utilize scCASE to correct sequencing depth.

In [None]:
# Run method "Correct seq depth" on the same AnnData, without a reference.
# The result is not assigned; as the cell's last expression it is displayed.
scCASE.run(
    data_path=adata,
    ref_path=None,
    method="Correct seq depth",
    data_format="h5ad",
    data_sep=",",
    ref_sep=",",
    type_number_range=range(3, 15),
    output_path="./",
    batchlabel="batch",
    threshold=0.01,
    K="Auto",
    K_ref="Auto",
    save_result=False,
)

Data shape after feature selection:
(99662, 533)
Raw dataset shape:  (533, 99662)
Dataset shape after preprocessing:  (533, 92229)
Estimating by sum of squared distances...


### Utilize scCASE to correct batch effects.

In [None]:
# Run method "Correct batch effect" on the same AnnData, without a reference.
# NOTE(review): batchlabel="batch" presumably names an obs column, but the
# AnnData summary shown earlier lists only 'cell_type' in obs — confirm the
# dataset carries a 'batch' annotation before running this cell.
scCASE.run(
    data_path=adata,
    ref_path=None,
    method="Correct batch effect",
    data_format="h5ad",
    data_sep=",",
    ref_sep=",",
    type_number_range=range(3, 15),
    output_path="./",
    batchlabel="batch",
    threshold=0.01,
    K="Auto",
    K_ref="Auto",
    save_result=False,
)

Data shape after feature selection:
(99662, 533)
Raw dataset shape:  (533, 99662)
Dataset shape after preprocessing:  (533, 92229)
Estimating by sum of squared distances...


### Utilize scCASE.Estimate_k solely to estimate the parameter K of scCASE.

In [None]:
# Estimate K on its own, then hand the estimate to scCASE.run instead of K="Auto".
# Estimate_k receives the transposed matrix as a dense array. .toarray() is used
# rather than the .A shorthand because .A exists only on the legacy sparse-matrix
# classes, whereas .toarray() works on sparse matrices and sparse arrays alike.
# NOTE(review): assumes adata.X is a SciPy sparse container — confirm.
Estimated_K = scCASE.Estimate_k(
    adata.X.T.toarray(),
    search_range=range(5, 15),
    method="scCASE",
)
scCASE.run(adata, method="scCASE", K=Estimated_K)

## scCASE.lazy

**Similar to episcanpy.pp.lazy: it runs TF-IDF (an addition over episcanpy), PCA, neighbor-graph construction, and t-SNE/UMAP.**

In [17]:
# Run the convenience pipeline on the enhanced data. Judging by the summary shown
# in the next cell (pca/umap/tsne entries appear), the results are written into
# data_enhanced itself; the return value is not used here.
scCASE.lazy(data_enhanced)

In [18]:
# Inspect data_enhanced after scCASE.lazy to see which slots are now populated.
data_enhanced

AnnData object with n_obs × n_vars = 533 × 99662
    obs: 'cell_type'
    var: 'peak'
    uns: 'pca', 'neighbors', 'umap', 'tsne'
    obsm: 'Embeding', 'X_pca', 'X_umap', 'X_tsne'
    varm: 'Projection', 'PCs'
    obsp: 'Similarity', 'distances', 'connectivities'

## scCASE.identify_specific_peaks

In [20]:
# Query peaks for the "HSC" group of the "cell_type" annotation.
# peak_name="peak" presumably selects the var column holding peak identifiers — confirm.
peaks = scCASE.identify_specific_peaks(
    data_enhanced,
    obs="cell_type",
    type_="HSC",
    peak_name="peak",
)