# Setup

In [6]:
%load_ext autoreload
%autoreload 2

import crispr as cr 
from crispr.crispr_class import Crispr
import pertpy as pt
import pandas as pd
import numpy as np
from config import (files_data, col_cell_type_data, col_gene_symbols_data, 
                    assays_data, layer_perturbation_data,
                    col_perturbation_data, key_control_data, 
                    col_split_by_data, layer_perturbation_data,
                    col_target_genes_data, key_nonperturbed_data,
                    key_treatment_data, col_guide_rna_data, col_batch_data,
                    col_sample_id_data, kws_process_guide_rna_data)

# Options
# Show up to 100 columns when DataFrames are displayed.
pd.set_option("display.max_columns", 100)
skip_preprocessing_clustering = False  # to skip these steps
print(dir(cr.ax))  # list what the analysis sub-module exposes

# Example dataset to run (key into the dictionaries imported from config).
# Uncomment exactly one alternative to switch datasets.
file = "CRISPRi_scr"
# file = "CRISPRi_ess"
# file = "ECCITE"
# file = "augur_ex"
# file = "coda"
# file = "screen"
# file = "perturb-seq"

# Keyword arguments forwarded to the clustering steps
kws_pca = {"n_comps": None, "use_highly_variable": True}
kws_neighbors = None  # passed to neighbors function
kws_umap = {"min_dist": 0.3}  # passed to UMAP fx
kws_cluster = {"resolution": 0.5}  # passed to louvain/leiden fx

#  Set Arguments
# Resolve the (assay, assay_protein) pair for the chosen dataset.
# A config entry that is a bare string or None is padded to a 2-item list
# whose second item (protein assay) is None.
if file in assays_data:
    if isinstance(assays_data[file], str) or assays_data[file] is None:
        assays_data[file] = [assays_data[file], None]
    assay, assay_protein = assays_data[file]
else:
    # Fix: `assay, assay_protein = None` raised
    # "TypeError: cannot unpack non-iterable NoneType object".
    assay, assay_protein = None, None

# Per-dataset settings looked up from the config dictionaries
file_path = files_data[file]
kws_process_guide_rna = kws_process_guide_rna_data[file]
col_split_by = col_split_by_data[file]

# Keyword arguments passed to `Crispr(...)` when the object is created
kwargs_init = dict(assay=assay, assay_protein=assay_protein,
                   col_gene_symbols=col_gene_symbols_data[file],
                   col_cell_type=col_cell_type_data[file],
                   col_sample_id=col_sample_id_data[file],
                   col_batch=col_batch_data[file],
                   col_perturbation=col_perturbation_data[file],
                   col_guide_rna=col_guide_rna_data[file],
                   col_target_genes=col_target_genes_data[file],
                   layer_perturbation=layer_perturbation_data[file],
                   key_nonperturbed=key_nonperturbed_data[file],
                   key_control=key_control_data[file],
                   key_treatment=key_treatment_data[file])
print(files_data)
if file == "default_fast":
    skip_preprocessing_clustering = True

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'analyze_composition', 'cluster', 'clustering', 'compute_distance', 'find_markers', 'perform_augur', 'perform_differential_prioritization', 'perform_gsea', 'perform_mixscape', 'perturbations']
{'CRISPRi_scr': {'directory': '/home/asline01/projects/crispr/examples/data/crispr-screening/HH03'}, 'CRISPRi_wgs': '/home/asline01/projects/crispr/examples/data/replogle_2022_k562_gwps.h5ad', 'CRISPRi_ess': '/home/asline01/projects/crispr/examples/data/replogle_2022_k562_esss.h5ad', 'pool': '/home/asline01/projects/crispr/examples/data/norman_2019_raw.h5ad', 'bulk': '/home/asline01/projects/crispr/examples/data/burczynski_crohn.h5ad', 'screen': '/home/asline01/projects/crispr/examples/data/dixit_2016_raw.h5ad', 'perturb-seq': '/home/asline01/projects/crispr/examples/data/adamson_2016_upr_pert

# Setup Object

## Instantiate


This is more code than an end-user will actually need. `self = Crispr(file_path)` is the bare minimum needed (though you must specify some column and entry label names, such as the column name for the gene symbols, if they aren't the same as the defaults).

End users can simply pass to the `file` argument of `Crispr()` a 10x CellRanger output directory (or a dictionary of appropriate protospacer-related arguments if perturbation-related data aren't included in the .mtx file; see the `Crispr.preprocessing.combine_matrix_protospacer()` function for details) or .h5ad file if desired.

The extra code is just for generalizability across example datasets because in certain cases, we want to pass an AnnData Scanpy object because there are specific manipulations made to certain public datasets in the config.load_example_data function meant to decrease computation time/memory usage, etc.


In [16]:
# Initialize Object
print(f"Initialization arguments (from config):\n\n{kwargs_init}")

# Two example datasets are fetched through pertpy's data loaders and handed
# to `Crispr` as objects; every other dataset is passed as `file_path`.
if isinstance(file, str) and file == "ECCITE":
    self = Crispr(pt.data.papalexi_2021(), **kwargs_init)
elif isinstance(file, str) and file == "Adamson":
    self = Crispr(pt.data.adamson_2016_upr_perturb_seq(), **kwargs_init)
else:
    self = Crispr(file_path, **kwargs_init)

# Print
# Show cell-level metadata (of the chosen assay, if one is set)
self.adata[self._assay].obs if self._assay else self.adata.obs

Initialization arguments (from config):

{'assay': None, 'assay_protein': None, 'col_gene_symbols': 'gene_symbols', 'col_cell_type': 'leiden', 'col_sample_id': None, 'col_batch': None, 'col_perturbation': 'name', 'col_guide_rna': 'feature_call', 'col_target_genes': 'target_gene_name', 'layer_perturbation': nan, 'key_nonperturbed': 'NP', 'key_control': 'NT', 'key_treatment': 'Perturbed'}


 AnnData object with n_obs × n_vars = 15078 × 36601
    obs: 'num_features', 'feature_call', 'num_umis'
    var: 'gene_ids', 'feature_types'


{'col_gene_symbols': 'gene_symbols', 'col_cell_type': 'leiden', 'col_sample_id': None, 'col_batch': None, 'col_perturbation': 'name', 'col_guide_rna': 'feature_call', 'col_target_genes': 'target_gene_name'}

{'key_control': 'NT', 'key_treatment': 'Perturbed', 'key_nonperturbed': 'NP'}




 AnnData object with n_obs × n_vars = 15078 × 36601
    obs: 'num_features', 'feature_call', 'num_umis'
    var: 'gene_ids', 'feature_types'


Unnamed: 0,num_features,feature_call,num_umis
AAACCCAAGAATTTGG-1,6.0,RASGRP1-1|PAF1-1|NEG_CTRL-2-1|NRBP1-2|RASGRP1-...,10|19|55|4|9|18
AAACCCAAGATTTGCC-1,2.0,SNX17-1|SNX17-2,200|33
AAACCCAAGCGATTCT-1,3.0,SP110-1|HLA-B-1|SP140L-2,12|22|7
AAACCCAAGTCGGGAT-1,,,
AAACCCAAGTCTGCAT-1,6.0,RSBN1-1|PRDX6-1|STK11-1|RSBN1-2|PRDX6-2|NR4A1-2,194|381|51|513|119|47
...,...,...,...
TTTGTTGGTTGCCGAC-1,4.0,DUSP1-1|PPARA-1|TAGAP-2|DUSP1-2,48|19|4|46
TTTGTTGTCACTGTTT-1,2.0,ATG16L1-1|GPX4-1,73|125
TTTGTTGTCAGACCCG-1,,,
TTTGTTGTCAGTGTGT-1,5.0,HLA-B-1|TMEM50B-1|DUSP1-1|DUSP1-2|RUNX3-P2-2,188|168|35|9|47


## Preprocess and Cluster

See `self.figures[run_label]["clustering"]` (object attribute) or method output for figures.

If you do not want to filter by highly variable genes (HVGs), you must set `use_highly_variable` to False,
unless you already set `kws_hvg` to None or False in `.preprocess()`.


In that scenario, the 'highly_variable' column will be created in `.obsm` 
during `.preprocess()` and then used by default in `.cluster()`.

In [29]:
# Display the per-cell guide summary table. `target_info` is assigned by the
# guide-RNA parsing cell, which appears below this one in the notebook, so
# that cell has to be executed before this one.
target_info


Unnamed: 0,transfection,name,target_gene_name,target_gene_name_list,feature_call
AAACCCAAGAATTTGG-1,multi,Perturbed,RASGRP1|PAF1|NRBP1,"[RASGRP1, PAF1, NT, NRBP1, RASGRP1, PAF1]",RASGRP1-1_2|PAF1-1_2|NT-2-1|NRBP1-2
AAACCCAAGATTTGCC-1,single,Perturbed,SNX17,"[SNX17, SNX17]",SNX17-1_2
AAACCCAAGCGATTCT-1,multi,Perturbed,SP110|HLA|SP140L,"[SP110, HLA, SP140L]",SP110-1|HLA-B-1|SP140L-2
AAACCCAAGTCGGGAT-1,,NT,NP,[NP],NP-NP
AAACCCAAGTCTGCAT-1,multi,Perturbed,RSBN1|PRDX6|STK11|NR4A1,"[RSBN1, PRDX6, STK11, RSBN1, PRDX6, NR4A1]",RSBN1-1_2|PRDX6-1_2|STK11-1|NR4A1-2
...,...,...,...,...,...
TTTGTTGGTTGCCGAC-1,multi,Perturbed,DUSP1|PPARA|TAGAP,"[DUSP1, PPARA, TAGAP, DUSP1]",DUSP1-1_2|PPARA-1|TAGAP-2
TTTGTTGTCACTGTTT-1,multi,Perturbed,ATG16L1|GPX4,"[ATG16L1, GPX4]",ATG16L1-1|GPX4-1
TTTGTTGTCAGACCCG-1,,NT,NP,[NP],NP-NP
TTTGTTGTCAGTGTGT-1,multi,Perturbed,HLA|TMEM50B|DUSP1|RUNX3,"[HLA, TMEM50B, DUSP1, DUSP1, RUNX3]",HLA-B-1|TMEM50B-1|DUSP1-1_2|RUNX3-P2-2


In [28]:
import re

# ---- Parameters for guide-RNA parsing ----
key_control_patterns = ["CTRL"]  # substrings that mark a guide as a control
feature_split = "|"  # delimiter between guides within one cell's entry
guide_split = "-"  # delimiter between target gene and guide ID
col_multi_transfection = "transfection"  # output column: single vs. multi

# ---- Validate / normalize arguments ----
if isinstance(key_control_patterns, str):
    key_control_patterns = [key_control_patterns]
if guide_split in self._keys["key_control"]:
    raise ValueError(
        f"""`guide_split` ({guide_split}) must not be in 
        `self._keys['key_control']`.""")

# ---- Fill in missing guide calls ----
targets = self.adata.obs[self._columns["col_guide_rna"]].str.strip(
    " ").replace("", np.nan)
if np.nan in key_control_patterns:  # if NAs mean control sgRNAs
    key_control_patterns = list(pd.Series(key_control_patterns).dropna())
    targets = targets.replace(
        np.nan, self._keys["key_control"])  # NaNs replaced w/ control key
else:  # if NAs mean unperturbed cells
    targets = targets.replace(
        np.nan, self._keys["key_nonperturbed"]
        )  # NaNs replaced w/ nonperturbed key
keys_leave = [self._keys["key_nonperturbed"],
              self._keys["key_control"]]  # entries to leave alone

# ---- Split each guide into target gene (before the first `guide_split`)
# and guide ID (everything after it) ----
# `guide_split` is now escaped in BOTH patterns (previously only the second),
# so a delimiter that is a regex metacharacter is treated literally.
# NOTE(review): the split happens at the FIRST delimiter, so a gene symbol
# that itself contains `guide_split` is truncated (the displayed output maps
# "HLA-B-1" to target "HLA") -- confirm whether that is intended.
guide_split_re = re.escape(guide_split)
targets, nums = [targets.apply(
    lambda x: [re.sub(pattern, repl, str(i)) for i in x.split(
        feature_split)]) for pattern, repl in [
            (f"{guide_split_re}.*", ""),
            (rf"^.*?{guide_split_re}(.*)$", r"\1")]
                 ]  # each entry -> list of target genes / list of guide IDs

# ---- Relabel control guides with the control key ----
targets = targets.apply(
    lambda x: [i if i in keys_leave else self._keys[
        "key_control"] if any(
            (k in i for k in key_control_patterns)) else i
        for i in x])  # find control keys among targets

# ---- Rebuild guide labels, merging IDs of guides with the same target ----
grnas = targets.to_frame("t").join(nums.to_frame("n")).apply(
    lambda x: [i + guide_split + "_".join(np.array(
        x["n"])[np.where(np.array(x["t"]) == i)[0]])
               for i in pd.unique(x["t"])],
    axis=1).apply(lambda x: feature_split.join(x)).to_frame(
        self._columns["col_guide_rna"])  # e.g., [TMEM50B-1, NT-1-1]

# ---- Per-cell target summaries ----
target_list = targets.apply(list).to_frame(
    self._columns["col_target_genes"] + "_list")  # guide gene list
targets = targets.apply(pd.unique).apply(list)  # unique guides
# (This assignment appeared twice, verbatim, in the original cell.)
target_genes = targets.apply(
    lambda x: feature_split.join(
        list(x if all(np.array(x) == self._keys[
            "key_control"]) else pd.Series(x).replace(
                self._keys["key_control"], np.nan).dropna()
            )  # drop control label if multi-transfect w/ non-control
        )).to_frame(self._columns["col_target_genes"]
                    )  # re-join lists => single string
binary = targets.apply(
    lambda x: self._keys[
        "key_treatment"] if any(
            (q not in [
                self._keys["key_nonperturbed"],
                self._keys["key_control"]])
            for q in x) else self._keys["key_control"]).to_frame(
                self._columns["col_perturbation"]
                )  # binary perturbed/not
multi = targets.apply(
    lambda x: "multi" if sum([i not in [
                self._keys["key_nonperturbed"],
                self._keys["key_control"]] for i in x]
                            ) > 1  # >1 non-control guide?
    else None if all(np.array(x) == self._keys["key_nonperturbed"]
                    ) or len(x) == 0
    else "single").to_frame(
        col_multi_transfection)  # multi v. single-transfected

# The original cell also looped over every unique target to build a
# per-target NP/treatment/control Series (`tgt`), but the result was
# overwritten on each iteration and never used, and its non-perturbed check
# compared a list with a string. That dead loop is removed here.

target_info = multi.join(binary).join(target_genes).join(
    target_list).join(grnas.loc[targets.index])  # guide info combined into dataframe

In [13]:
if skip_preprocessing_clustering is False:  # if not already processed & clustered

    # Arguments
    print(f"UMAP Keywords: {kws_umap}")

    # Preprocess
    process_kws = {
        "kws_hvg": {"min_mean": 0.0125, "max_mean": 3, "min_disp": 0.5},
        "target_sum": 1e4,
        "max_genes_by_counts": 2500,
        "max_pct_mt": 15,
        "min_genes": 200,
        "min_cells": 3,
        "scale": 10,
        "regress_out": None,
        "kws_process_guide_rna": kws_process_guide_rna,
    }
    self.preprocess(**process_kws, kws_umap=kws_umap)  # preprocessing

    # Cluster
    self.cluster(
        method_cluster="leiden",
        kws_neighbors=kws_neighbors,
        kws_pca=kws_pca,
        kws_umap=kws_umap,
        kws_cluster=kws_cluster,
    )

# Show cell-level metadata after the steps above
self.adata.obs

UMAP Keywords: {'min_dist': 0.3}

<<< PROCESSING GUIDE RNAs >>>



ValueError: Other Series must have a name

### Find Markers (DEGs)

In [None]:
# Marker genes: Wilcoxon test on the "scaled" layer against reference "rest"
marks, figs_m = self.find_markers(
    n_genes=5,
    layer="scaled",
    method="wilcoxon",
    key_reference="rest",
)
print(marks)
# Example: top 10 markers (by absolute score) for cluster "6"
# marks.loc["6"].loc[marks.loc["6"].scores.abs().sort_values(ascending=False).iloc[:10].index]

## Inspect & Set Up Arguments for Later


This code looks more complicated than it would actually be for an end user because it was written to generalize across several example datasets, each with its own column names and some large enough that they must be subset in order to run the vignettes in a reasonable period of time.

Basically, you won't need this code as an end user; this is just to choose relevant random subsets of genes, perturbations, etc. that are available in a given example dataset, for illustration purposes (and to conserve memory/time by not using every gene/condition/etc.). 

In real use cases, you will know what genes and conditions are of interest, and you can manually specify them by simply stating them in the appropriate arguments (such as `target_gene_idents`) or (in many cases) by not specifying the argument (resulting in the code using all available genes, etc.).

In [None]:
# Mixscape Subset
# Boolean mask over cells: keep those whose `num_features` is below 2.
subset_mixscape = (self.adata[assay] if assay else self.adata).obs.num_features < 2
if file == "CRISPRi_scr":
    # Additionally drop cells targeting SPI1 or RPN1 for this dataset
    subset_mixscape = subset_mixscape & ~(self.adata.obs[
        self._columns["col_target_genes"]].isin(["SPI1", "RPN1"]))

# Choose Subset of Sequenced Genes
genes = np.array((self.adata[assay] if assay else self.adata).var.reset_index(
    )[self._columns["col_gene_symbols"]].copy())
if file == "CRISPRi_scr":
    genes_subset = ["CDKN2A", "CDKN1A", "PLAUR", "TP53", "IL6ST", "IL1A",
                    "CXCL8", "CCL2", "CEBPB", "NFKB1", "IGFBP7", "TGFB1",
                    "BCL2", "PTGER4", "ERCC1", "ERCC4", "ATG16L1", "NOD2", "LACC1"]
else:
    # Random illustration subset (unseeded, so it differs between runs)
    genes_subset = list(pd.Series(genes).sample(10))

# Choose Subset of Target Genes
tga = (self.adata[assay] if assay else self.adata).obs[
    self._columns["col_target_genes"]][subset_mixscape].copy().unique()
# tga = list(pd.Series(tga)[pd.Series(tga).apply(
#     lambda t: sum((self.adata[assay].obs if assay else self.adata.obs)[
#         self._columns["col_target_genes"]] == t) > 3)])  # sufficient N
if file == "CRISPRi_scr":
    # Keep target entries that mention a gene of interest and list fewer
    # than 3 "|"-separated targets.
    # Fix: the original condition ended in `or "IL6ST"` (a non-empty string,
    # always truthy) instead of `"IL6ST" in x`, so the gene filter never
    # excluded anything. The assignment was also duplicated verbatim.
    genes_of_interest = ("ATG16L1", "CDKN2A", "PLAUR",
                         "NOD2", "PTGER4", "IL6ST")
    tga_valid = pd.Series(tga).dropna()
    target_gene_idents = list(tga_valid[[
        any(g in x for g in genes_of_interest) and (
            len(x.split("|")) < 3) for x in tga_valid]])
else:
    target_gene_idents = list(pd.Series(tga).sample(3)) if len(tga) > 3 else True
print(f"\n\nGene Subset: {', '.join(list(genes_subset))}\n\n")
print(f"Targets: {target_gene_idents}")

# Perturbation Categories
(self.adata[assay] if assay else self.adata).obs[
    self._columns["col_perturbation"]]

## Basic Plots

In [None]:
# Basic plots for the chosen gene subset; violin-plot options are passed
# through `kws_gex_violin`.
figs = self.plot(
    genes=genes_subset,
    layers="all",
    kws_gex_violin={"scale": "area", "figsize": (15, 15)},
)

# Perturbation Analyses

## Augur: Cell Type-Level Perturbation Response

Which cell types are most affected by perturbations? Quantify perturbation responses by cell type with Augur, which uses supervised machine learning classification of experimental condition labels (e.g., treated versus untreated). The more separable the condition among cells of a given type, the higher the perturbation effect score.

<u> __Features__ </u>  

- Quantify and visualize degree of perturbation response by cell type

<u> __Input__ </u>  

<u> __Output__ </u>  

mean_augur_score

<u> __Notes__ </u>  

- col_perturbation + "_binary"
- Sub-sample sizes equal across conditions; does not account for perturbation-induced compositional shifts (cell type abundance)
- Scores are for cell types (aggregated across cells, not individual cells)
- Two modes
    - If select_variance_feature=True, run the original Augur implementation, which removes genes that don't vary much across cell type.
    - If False, use features selected by `scanpy.pp.highly_variable_genes()`, which is faster and sensitively recovers effects; however, the feature selection may yield inflated Augur scores because this reduced feature set is used in training, resulting in it taking advantage of the pre-existing power of this feature selection to separate cell types.
    - If False, you also have to be sure that "highly_variable_features" is a variable in your data. This can be complicated if you have a separate layer for perturbation data.

In [None]:
# Augur run using the object's perturbation column and treatment key
augur_data, augur_results, figs_augur = self.run_augur(
    col_perturbation=self._columns["col_perturbation"],
    key_treatment=self._keys["key_treatment"],
    classifier="random_forest_classifier",
    n_threads=True,
    augur_mode="default",
    select_variance_features=True,
    n_folds=2,
    kws_umap=kws_umap,
    subsample_size=20,
    kws_augur_predict={"span": 0.7},
)

## Mixscape: Cell-Level Perturbation Response

**Which cell types are most affected by perturbations?** Quantify perturbation responses by cell type with Augur, which uses supervised machine learning classification of condition labels (e.g., treated versus untreated) and looking at how easy it is to tell perturbed versus non-perturbed (more separable = higher effect).

**Are there perturbation-specific clusters?** Mixscape uses linear discriminant analysis (LDA) to cluster cells that resemble each other in terms of gene expression and perturbation condition. _(LDA reduces dimensionality and attempts to maximize the separability of classes. Unperturbed cells are removed from analysis.)_

<u> __Features__ </u>  

- Remove confounds (e.g., cell cycle, batch effects)
- Classify cells as affected or unaffected (i.e., "escapees") by the perturbation
- Quantify and visualize degree of perturbation response

<u> __Input__ </u>  

<u> __Output__ </u>  
- Mixscape figures

<u> __Notes__ </u>  


In [None]:
# Plot perturbation scores by guide.
# Fix: the guide column was looked up via `self._column[...]`; every other
# cell in this notebook uses the `self._columns` dictionary.
# NOTE(review): `adata_pert` is assigned in cells further down the notebook,
# so those must be run before this one.
cr.pl.plot_perturbation_scores_by_guide(
    adata_pert, mixscape_class_global="mixscape_class_global",
    col_guide_rna=self._columns["col_guide_rna"], guide_split="-",
    key_control="NT", key_treatment="Perturbed",
    panel_spacing=None, kws_text_size=None)

In [None]:
# Pertpy Mixscape bar plot, using the object's guide RNA column
pt.pl.ms.barplot(
    adata_pert,
    guide_rna_column=self._columns["col_guide_rna"],
)  # targeting efficiency by condition

In [None]:
x = "SNX17g1|SNX17g2"  # example guide label; not used by the lines below

# Keep only cells labelled with the treatment or control key, then use the
# "X_pert" layer as the main matrix of the copy.
keys_pert = [self._keys["key_treatment"], self._keys["key_control"]]
is_pert_or_control = self.adata.obs[
    self._columns["col_perturbation"]].isin(keys_pert)
adata_pert = self.adata[is_pert_or_control].copy()
adata_pert.X = adata_pert.layers["X_pert"]

In [None]:
# Display `nums`, the per-cell lists of guide IDs produced by the guide-RNA
# parsing cell (the part of each guide label after `guide_split`).
nums

In [None]:
# Display the guide RNA column of the perturbation subset.
# NOTE(review): `col_guide_rna` is not assigned in any visible cell (only
# `col_guide_rna_data` is imported) -- confirm where it is defined.
adata_pert.obs[col_guide_rna]

In [None]:
# NOTE(review): incomplete scratch fragment -- the expression starts with
# `.apply` and has no object to the left of the dot, so this cell is a
# SyntaxError as written. Complete it or delete it.
.apply(lambda x: list(pd.Series([i if i in adata_pert.obs.loc[x.name][col_target_genes].split() else np.nan for i in x["id"]]).dropna()), axis=1)

In [None]:
# Compute the perturbation signature relative to the control key, then keep
# only treatment/control cells and use the "X_pert" layer as the main matrix.
mixscape_identifier = pt.tl.Mixscape()
mixscape_identifier.perturbation_signature(self.adata, self._columns["col_perturbation"], self._keys["key_control"])
adata_pert = self.adata[self.adata.obs[self._columns[
    "col_perturbation"]].isin([self._keys["key_treatment"], self._keys["key_control"]])].copy()
adata_pert.X = adata_pert.layers["X_pert"]
# Replace the guide delimiter with "g" in the guide labels.
# Fix: the pattern was `split_guide`, a name that is never assigned in this
# notebook; the delimiter variable used everywhere else is `guide_split`.
# NOTE(review): `col_guide_rna` is not assigned in any visible cell either --
# confirm where it is defined.
adata_pert.obs.loc[:, col_guide_rna] = adata_pert.obs.apply(lambda x: re.sub(guide_split, "g", x[col_guide_rna]), axis=1)

In [None]:
from plotnine import (
    aes,
    element_text,
    facet_wrap,
    geom_bar,
    ggplot,
    labs,
    scale_fill_manual,
    theme,
    theme_classic,
    xlab,
    ylab,
)
import pandas as pd 


# Plot perturbation scores by guide, reading guide labels from "guide_ids".
# NOTE(review): the bare name `plot_perturbation_scores_by_guide` is not
# defined or imported in any visible cell; another cell calls it as
# `cr.pl.plot_perturbation_scores_by_guide` -- confirm which is intended.
plot_perturbation_scores_by_guide(
    adata_pert, mixscape_class_global="mixscape_class_global", 
    col_guide_rna="guide_ids", guide_split="-",
    key_control="NT", key_treatment="Perturbed",
    panel_spacing=None, kws_text_size=None)

In [None]:

# Build guide labels in which the first `guide_split` of every guide is
# replaced by "g", de-duplicated and re-joined with `feature_split`.
# Fix: the original cell did not parse. `str(x[col_guide_rna]` was missing
# its closing parenthesis, and the conditional iterable
# (`... if feature_split else [...]`) was not parenthesized, so `if` was read
# as a comprehension filter followed by an illegal `else`.
grna = adata_pert.obs.apply(
    lambda x: feature_split.join(list(pd.unique(np.array([
        re.sub(guide_split, "g", str(i), count=1)
        for i in (str(x[col_guide_rna]).split(feature_split)
                  if feature_split else [str(x[col_guide_rna])])])))),
    axis=1)  # Pertpy needs format <gene_target>g<#>

In [None]:
# Pertpy Mixscape bar plot, using the `col_guide_rna` variable as the column
pt.pl.ms.barplot(
    adata_pert,
    guide_rna_column=col_guide_rna,
)  # targeting efficiency by condition

In [None]:
# Display cell-level metadata of the treatment/control subset
adata_pert.obs

In [None]:
# List the keys stored under the "mixscape" entry of `adata_pert.uns`
adata_pert.uns["mixscape"].keys()

In [None]:
import re

def _pertpy_guide_ids(row):
    """Return one row's guide labels with the first `guide_split` of each
    guide replaced by "g", de-duplicated and re-joined with `feature_split`."""
    raw_label = row[col_guide_rna]
    if feature_split:
        guides = list(str(raw_label).split(feature_split))
    else:
        guides = list([raw_label])
    renamed = [re.sub(guide_split, "g", str(g), count=1) for g in guides]
    return feature_split.join(list(pd.unique(np.array(renamed))))


# Perturbation signature relative to the control key
mixscape_identifier = pt.tl.Mixscape()
mixscape_identifier.perturbation_signature(
    self.adata,
    self._columns["col_perturbation"],
    self._keys["key_control"],
)

# Keep treatment/control cells only; use the "X_pert" layer as main matrix
adata_pert = self.adata[
    self.adata.obs[self._columns["col_perturbation"]].isin(
        [self._keys["key_treatment"], self._keys["key_control"]])
].copy()
adata_pert.X = adata_pert.layers["X_pert"]

# Pertpy needs format <gene_target>g<#>
adata_pert.obs.loc[:, col_guide_rna] = adata_pert.obs.apply(
    _pertpy_guide_ids, axis=1)

# Mixscape classification
mixscape_identifier.mixscape(
    adata=adata_pert,
    labels=self._columns["col_target_genes"],
    control=self._keys["key_control"],
    layer="X_pert",
    perturbation_type=self._keys["key_treatment"],
)

# Linear discriminant analysis (LDA)
mixscape_identifier.lda(
    adata=adata_pert,
    labels=self._columns["col_target_genes"],
    layer="X_pert",
    control=self._keys["key_control"],
    split_by=col_split_by,
    perturbation_type=self._keys["key_treatment"],
    mixscape_class_global="mixscape_class_global",
)

In [None]:
# Display cell-level metadata of `adata_pert` again (this cell follows the
# Mixscape/LDA cell in the notebook)
adata_pert.obs

In [None]:
# Mixscape through the object's own method, limited to `subset_mixscape`
figs_mixscape = self.run_mixscape(
    subset=subset_mixscape,
    col_split_by=col_split_by,
)

## Distance Metrics

See `self.figures[run_label]["distances"]` and  `self.results[run_label]["distances"]`
for results.

In [None]:
# Boolean mask of cells used for the distance computation.
# Fix: `target_gene_idents` is set to `True` (not a list) by the setup cell
# when there are 3 or fewer targets; `[...] + True` then raised TypeError.
# NOTE(review): `True` is treated here as "use every cell" -- confirm that
# this matches the intended meaning of the sentinel.
if target_gene_idents is True:
    subset_distance = pd.Series(True, index=self.adata.obs.index)
else:
    subset_distance = self.adata.obs[
        self._columns["col_target_genes"]].isin(
            [self._keys["key_control"], self._keys["key_nonperturbed"]
             ] + list(target_gene_idents))  # subset for quickness
fig_distance = self.compute_distance(
    subset=subset_distance,
    distance_type="edistance", method="X_pca")

## Composition Analysis

In [None]:
# Cell-level composition analysis; the object's column names are forwarded
# as keyword arguments.
out_ca = cr.ax.analyze_composition(
    self.adata,
    "1",
    assay=self._assay,
    analysis_type="cell_level",
    generate_sample_level=False,
    **self._columns,
    sample_identifier=None,
    est_fdr=0.05,
)