# Setup

In [1]:
%load_ext autoreload
%autoreload 2

import crispr as cr
from anndata import AnnData
import scanpy as sc
import copy

# Notebook-wide configuration. Everything a reader might want to tune lives
# here; later cells only reference these names.

# Name of the sample-identifier column; also used as the batch column below.
col_sample_id = "orig.ident"

# Keyword arguments forwarded as `kws_harmony` when integrating the datasets.
kws_harmony = {"plot_convergence": True,
               "random_state": 1618}

# Keyword arguments describing how guide RNA calls are parsed.
kws_process_guide_rna = {"feature_split": "|", "guide_split": "-", 
                         "key_control_patterns": ["CTRL"],
                         "remove_multi_transfected": True}

# UMAP keyword arguments. The same dict object is shared by `kws_pp`,
# `kws_cluster`, and the Augur call further down.
kws_umap = {"min_dist": 0.3}

# Keyword arguments used to initialize each object (column names and keys).
kws_init = {"assay": None, "assay_protein": None, 
            "col_gene_symbols": "gene_symbols", 
            "col_cell_type": "majority_voting", 
            "col_sample_id": col_sample_id, "col_batch": col_sample_id, 
            "col_perturbed": "perturbation", 
            "col_guide_rna": "feature_call", 
            "col_num_umis": "num_umis", 
            "kws_process_guide_rna": kws_process_guide_rna,
            "col_condition": "target_gene_name", "key_control": "NT", 
            "key_treatment": "KD"}

# Preprocessing keyword arguments (filtering thresholds, normalization,
# highly-variable-gene selection, scaling).
# NOTE(review): the [min, max] meaning of the filter pairs is presumed from
# their names -- confirm against the `crispr` preprocessing documentation.
kws_pp = {"kws_hvg": {"min_mean": 0.0125, "max_mean": 10, 
                      "min_disp": 0.5, "flavor": "cell_ranger"}, 
          "target_sum": 10000.0, "cell_filter_pmt": [0, 15], 
          "cell_filter_ngene": [200, None], "cell_filter_ncounts": [3, 40000], 
          "gene_filter_ncell": [3, None], "regress_out": None, 
          "kws_scale": "z", "kws_umap": kws_umap}

# Clustering keyword arguments (method, neighbors, PCA, resolution).
kws_cluster = {"method_cluster": "leiden", "kws_umap": kws_umap, 
               "kws_neighbors": None, 
               "kws_pca": {"n_comps": None, "use_highly_variable": True}, 
               "kws_cluster": {"resolution": 0.5}}

# Input datasets, keyed by a short label, each with the directory to read.
# NOTE(review): the "HH03" label points at the ".../HH06" directory --
# confirm whether the label or the path is the intended one.
file_path = {"HH03": {"directory": "data/crispr-screening/HH06"}, 
             "CR4": {"directory": "data/crispr-screening/HH-Hu-CR4"},
             "CR5": {"directory": "data/crispr-screening/HH-Hu-CR5"}}



KeyboardInterrupt: 

# Create Integrated Object

We first have to create preprocessed and clustered individual objects, then integrate them with Harmony.

## Integrate Data

If `kws_pp` is `None`, preprocessing is skipped for the individual objects; likewise, if `kws_cluster` is `None`, clustering is skipped for them.

In [None]:
# Scratch inspection: list the top-level keys of a fully expanded set of
# initialization keywords (the literal below is independent of `kws_init`).
{
    "assay": None,
    "col_gene_symbols": "gene_symbols",
    "col_cell_type": "majority_voting",
    "col_sample_id": "orig.ident",
    "col_batch": "orig.ident",
    "col_perturbed": "perturbation",
    "col_guide_rna": "feature_call",
    "col_num_umis": "num_umis",
    "kws_process_guide_rna": {
        "feature_split": "|",
        "guide_split": "-",
        "key_control_patterns": ["CTRL"],
        "col_guide_rna": "feature_call",
        "col_num_umis": "num_umis",
        "key_control": "NT",
        "remove_multi_transfected": True,
    },
    "col_condition": "target_gene_name",
    "key_control": "NT",
    "key_treatment": "KD",
    "remove_multi_transfected": True,
    "key_nonperturbed": "NP",
}.keys()


dict_keys(['assay', 'col_gene_symbols', 'col_cell_type', 'col_sample_id', 'col_batch', 'col_perturbed', 'col_guide_rna', 'col_num_umis', 'kws_process_guide_rna', 'col_condition', 'key_control', 'key_treatment', 'remove_multi_transfected', 'key_nonperturbed'])

In [35]:
# Create the integrated AnnData object from every dataset in `file_path`,
# using the initialization, preprocessing, clustering, and Harmony keywords
# defined in the setup cell.
adata = cr.pp.create_object_multi(
    file_path,
    kws_init=kws_init,
    kws_pp=kws_pp,
    kws_cluster=kws_cluster,
    kws_harmony=kws_harmony,
)

# Initialization keywords for the integrated object: an independent deep copy
# of `kws_init` with guide RNA processing disabled (don't need any more) and
# the sample/batch columns set to `col_sample_id`.
kws_init_new = {
    **copy.deepcopy(kws_init),
    "kws_process_guide_rna": None,
    "col_sample_id": col_sample_id,
    "col_batch": col_sample_id,
}



<<< INTEGRATING DATASETS >>>



<<< INITIALIZING CRISPR CLASS OBJECT >>>

{'assay': None, 'col_gene_symbols': 'gene_symbols', 'col_sample_id': 'orig.ident', 'kws_process_guide_rna': {'feature_split': '|', 'guide_split': '-', 'key_control_patterns': ['CTRL'], 'remove_multi_transfected': True, 'col_guide_rna': 'feature_call', 'col_num_umis': 'num_umis', 'key_control': 'NT'}, 'remove_multi_transfected': True}

<<< LOADING PROTOSPACER METADATA >>>


TypeError: read_10x_mtx() got an unexpected keyword argument 'remove_multi_transfected'

## Create Overall Object

In [None]:
# Pass the integrated object to the Crispr class. The sample and batch
# columns are overridden to "unique.idents" for this call only;
# `kws_init_new` itself is left unchanged.
self = cr.Crispr(
    adata,
    **{
        **kws_init_new,
        "col_sample_id": "unique.idents",
        "col_batch": "unique.idents",
    },
)
del adata  # object now in `self`

# Clustering & Annotations

Clustering was performed on the individual objects in `cr.pp.create_object_multi()`, but we want to redo QC and clustering on the integrated (overall) object.

## Clustering

We need to have `kws_pca=False` so that the Harmony-adjusted PCA created during integration (rather than a newly-calculated one) will be used.

In [None]:
# Same clustering keywords as for the individual objects, except that
# `kws_pca` is overridden to False for the integrated object.
cluster_kwargs = dict(kws_cluster, kws_pca=False)
self.cluster(**cluster_kwargs, layer="log1p")

# Marker genes: Wilcoxon test against "rest", 10 genes requested.
degs = self.find_markers(
    n_genes=10,
    method="wilcoxon",
    key_reference="rest",
)
degs[0]  # marker data; degs[1] for figures

## CellTypist

Annotate cell types.

In [None]:
# Annotate clusters with the named CellTypist model, then display the
# per-cell metadata table.
celltypist_model = "COVID19_HumanChallenge_Blood.pkl"
preds, f_ct = self.annotate_clusters(model=celltypist_model)
self.rna.obs

# Plots

In [None]:
# Use the "predicted_labels" column for cell types when it exists in `.obs`;
# otherwise pass None to the plotting call.
cct = "predicted_labels" if "predicted_labels" in self.rna.obs else None

# Two randomly sampled labels (clusters to circle). Sampling is skipped when
# the column is missing, because indexing `.obs` with None raises a KeyError.
# NOTE(review): `clus` is not used by the plotting call below, and the sample
# is unseeded, so it differs between runs -- confirm whether it is still needed.
clus = list(self.rna.obs[cct].sample(2)) if cct is not None else None

figs = self.plot(genes=36, 
                 kws_qc=True,
                 col_cell_type=cct,
                 layers="all",  # also plot "scaled" layer, for instance
                 kws_gex_violin=dict(scale="area", height=10),
                 kws_clustering=dict(col_cell_type=cct))

# Analyses

Perform perturbation-specific analyses.

## Augur

In [None]:
# Cell-type column: prefer "majority_voting" when it is present in `.obs`,
# otherwise fall back to the column registered on the object.
if "majority_voting" in self.rna.obs:
    cct = "majority_voting"
else:
    cct = self._columns["col_cell_type"]

# The first three arguments are passed explicitly to show that they can be
# overridden here; per the original notes, the values stored on the object
# are used by default when they are left unspecified.
_ = self.run_augur(
    col_cell_type=cct,
    col_perturbed=self._columns["col_perturbed"],
    key_treatment=self._keys["key_treatment"],
    select_variance_features=True,  # filter by highly variable genes
    classifier="random_forest_classifier",
    n_folds=3,
    augur_mode="default",
    kws_umap=kws_umap,
    subsample_size=5,
    kws_augur_predict={"span": 0.7},
)

## Mixscape

In [None]:
# Run Mixscape split by the "orig.ident" column. `cct` is whatever cell-type
# column was assigned most recently (the Augur cell, when run top to bottom).
figs_mixscape = self.run_mixscape(
    col_cell_type=cct,
    col_split_by="orig.ident",
    target_gene_idents=True,
)

# Distance

Investigate distance metrics.

In [None]:
kws_plot = {"figsize": (15, 15), "robust": True}  # heatmap keywords

# Both metrics are computed on the "X_pca" representation with the same
# heatmap keywords.
# NOTE(review): the `_w` / `_e` suffixes do not obviously match the metrics
# ("edistance" is stored in `fig_dist_w`, "mmd" in `fig_dist_e`) -- confirm
# the naming is intentional.
fig_dist_w = self.compute_distance(
    distance_type="edistance", method="X_pca", kws_plot=kws_plot)
fig_dist_e = self.compute_distance(
    distance_type="mmd", method="X_pca", kws_plot=kws_plot)