# Setup

In [None]:
import crispr as cr 
import pertpy
import muon
import os
import pandas as pd
import numpy as np
from config import (files_data, col_cell_type_data, col_gene_symbols_data, 
                    assays_data, layer_perturbation_data,
                    col_perturbation_data, key_control_data, 
                    col_split_by_data, layer_perturbation_data,
                    label_perturbation_type_data, col_target_genes_data,
                    key_treatment_data, col_guide_rna_data,
                    col_sample_id_data, load_example_data)

# Options
print(dir(cr.ax))  # show the names exposed by cr.ax
n_threads = 4
file = "CRISPRi_scr"  # key used to index the per-dataset config dictionaries
regress_out = None
# Processing keyword arguments; `hvg_kws` is a nested group of thresholds
# (presumably for highly-variable-gene selection -- confirm against the
# consumer of process_kws).
process_kws = {
    "hvg_kws": {"min_mean": 0.0125, "max_mean": 3, "min_disp": 0.5},
    "target_sum": 1e4,
    "max_genes_by_counts": 2500,
    "max_pct_mt": 5,
    "min_genes": 200,
    "min_cells": 3,
    "scale": 10,
    "regress_out": regress_out,
}
write_public = True  # if need to download public data, write to examples/data?

# Set Arguments
# Look up every per-dataset setting for `file` in the config dictionaries.
col_cell_type = col_cell_type_data[file]
col_sample_id = col_sample_id_data[file]
col_perturbation = col_perturbation_data[file]
key_control = key_control_data[file]
label_perturbation_type = label_perturbation_type_data[file]
col_target_genes = col_target_genes_data[file]
layer = layer_perturbation_data[file]
col_gene_symbols = col_gene_symbols_data[file]
if file in assays_data:
    # A lone assay (string or None) is normalized in place to the
    # two-element [assay, assay_protein] form before unpacking.
    if isinstance(assays_data[file], str) or assays_data[file] is None:
        assays_data[file] = [assays_data[file], None]
    assay, assay_protein = assays_data[file]
else:
    # No entry for this dataset: both default to None. (Unpacking a bare
    # `None` into two names raises TypeError, so assign a pair.)
    assay, assay_protein = None, None
key_treatment = key_treatment_data[file]
col_split_by = col_split_by_data[file]
col_guide_rna = col_guide_rna_data[file]
# Same lookup as `layer` above; both names are kept.
layer_perturbation = layer_perturbation_data[file]
file_path = files_data[file]
print(files_data)

## Data

In [None]:
import h5py  # imported here because this cell precedes the cell that imports h5py

# Print the object stored at matrix/features/feature_type. The context manager
# closes the file even if the lookup or print raises.
with h5py.File(file_path, "r") as f:
    mat = f["matrix"]
    print(mat["features"]["feature_type"])

In [None]:
import h5py 
import anndata 
import scipy.sparse as sp_sparse

# Path of nested HDF5 keys to descend into, outermost first.
key = ("matrix", "features")
with h5py.File(file_path, "r") as f:
    keys = list(f.keys())
    # read array
    if isinstance(key, (list, np.ndarray, tuple, set)):
        mat = f[key[0]]
        # Read the values now: a dataset handle cannot be used once the file
        # is closed at the end of this `with` block.
        shape = mat["shape"][()]
        # Walk down the remaining keys, printing what is available at each
        # level before descending.
        for depth, sub_key in enumerate(key[1:], start=1):
            print(depth, mat.keys())
            mat = mat[sub_key]
    else:
        mat = f[key]
    print(mat)
    print(type(mat))
    # NOTE(review): with the key above, `mat` is presumably an HDF5 group
    # rather than a 2-D dataset, in which case this conversion cannot
    # succeed -- confirm which key was intended.
    mat = sp_sparse.csr_matrix(mat)
    # AnnData takes its shape from X; `shape` must stay None when X is given,
    # so the former `shape=()` argument is dropped.
    adata = anndata.AnnData(mat)

In [None]:
# h5f.close()
# dir(h5f["matrix"]["features"]["target_gene_name"])
import scipy.sparse as sp_sparse
# Rebuild the sparse count matrix from its stored components.
with h5py.File(file_path, "r") as f:
    h5_matrix = f["matrix"]
    # Per the 10x Genomics HDF5 feature-barcode format, the matrix is stored
    # column-compressed (indptr indexes columns) with shape
    # (n_features, n_barcodes). Build it as CSC with the stored shape, then
    # convert so `matrix` is still a CSR matrix as before.
    matrix = sp_sparse.csc_matrix(
        (h5_matrix["data"][:], h5_matrix["indices"][:],
         h5_matrix["indptr"][:]),
        shape=tuple(int(n) for n in h5_matrix["shape"][:])).tocsr()
    # feature_ids = [x.decode("ascii", "ignore") 
    #             for x in f["matrix"]["features"]["id"]]
    # feature_names = [x.decode("ascii", "ignore") 
    #                     for x in f["matrix"]["features"]["name"]]        
    # barcodes = list(f["matrix"]["barcodes"][:])
    # matrix = sp_sparse.csr_matrix((f["matrix"]["data"], 
    #                                 f["matrix"]["indices"], 
    #                                 f["matrix"]["indptr"]), 
    #                                   shape=f["matrix"]["shape"])
    # fbm = FeatureBCMatrix(feature_ids, feature_names, barcodes, matrix)

In [None]:
import scanpy as sc

# with h5py.File(file_path, 'r') as h5_file:
#     print(list(h5_file.keys()))  # List all the keys (datasets/groups) in the file
#     print(h5_file["matrix"].keys())
#     # Explore the structure of the file and metadata
# NOTE(review): this handle is opened without a context manager and is not
# closed anywhere in this cell; call `h5f.close()` once it is no longer needed.
h5f = h5py.File(file_path, "r")

# Build `adata` with the project helper imported from config (presumably it
# loads the dataset identified by `file` -- confirm in config).
adata = load_example_data(file, col_gene_symbols=col_gene_symbols)
# Print the contents of feature_ref.csv; the path is relative to the current
# working directory.
print(pd.read_csv("data/crispr-screening/feature_ref.csv"))
adata.obs  # trailing expression: shown as the cell output in a notebook

In [None]:
import scipy.sparse as sp
import scanpy as sc

# Read all feature types (gex_only=False), then split the features by type.
adata = sc.read_10x_h5(file_path, gex_only=False)
feature_types = adata.var["feature_types"]
is_guide = feature_types == "CRISPR Guide Capture"
is_gene = feature_types == "Gene Expression"
adata_crispr = adata[:, is_guide]
adata = adata[:, is_gene]
# Keep only the expression features whose names also occur among the guide
# features.
# NOTE(review): if guide names and gene names do not overlap, this leaves
# zero features -- confirm that the overlap is intended.
common_genes = adata.var_names.intersection(adata_crispr.var_names)
adata = adata[:, common_genes]
# NOTE(review): a layer presumably has to match the shape of `adata`; the
# guide matrix has one column per guide feature -- confirm the shapes agree.
guide_counts = sp.csr_matrix(adata_crispr.X)
adata.layers["X_pert"] = guide_counts

In [None]:
# Unpack the four values returned by the project helper for this file.
# NOTE(review): the meaning of each value is inferred from the target names
# only -- confirm against cr.pp.get_matrix_from_h5.
fbm, gex, barcodes, genes = cr.pp.get_matrix_from_h5(file_path)

In [None]:
# Select the rows of adata.var for each feature type. Both are bare
# expressions; in a notebook only the last one is displayed, so the first
# result is discarded.
adata.var[adata.var.feature_types == "CRISPR Guide Capture"]
adata.var[adata.var.feature_types == "Gene Expression"]