In [None]:
import os
import pandas

from aavomics import database
from aavomics import aavomics
import anndata

In [None]:
# Alignment whose per-cell-set AnnData file supplies the transcript counts
# (the "AAV" gene column is read from this file).
ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A_AAVomics"
# Alignment whose per-cell-set AnnData file carries the taxonomy labels in .obs.
TAXONOMY_ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A"

# .obs column in the taxonomy file whose value "Debris" marks the droplets
# used as background counts.
DEBRIS_TAXONOMY_NAME = "CCN202105041"
# .obs column in the taxonomy file holding the cell type label; cells with a
# missing value in this column are excluded from the cell type list.
TAXONOMY_NAME = "CCN202105060"

# Name of the CSV written under database.DATA_PATH.
FILE_NAME = "aavomics_cell_type_transduction_rates.csv"

# Forwarded as `method=` and `threshold=` to aavomics.get_transcript_presence_rate.
# NOTE(review): the meaning of threshold=None is decided by that function — confirm there.
TRANSDUCTION_RATE_METHOD = aavomics.Infection_Rate_Method.COUNTING
TRANSDUCTION_THRESHOLD = None

In [None]:
# NOTE(review): exploratory lookup of a single hard-coded cell set. Nothing
# computed here feeds the analysis: cell_set, tissue_sample, animal and
# injections are all reassigned inside the loop over database.CELL_SETS.
# Consider deleting this cell; it raises KeyError if the ID is not in the database.
cell_set = database.CELL_SETS_DICT["20190703_TC3"]
tissue_sample = cell_set.source_tissue
animal = tissue_sample.animal
injections = animal.injections

In [None]:
# Result table in nested-dict form:
#   {"<cell set name>-<column name>": {"<cell type> Transduction Rate": rate,
#                                      "<cell type> Num Transduced": rate * n_cells}}
# where <column name> is "AAV" or a virus / vector name.
sample_virus_cell_type_transduction_dict = {}

for cell_set in database.CELL_SETS:

    print("Inspecting %s" % cell_set.name)

    tissue_sample = cell_set.source_tissue
    animal = tissue_sample.animal

    # Cell sets from animals without injections are skipped entirely.
    if animal.injections is None:
        print("%s has no injections, skipping." % cell_set.name)
        continue

    # Transcript counts; the "AAV" gene column is copied into .obs so it can be
    # read the same way as the other per-cell count columns below.
    adata = anndata.read_h5ad(cell_set.get_anndata_file_path(alignment_name=ALIGNMENT_NAME))
    adata.obs["AAV"] = adata[:, "AAV"].X.todense()

    # Taxonomy labels live in a separate file produced from a different alignment.
    # NOTE(review): the boolean masks built from taxonomy_adata.obs are used to
    # index adata, which presumes both files list the same cells in the same
    # order — TODO confirm.
    taxonomy_adata = anndata.read_h5ad(cell_set.get_anndata_file_path(alignment_name=TAXONOMY_ALIGNMENT_NAME))

    # Distinct labels among cells that were assigned a type. Working on the
    # .obs column directly avoids copying the whole AnnData just to list labels.
    cell_type_labels = taxonomy_adata.obs[TAXONOMY_NAME]
    cell_types = cell_type_labels.dropna().unique()

    # Masks and cell counts do not depend on which count column is being
    # quantified, so they are built once per cell set.
    cell_type_selections = []
    for cell_type in cell_types:
        cell_type_mask = cell_type_labels == cell_type
        cell_type_selections.append((cell_type, cell_type_mask, cell_type_mask.sum()))

    debris_mask = taxonomy_adata.obs[DEBRIS_TAXONOMY_NAME] == "Debris"

    injections = animal.injections

    # Read sets from "Virus Transcripts" sequencing libraries; per-virus and
    # per-vector columns are only quantified when at least one exists.
    read_sets = set()

    for sequencing_library in cell_set.sequencing_libraries:
        if sequencing_library.type == "Virus Transcripts":
            read_sets.update(sequencing_library.read_sets)

    virus_and_vector_names = set()

    if not read_sets:
        print("No amplified reads to disambiguate. Only estimating overall transduction")
    else:
        # Group the injected vector names by the virus that delivers them.
        virus_vector_names = {}

        for injection in injections:
            for vector in injection.vector_pool.vectors:
                virus_vector_names.setdefault(vector.delivery_vehicle.name, set()).add(vector.name)

        for virus_name, vector_names in virus_vector_names.items():

            virus_and_vector_names.add(virus_name)

            # Individual vectors are only reported when a virus carries more
            # than one of them.
            if len(vector_names) > 1:
                virus_and_vector_names.update(vector_names)

    # The overall "AAV" count is always quantified.
    virus_and_vector_names.add("AAV")

    # Sorted so the row order of the result is the same on every run; plain set
    # iteration order over strings can change between interpreter sessions.
    for column_name in sorted(virus_and_vector_names):

        row_name = "-".join([cell_set.name, column_name])

        print("Processing %s in %s" % (column_name, cell_set.name))

        row = {}
        sample_virus_cell_type_transduction_dict[row_name] = row

        # Only "AAV" is added to adata.obs in this cell; any other column name is
        # presumably already stored in the file by an upstream step — TODO confirm.
        debris_counts = adata[debris_mask].obs[column_name]

        for cell_type, cell_type_mask, num_cells in cell_type_selections:

            cell_type_counts = adata[cell_type_mask].obs[column_name].values

            virus_rate = aavomics.get_transcript_presence_rate(
                cell_type_counts,
                method=TRANSDUCTION_RATE_METHOD,
                background_transcript_counts=debris_counts,
                resolution=400,
                threshold=TRANSDUCTION_THRESHOLD
            )

            # A tuple result is reduced to its first element, which is
            # presumably the rate itself — TODO confirm against aavomics.
            if isinstance(virus_rate, tuple):
                virus_rate = virus_rate[0]

            row["%s Transduction Rate" % cell_type] = virus_rate
            row["%s Num Transduced" % cell_type] = virus_rate * num_cells

In [None]:
# One row per "<cell set>-<column name>" key, one column per recorded metric.
transduction_rate_df = pandas.DataFrame.from_dict(
    sample_virus_cell_type_transduction_dict,
    orient="index",
)

# Row labels are "<cell set>-<virus>": the first "-"-separated piece is the
# cell set, and the remaining pieces are re-joined because the virus part may
# itself contain "-".
row_label_parts = transduction_rate_df.index.str.split("-")
transduction_rate_df["Cell Set"] = [parts[0] for parts in row_label_parts]
transduction_rate_df["Virus"] = ["-".join(parts[1:]) for parts in row_label_parts]

In [None]:
# Write the table to <database.DATA_PATH>/<FILE_NAME>; the "<cell set>-<virus>"
# row labels are written as the first CSV column (pandas' default index=True).
transduction_rate_df.to_csv(os.path.join(database.DATA_PATH, FILE_NAME))