In [None]:
import os
import numpy 

from aavomics import database
import scanpy
import anndata
import pandas

from aavomics import aavomics

from plotly import offline as plotly
from plotly import graph_objects
from plotly.subplots import make_subplots

In [None]:
# Alignment whose per-cell-set AnnData files supply the "AAV" variable and the
# per-virus / per-vector obs columns copied into the combined object.
ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A_AAVomics"
# Alignment whose per-cell-set AnnData files carry the taxonomy labels in obs.
TAXONOMY_ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A"

# Name of the obs column (in the taxonomy alignment's AnnData) that is copied
# out as "Cell Type"; cells where it is NaN are excluded.
TAXONOMY_NAME = "CCN202105060"

# File name of the combined AnnData, written under database.DATA_PATH.
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"

In [None]:
def _virus_and_vector_columns(cell_set):
    """Return the obs column names (virus and vector names) to copy for a cell set.

    If the cell set has no read sets from "Virus Transcripts" sequencing
    libraries, a notice is printed and an empty set is returned, so only the
    overall "AAV" column ends up in the output. Otherwise every virus
    (delivery vehicle) injected into the source animal is included, and the
    individual vector names are added only for viruses that carry more than
    one vector.
    """

    read_sets = set()

    for sequencing_library in cell_set.sequencing_libraries:
        if sequencing_library.type == "Virus Transcripts":
            read_sets.update(sequencing_library.read_sets)

    if len(read_sets) == 0:
        print("No amplified reads to disambiguate. Only including overall transduction")
        return set()

    # Map each virus name to the set of vector names delivered by it
    virus_vector_names = {}

    for injection in cell_set.source_tissue.animal.injections:
        for vector in injection.vector_pool.vectors:
            virus_vector_names.setdefault(vector.delivery_vehicle.name, set()).add(vector.name)

    column_names = set()

    for virus_name, vector_names in virus_vector_names.items():
        column_names.add(virus_name)

        # A single-vector virus is already fully described by its virus column
        if len(vector_names) > 1:
            column_names.update(vector_names)

    return column_names


adatas = []
cell_set_names = []

for cell_set_index, cell_set in enumerate(database.CELL_SETS):

    print("Adding %s" % cell_set.name)

    adata = anndata.read_h5ad(cell_set.get_anndata_file_path(alignment_name=ALIGNMENT_NAME))
    taxonomy_adata = anndata.read_h5ad(cell_set.get_anndata_file_path(alignment_name=TAXONOMY_ALIGNMENT_NAME))

    # Keep only the cells that received a label in the taxonomy column
    fresh_adata = taxonomy_adata[~taxonomy_adata.obs[TAXONOMY_NAME].isna()].copy()

    # Original barcodes, captured before the cells are renamed below; they are
    # the keys used to look values up in both source AnnData objects.
    barcodes = fresh_adata.obs_names.values

    # Start from an empty obs table (same index, no columns) in one step,
    # rather than dropping columns in place while iterating over them.
    fresh_adata.obs = fresh_adata.obs.drop(columns=fresh_adata.obs.columns)

    fresh_adata.obs["Cell Type"] = taxonomy_adata.obs.loc[barcodes, TAXONOMY_NAME]

    # .X may be sparse or dense depending on how the file was written; either
    # way store a flat 1-D array rather than an (n, 1) matrix in the column.
    aav_counts = adata[barcodes, "AAV"].X
    if hasattr(aav_counts, "toarray"):
        aav_counts = aav_counts.toarray()
    fresh_adata.obs["AAV"] = numpy.asarray(aav_counts).ravel()

    # Sorted so the column order of the saved file is the same on every run
    # (plain set iteration order can differ between kernel sessions).
    for column_name in sorted(_virus_and_vector_columns(cell_set)):
        fresh_adata.obs[column_name] = adata.obs.loc[barcodes, column_name]

    # Replace each barcode's suffix with the 1-based cell set number so that
    # cell names stay unique after concatenation.
    fresh_adata.obs_names = [
        "%s-%i" % (barcode.split("-")[0], cell_set_index + 1) for barcode in barcodes
    ]

    # Appended together so the concat keys always line up with the objects
    cell_set_names.append(cell_set.name)
    adatas.append(fresh_adata)

# join="outer" keeps the union of variables and obs columns across cell sets;
# the originating cell set name is recorded in the "Cell Set" obs column.
combined_adata = anndata.concat(adatas, label="Cell Set", keys=cell_set_names, join="outer")

In [None]:
# Persist the combined AnnData next to the rest of the project data
output_path = os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)
combined_adata.write_h5ad(output_path)