In [None]:
import anndata

import os
import numpy
from aavomics import database
import pandas

from plotly import graph_objects
from plotly import offline as plotly

In [None]:
# Input dataset (read from database.DATA_PATH).
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"

# Samples ("cell sets") included in the comparison, one per line.
CELL_SET_NAMES = [
    "20181127_TC1",
    "20190319_TC2",
    "20190111_BC1",
    "20190321_BC2",
    "20190712_TC5",
    "20190713_TC6",
    "20190713_TC7",
    "20200720_BC4_1",
    "20200720_BC4_2",
]

# Variant whose transcripts and transduction rates are plotted.
VIRUS_NAME = "PHP.eB"

# Table of per-cell-type transduction rates (read from database.DATA_PATH).
TRANSDUCTION_RATE_FILE_NAME = "aavomics_cell_type_transduction_rates.csv"

In [None]:
# Two-level cell type hierarchy: top-level cell type -> {subtype: {}}.
# Built from (parent, subtypes) pairs; every leaf is its own empty dict and the
# insertion order of both levels follows the listing below.
CELL_TYPE_HIERARCHY = {
    parent_name: {subtype_name: {} for subtype_name in subtype_names}
    for parent_name, subtype_names in (
        ("Astrocytes", (
            "Myoc- Astrocytes",
            "Myoc+ Astrocytes",
        )),
        ("Vascular Cells", (
            "Endothelial Cells",
            "Pericytes",
            "Red Blood Cells",
            "Vascular SMCs",
            "VLMCs",
        )),
        ("Immune Cells", (
            "Perivascular Macrophages",
            "Microglia",
            "Leukocytes",
        )),
        ("Oligodendrocytes", (
            "OPCs",
            "Committed Oligodendrocytes",
            "Mature Oligodendrocytes",
        )),
        ("Neurons", (
            "L2",
            "L2/3",
            "L3",
            "L4/5",
            "L5",
            "L5/6",
            "L6",
            "Lamp5",
            "Pax6",
            "Sncg",
            "Vip",
            "Sst",
            "Pvalb",
        )),
    )
}

In [None]:
# Read the annotated dataset named by ANNDATA_FILE_NAME from the aavomics data directory.
anndata_path = os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)
adata = anndata.read_h5ad(anndata_path)

In [None]:
# Read the transduction-rate table; its first column becomes the row index,
# which is later looked up with "<cell set>-<virus>" keys.
transduction_rate_path = os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME)
transduction_rate_df = pandas.read_csv(transduction_rate_path, index_col=0)

In [None]:
# Build one data point per (sample, top-level cell type):
#   transcript_count_values  - viral transcripts per cell in that group
#   transduction_rate_values - cell-count-weighted transduction rate, in percent
#   cell_type_values         - top-level cell type name of the point
#   text                     - sample name of the point (used as marker label)
text = []
transcript_count_values = []
transduction_rate_values = []
cell_type_values = []

for cell_set_name in CELL_SET_NAMES:

    # Only per-cell metadata is read below, so slice `obs` directly instead of
    # copying the whole AnnData object for every sample / cell type subset.
    cell_set_obs = adata.obs[adata.obs["Cell Set"] == cell_set_name]

    # The table row and the transcript-count column depend only on the sample,
    # so resolve them once here. Prefer the virus-specific entry and fall back
    # to the generic "AAV" entry when the sample has no virus-specific row.
    transcript_column = VIRUS_NAME
    entry_index = "%s-%s" % (cell_set_name, VIRUS_NAME)
    if entry_index not in transduction_rate_df.index.values:
        transcript_column = "AAV"
        entry_index = "%s-%s" % (cell_set_name, "AAV")

    transduction_rate_row = transduction_rate_df.loc[entry_index]

    for cell_type_name in CELL_TYPE_HIERARCHY:

        cell_subtypes = CELL_TYPE_HIERARCHY[cell_type_name]

        # Cells of this sample whose annotated type is any subtype of the
        # current top-level cell type.
        cell_type_mask = cell_set_obs["Cell Type"].isin(list(cell_subtypes.keys()))
        cell_type_obs = cell_set_obs[cell_type_mask]
        num_cells = cell_type_obs.shape[0]

        # Weight each subtype's rate by its cell count to estimate how many
        # cells of the top-level type were transduced.
        # NOTE(review): "<subtype> Num Cells" is presumably the number of cells
        # of that subtype in the sample - confirm against the CSV producer.
        total_transduced_cells = 0

        for cell_subtype in cell_subtypes:

            subtype_rate = transduction_rate_row["%s Transduction Rate" % cell_subtype]
            subtype_num_cells = transduction_rate_row["%s Num Cells" % cell_subtype]

            # A missing cell count means the subtype contributes nothing.
            if numpy.isnan(subtype_num_cells):
                continue

            total_transduced_cells += subtype_rate * subtype_num_cells

        if num_cells == 0:
            # No cells of this type in the sample: both ratios are undefined.
            # Record NaN explicitly instead of dividing by zero (which raised
            # ZeroDivisionError when every subtype entry was missing).
            transcripts_per_cell = numpy.nan
            cell_type_transduction_rate = numpy.nan
        else:
            transcripts_per_cell = cell_type_obs[transcript_column].values.sum() / num_cells
            cell_type_transduction_rate = total_transduced_cells / num_cells

        transcript_count_values.append(transcripts_per_cell)
        transduction_rate_values.append(cell_type_transduction_rate * 100)
        cell_type_values.append(cell_type_name)
        text.append("%s" % cell_set_name)

# Convert to arrays so the plotting cell can select points with boolean masks.
text = numpy.array(text)
transcript_count_values = numpy.array(transcript_count_values)
transduction_rate_values = numpy.array(transduction_rate_values)
cell_type_values = numpy.array(cell_type_values)

In [None]:
# Scatter plot of transcripts/cell (x) against estimated transduction rate (y),
# one trace per top-level cell type and one marker per sample.
traces = []

# NOTE(review): x_max / y_max are not used by the layout below; they are kept
# in case later cells rely on them.
x_max = max(transcript_count_values) * 1.1
y_max = max(transduction_rate_values) * 1.1

for cell_type in numpy.unique(cell_type_values):

    # Select the points belonging to this cell type.
    cell_type_mask = cell_type_values == cell_type

    scatter = graph_objects.Scatter(
        x=transcript_count_values[cell_type_mask],
        y=transduction_rate_values[cell_type_mask],
        text=text[cell_type_mask],
        mode="markers",
        name=cell_type
    )

    traces.append(scatter)

# Both axes start at zero and use faint grid lines on a transparent background.
layout = {
    "xaxis": {
        "title": "Transcripts/Cell",
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "zerolinecolor": "rgba(0, 0, 0, 0.25)"
    },
    "yaxis": {
        "title": "Estimated Transduction Rate (%)",
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "zerolinecolor": "rgba(0, 0, 0, 0.25)"
    },
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)"
}


figure = graph_objects.Figure(data=traces, layout=layout)

plotly.iplot(figure)

# Make sure the output directory exists so the export also works on a fresh
# checkout where "out" has not been created yet.
output_directory = "out"
os.makedirs(output_directory, exist_ok=True)

figure.write_image(os.path.join(output_directory, "PHP-eB_transcripts_per_cell_vs_transduction_rate.svg"))