In [None]:
import os

from aavomics import database
from aavomics import aavomics
import anndata
import pandas
import numpy
from scipy import stats

import plotly.graph_objects as graph_objects
from plotly import offline as plotly

In [None]:
# File name of the CSV holding the per-cell-type transduction rate table; it is
# joined onto database.DATA_PATH when the table is loaded.
TRANSDUCTION_RATE_FILE_NAME = "aavomics_cell_type_transduction_rates.csv"

In [None]:
# Two-level grouping of cell types: each top-level key is a major cell type and
# maps to its subtypes. The subtype values are empty dicts (no further nesting).
# Insertion order matters: it is the order in which groups and subtypes are
# iterated everywhere this hierarchy is used.
CELL_TYPE_HIERARCHY = {
    "Astrocytes": {
        "Myoc- Astrocytes": {},
        "Myoc+ Astrocytes": {},
    },
    "Vascular Cells": {
        "Endothelial Cells": {},
        "Pericytes": {},
        "Red Blood Cells": {},
        "Vascular SMCs": {},
        "VLMCs": {},
    },
    "Immune Cells": {
        "Microglia": {},
        "Perivascular Macrophages": {},
        "Leukocytes": {},
    },
    "Oligodendrocytes": {
        "OPCs": {},
        "Committed Oligodendrocytes": {},
        "Mature Oligodendrocytes": {},
    },
}

# Flat list of every subtype name, in hierarchy order.
cell_types = [
    subtype_name
    for subtype_group in CELL_TYPE_HIERARCHY.values()
    for subtype_name in subtype_group
]

In [None]:
# Load the transduction rate table from database.DATA_PATH, using the first CSV
# column as the row index.
transduction_rate_path = os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME)
transduction_rate_df = pandas.read_csv(transduction_rate_path, index_col=0)

In [None]:
# Values of the "Cell Set" column to include; each contributes one set of data
# points per virus.
CELL_SET_NAMES = ["20181127_TC1", "20190319_TC2", "20190111_BC1", "20190321_BC2"]
# Values of the "Virus" column to compare: index 0 feeds the virus_1_* lists,
# index 1 feeds the virus_2_* lists.
VIRUS_NAMES = ["PHP.eB", "PHP.V1"]

In [None]:
# Every cell type that has a "<cell type> Transduction Rate" column in the
# table; the name is recovered by dropping the last two whitespace-separated
# words of the column name.
all_cell_types = {
    " ".join(column_name.split()[0:-2])
    for column_name in transduction_rate_df.columns
    if column_name.endswith("Transduction Rate")
}

In [None]:
# Parallel lists with one entry per (cell set, major cell type) for
# VIRUS_NAMES[0]: the major cell type label, that type's share (in percent) of
# all transduced cells counted across the hierarchy, and the source cell set.
virus_1_x_values = []
virus_1_y_values = []
virus_1_cell_set_names = []

for cell_set_name in CELL_SET_NAMES:

    # Use the first table row matching this cell set and virus.
    row_selector = (
        (transduction_rate_df["Cell Set"] == cell_set_name)
        & (transduction_rate_df["Virus"] == VIRUS_NAMES[0])
    )
    rate_row = transduction_rate_df[row_selector].iloc[0]

    # Running transduced-cell counts: per major cell type and overall.
    transduced_by_parent = dict.fromkeys(CELL_TYPE_HIERARCHY, 0)
    transduced_total = 0

    for parent_name, subtype_group in CELL_TYPE_HIERARCHY.items():
        for subtype_name in subtype_group:

            subtype_num_cells = rate_row["%s Num Cells" % subtype_name]
            subtype_rate = rate_row["%s Transduction Rate" % subtype_name]

            # Subtypes whose cell count is NaN contribute nothing.
            if not numpy.isnan(subtype_num_cells):
                subtype_num_transduced = subtype_num_cells * subtype_rate
                transduced_by_parent[parent_name] += subtype_num_transduced
                transduced_total += subtype_num_transduced

    # Emit one data point per major cell type for this cell set.
    for parent_name in CELL_TYPE_HIERARCHY:
        virus_1_x_values.append(parent_name)
        virus_1_y_values.append(transduced_by_parent[parent_name]/transduced_total * 100)
        virus_1_cell_set_names.append(cell_set_name)

In [None]:
# Parallel lists with one entry per (cell set, major cell type) for
# VIRUS_NAMES[1]: the major cell type label, that type's share (in percent) of
# all transduced cells counted across the hierarchy, and the source cell set.
virus_2_x_values = []
virus_2_y_values = []
virus_2_cell_set_names = []

for cell_set_name in CELL_SET_NAMES:

    # Use the first table row matching this cell set and virus.
    row_selector = (
        (transduction_rate_df["Cell Set"] == cell_set_name)
        & (transduction_rate_df["Virus"] == VIRUS_NAMES[1])
    )
    rate_row = transduction_rate_df[row_selector].iloc[0]

    # Running transduced-cell counts: per major cell type and overall.
    transduced_by_parent = dict.fromkeys(CELL_TYPE_HIERARCHY, 0)
    transduced_total = 0

    for parent_name, subtype_group in CELL_TYPE_HIERARCHY.items():
        for subtype_name in subtype_group:

            subtype_num_cells = rate_row["%s Num Cells" % subtype_name]
            subtype_rate = rate_row["%s Transduction Rate" % subtype_name]

            # Subtypes whose cell count is NaN contribute nothing.
            if not numpy.isnan(subtype_num_cells):
                subtype_num_transduced = subtype_num_cells * subtype_rate
                transduced_by_parent[parent_name] += subtype_num_transduced
                transduced_total += subtype_num_transduced

    # Emit one data point per major cell type for this cell set.
    for parent_name in CELL_TYPE_HIERARCHY:
        virus_2_x_values.append(parent_name)
        virus_2_y_values.append(transduced_by_parent[parent_name]/transduced_total * 100)
        virus_2_cell_set_names.append(cell_set_name)

In [None]:
# Box plot comparing, per major cell type, the percentage of transduced cells
# for the two viruses, with the individual cell-set values overlaid as points.

# Fixed seed so the horizontal jitter (and therefore the exported SVG) is the
# same on every run.
JITTER_SEED = 0
jitter_rng = numpy.random.RandomState(JITTER_SEED)

# numpy.unique returns the labels de-duplicated and sorted; a label's index in
# this array is its position on the numeric x axis.
cell_types = numpy.unique(virus_1_x_values)
cell_type_index_map = {cell_type: index for index, cell_type in enumerate(cell_types)}

# One entry per virus:
# (legend name, x labels, y values, cell set names, box x offset, box color).
virus_series = [
    (VIRUS_NAMES[0], virus_1_x_values, virus_1_y_values, virus_1_cell_set_names, -0.25, "black"),
    (VIRUS_NAMES[1], virus_2_x_values, virus_2_y_values, virus_2_cell_set_names, 0.25, "grey")
]

traces = []

for virus_name, x_values, y_values, cell_set_names, box_offset, box_color in virus_series:

    x_positions = numpy.array([cell_type_index_map[x] for x in x_values])

    # Summary box for this virus, shifted sideways so the two viruses sit next
    # to each other at every cell type.
    traces.append(
        graph_objects.Box(
            x=x_positions + box_offset,
            y=y_values,
            name=virus_name,
            marker=dict(
                color=box_color
            )
        )
    )

    # Individual data points, drawn 0.25 to the left of the box with a small
    # random jitter in [0, 0.1). Both traces carry the name of the virus whose
    # data they show, so the legend labels match the plotted data.
    traces.append(
        graph_objects.Scatter(
            x=x_positions + box_offset - 0.25 + jitter_rng.rand(len(x_values))/10,
            y=y_values,
            name=virus_name,
            mode="markers",
            marker={
                # Cell sets whose name contains "BC" are blue, all others red.
                "color": ["blue" if "BC" in x else "red" for x in cell_set_names]
            }
        )
    )

layout = {
    "width": 500,
    "xaxis": {
        # Replace the numeric positions with the cell type labels.
        "tickvals": list(range(len(cell_types))),
        "ticktext": cell_types
    },
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "yaxis": {
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "zerolinecolor": "rgba(0, 0, 0, 0.25)",
        "title": "Fraction of non-neuronal transduced cells (%)"
    }
}

figure = graph_objects.Figure(data=traces, layout=layout)

plotly.iplot(figure)

# Make sure the output directory exists so the export works on a fresh checkout.
figure_path = os.path.join("out", "PHP-eB_vs_PHP-V1_major_cell_types_fraction_transduced.svg")
os.makedirs(os.path.dirname(figure_path), exist_ok=True)

figure.write_image(figure_path)

In [None]:
# Inspect the major cell type label of each VIRUS_NAMES[0] data point.
virus_1_x_values

In [None]:
# Inspect the percentage plotted for each VIRUS_NAMES[0] data point.
virus_1_y_values

In [None]:
# Inspect the major cell type label of each VIRUS_NAMES[1] data point.
virus_2_x_values

In [None]:
# Inspect the percentage plotted for each VIRUS_NAMES[1] data point.
virus_2_y_values