In [None]:
import os

from aavomics import database
from aavomics import aavomics
import anndata
import pandas
import numpy
from scipy import stats

import plotly.graph_objects as graph_objects
from plotly import offline as plotly

In [None]:
# Name of the virus variant of interest.
# NOTE(review): presumably meant to be matched against the "Virus" column of the transduction table — confirm.
VIRUS_NAME = "PHP.eB"

# File name of the transduction-rate CSV; it is joined with database.DATA_PATH when the table is loaded.
TRANSDUCTION_RATE_FILE_NAME = "aavomics_cell_type_transduction_rates.csv"

In [None]:
# Two-level taxonomy: each top-level class maps to a dict keyed by its
# subtypes (the subtype values are empty dicts, i.e. there is no third level).
CELL_TYPE_HIERARCHY = {
    "Astrocytes": {
        "Myoc- Astrocytes": {},
        "Myoc+ Astrocytes": {}
    },
    "Vascular Cells": {
        "Endothelial Cells": {},
        "Pericytes": {},
        "Red Blood Cells": {},
        "Vascular SMCs": {},
        "VLMCs": {}
    },
    "Immune Cells": {
        "Microglia": {},
        "Perivascular Macrophages": {},
        "Leukocytes": {}
    },
    "Oligodendrocytes": {
        "OPCs": {},
        "Committed Oligodendrocytes": {},
        "Mature Oligodendrocytes": {}
    }
}

# Flat list of every subtype name, in hierarchy (insertion) order.
cell_types = [
    subtype_name
    for subtypes in CELL_TYPE_HIERARCHY.values()
    for subtype_name in subtypes
]

In [None]:
# Load the transduction-rate table from the project data directory; the first
# CSV column is used as the DataFrame index.
transduction_rate_path = os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME)
transduction_rate_df = pandas.read_csv(transduction_rate_path, index_col=0)

In [None]:
# Cell set whose barcoded ("BC") virus rows are selected for the per-barcode analysis.
BARCODE_CELL_SET_NAME = "20190321_BC2"
# Cell sets whose non-barcoded virus rows are selected for the per-animal analysis.
CARGO_CELL_SET_NAMES = ["20181127_TC1", "20190319_TC2", "20190111_BC1", "20190321_BC2"]

In [None]:
# Recover the cell-type names present in the table: every column ending in
# "Transduction Rate" is named "<cell type> Transduction Rate", so dropping the
# last two whitespace-separated words leaves the cell-type name.
all_cell_types = {
    " ".join(column.split()[0:-2])
    for column in transduction_rate_df.columns
    if column.endswith("Transduction Rate")
}

In [None]:
# Per-barcode tropism: for every barcoded row of the barcode cell set, compute
# the percentage of its transduced cells (restricted to the subtypes listed in
# CELL_TYPE_HIERARCHY) that falls into each top-level class.
#
# Outputs:
#   num_barcodes      number of rows that had at least one transduced cell
#   barcode_x_values  top-level class name, one entry per (row, class) pair
#   barcode_y_values  matching percentage, parallel to barcode_x_values
num_barcodes = 0
barcode_x_values = []
barcode_y_values = []

# Match the virus name literally (regex=False): str.contains defaults to regex
# matching, where the "." in "PHP.eB" is a wildcard. na=False keeps the mask
# strictly boolean if a row has no virus name.
virus_column = transduction_rate_df["Virus"]

cell_set_barcode_mask = (transduction_rate_df["Cell Set"] == BARCODE_CELL_SET_NAME) & \
    virus_column.str.contains(VIRUS_NAME, regex=False, na=False) & \
    virus_column.str.contains("BC", regex=False, na=False)

# Subtype -> top-level class lookup, so each leaf type is rolled up directly
# instead of re-scanning the hierarchy for every cell type.
parent_cell_types = {
    cell_subtype: parent_cell_type
    for parent_cell_type, cell_subtypes in CELL_TYPE_HIERARCHY.items()
    for cell_subtype in cell_subtypes
}

for _, barcode_row in transduction_rate_df[cell_set_barcode_mask].iterrows():

    cell_type_num_transduced = {cell_type: 0 for cell_type in CELL_TYPE_HIERARCHY}
    total_num_transduced = 0

    for cell_type in all_cell_types:

        parent_cell_type = parent_cell_types.get(cell_type)

        # Cell types that are not part of the hierarchy do not contribute.
        if parent_cell_type is None:
            continue

        num_cells = barcode_row["%s Num Cells" % cell_type]
        transduction_rate = barcode_row["%s Transduction Rate" % cell_type]

        # A missing count or a missing rate would turn the product into NaN and
        # poison every percentage of this row, so such cell types are skipped.
        if numpy.isnan(num_cells) or numpy.isnan(transduction_rate):
            continue

        num_transduced = num_cells * transduction_rate

        cell_type_num_transduced[parent_cell_type] += num_transduced
        total_num_transduced += num_transduced

    # Percentages are undefined when nothing was transduced; emit NaN
    # placeholders (one per class, so the lists stay aligned with the rows)
    # rather than dividing by zero, and leave the row out of the count.
    has_transduced_cells = total_num_transduced > 0

    if has_transduced_cells:
        num_barcodes += 1

    for cell_type in CELL_TYPE_HIERARCHY:

        barcode_x_values.append(cell_type)

        if has_transduced_cells:
            barcode_y_values.append(cell_type_num_transduced[cell_type] / total_num_transduced * 100)
        else:
            barcode_y_values.append(numpy.nan)

In [None]:
# Display the top-level class labels collected above (one per barcode row and class).
barcode_x_values

In [None]:
# Display the percentages collected above; entries are parallel to barcode_x_values.
barcode_y_values

In [None]:
# Per-animal tropism: for every non-barcoded row of the cargo cell sets,
# compute the percentage of its transduced cells (restricted to the subtypes
# listed in CELL_TYPE_HIERARCHY) that falls into each top-level class.
#
# Outputs:
#   num_cargo_cell_sets  number of rows that had at least one transduced cell
#   cargo_x_values       top-level class name, one entry per (row, class) pair
#   cargo_y_values       matching percentage, parallel to cargo_x_values
num_cargo_cell_sets = 0
cargo_x_values = []
cargo_y_values = []

# Match the virus name literally (regex=False): str.contains defaults to regex
# matching, where the "." in "PHP.eB" is a wildcard. na=False keeps the mask
# strictly boolean if a row has no virus name.
virus_column = transduction_rate_df["Virus"]

cell_set_cargo_mask = (transduction_rate_df["Cell Set"].isin(CARGO_CELL_SET_NAMES)) & \
    virus_column.str.contains(VIRUS_NAME, regex=False, na=False) & \
    ~virus_column.str.contains("BC", regex=False, na=False)

# Subtype -> top-level class lookup, so each leaf type is rolled up directly
# instead of re-scanning the hierarchy for every cell type.
parent_cell_types = {
    cell_subtype: parent_cell_type
    for parent_cell_type, cell_subtypes in CELL_TYPE_HIERARCHY.items()
    for cell_subtype in cell_subtypes
}

for _, cargo_row in transduction_rate_df[cell_set_cargo_mask].iterrows():

    cell_type_num_transduced = {cell_type: 0 for cell_type in CELL_TYPE_HIERARCHY}
    total_num_transduced = 0

    for cell_type in all_cell_types:

        parent_cell_type = parent_cell_types.get(cell_type)

        # Cell types that are not part of the hierarchy do not contribute.
        if parent_cell_type is None:
            continue

        num_cells = cargo_row["%s Num Cells" % cell_type]
        transduction_rate = cargo_row["%s Transduction Rate" % cell_type]

        # A missing count or a missing rate would turn the product into NaN and
        # poison every percentage of this row, so such cell types are skipped.
        if numpy.isnan(num_cells) or numpy.isnan(transduction_rate):
            continue

        num_transduced = num_cells * transduction_rate

        cell_type_num_transduced[parent_cell_type] += num_transduced
        total_num_transduced += num_transduced

    # Percentages are undefined when nothing was transduced; emit NaN
    # placeholders (one per class, so the lists stay aligned with the rows)
    # rather than dividing by zero, and leave the row out of the count.
    has_transduced_cells = total_num_transduced > 0

    if has_transduced_cells:
        num_cargo_cell_sets += 1

    for cell_type in CELL_TYPE_HIERARCHY:

        cargo_x_values.append(cell_type)

        if has_transduced_cells:
            cargo_y_values.append(cell_type_num_transduced[cell_type] / total_num_transduced * 100)
        else:
            cargo_y_values.append(numpy.nan)

In [None]:
# Display the top-level class labels collected above (one per cargo row and class).
cargo_x_values

In [None]:
# Display the percentages collected above; entries are parallel to cargo_x_values.
cargo_y_values

In [None]:
# Grouped box plot comparing the per-barcode percentages with the per-animal
# (cargo) percentages for each top-level cell class, shown inline and exported
# as an SVG.
traces = []

trace = graph_objects.Box(
    x=barcode_x_values,
    y=barcode_y_values,
    name="n=%i Barcodes" % num_barcodes,
    boxpoints="all"
)

traces.append(trace)

# Plotly maps `text` to the trace's points in order, so it needs one label per
# point. Each selected cargo row contributes one point per top-level class, in
# row order, so the row's cell-set name is repeated once per class.
cargo_point_labels = numpy.repeat(
    transduction_rate_df.loc[cell_set_cargo_mask, "Cell Set"].to_numpy(),
    len(CELL_TYPE_HIERARCHY)
).tolist()

assert len(cargo_point_labels) == len(cargo_y_values), \
    "expected one hover label per cargo data point"

trace = graph_objects.Box(
    x=cargo_x_values,
    y=cargo_y_values,
    name="n=%i Animals" % num_cargo_cell_sets,
    boxpoints="all",
    text=cargo_point_labels
)

traces.append(trace)

layout = {
    "boxmode": "group",
    "boxgroupgap": 0.5,
    "width": 500,
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "yaxis": {
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "zerolinecolor": "rgba(0, 0, 0, 0.25)",
        "title": "Fraction of non-neuronal transduced cells (%)"
    }
}

figure = graph_objects.Figure(data=traces, layout=layout)

plotly.iplot(figure)

# Make sure the output directory exists so the export also works on a fresh checkout.
output_directory = "out"
os.makedirs(output_directory, exist_ok=True)

figure.write_image(os.path.join(output_directory, "PHP-eB_barcode_vs_animal_tropism.svg"))