In [None]:
from aavomics import database

import pandas
import os
import numpy

from plotly import offline as plotly
from plotly import graph_objects

In [None]:
# File (resolved against database.DATA_PATH) that holds the per-cell-type
# "Num Cells" and "Transduction Rate" columns used throughout this notebook.
TRANSDUCTION_RATE_FILE_NAME = "aavomics_cell_type_transduction_rates.csv"

# Values of the "Virus" column that are compared against each other.
VIRUS_NAMES = ["PHP.eB", "CAP-B10"]

# Values of the "Cell Set" column that are included in the analysis.
CELL_SET_NAMES = [
    "20190711_TC4",
    "20190712_TC5",
    "20190713_TC6",
    "20190713_TC7",
]

# Broad neuron class -> labels of its subtypes. The labels are the prefixes of
# the "<label> Num Cells" / "<label> Transduction Rate" column names.
NEURON_TAXONOMY = {
    "Glutamatergic": [
        "L2", "L2/3", "L3", "L4/5", "L5", "L5/6", "L6",
    ],
    "GABAergic": [
        "Lamp5", "Pax6", "Sncg", "Vip", "Sst", "Pvalb",
    ],
}

# Subtypes whose total cell count is below this value are left out of the
# per-subtype figures.
CELL_COUNT_THRESHOLD = 50

In [None]:
# Load the transduction-rate table from the aavomics data directory; the first
# CSV column is used as the row index.
transduction_rate_df = pandas.read_csv(
    os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME),
    index_col=0,
)

In [None]:
# Cell type labels are recovered from the column headers: every column ending
# in "Num Cells" contributes its name with the trailing two words stripped.
cell_types = set([" ".join(x.split()[0:-2]) for x in transduction_rate_df.columns if x.endswith("Num Cells")])

# virus name -> cell set name -> estimated number of transduced cells, i.e. the
# sum over all cell types of (num cells * transduction rate).
virus_cell_set_num_transduced = {}

for virus_name in VIRUS_NAMES:

    virus_mask = transduction_rate_df["Virus"] == virus_name

    virus_cell_set_num_transduced[virus_name] = {}

    for cell_set_name in CELL_SET_NAMES:

        cell_set_mask = transduction_rate_df["Cell Set"] == cell_set_name

        row_mask = virus_mask & cell_set_mask

        # Select the matching row(s) once instead of re-filtering the whole
        # frame twice for every cell type in the loop below.
        sample_rows = transduction_rate_df[row_mask]

        # Only (virus, cell set) combinations with exactly one row are used;
        # missing or ambiguous combinations get no entry at all.
        if len(sample_rows) != 1:
            continue

        num_transduced = 0

        # A set of strings iterates in a hash-randomized order that changes
        # between kernel sessions. Summing floats in sorted order keeps the
        # total bit-for-bit reproducible across runs.
        for cell_type in sorted(cell_types):

            num_cells = sample_rows["%s Num Cells" % cell_type].values[0]
            transduction_rate = sample_rows["%s Transduction Rate" % cell_type].values[0]

            num_transduced += num_cells * transduction_rate

        virus_cell_set_num_transduced[virus_name][cell_set_name] = num_transduced

# Total number of cells per label, summed over CELL_SET_NAMES. Keys are both
# the broad neuron classes (seeded with 0 here) and every individual cell type.
cell_type_counts = dict.fromkeys(NEURON_TAXONOMY, 0)

for cell_type in cell_types:

    cell_type_counts[cell_type] = 0

    count_column = "%s Num Cells" % cell_type

    for cell_set_name in CELL_SET_NAMES:

        rows_for_cell_set = transduction_rate_df[transduction_rate_df["Cell Set"] == cell_set_name]

        # The count is read from the first row found for this cell set.
        cell_set_count = rows_for_cell_set[count_column].values[0]

        cell_type_counts[cell_type] += cell_set_count

        # Roll the same count up into every neuron class listing this subtype.
        for neuron_type, neuron_subtypes in NEURON_TAXONOMY.items():

            if cell_type in neuron_subtypes:
                cell_type_counts[neuron_type] += cell_set_count

In [None]:
# One figure per broad neuron class. For every subtype (drawn in reverse
# taxonomy order) and virus it shows the percentage of that virus's transduced
# cells that fall into the subtype: a bar for the mean across cell sets and one
# marker per cell set.

# The SVG export below targets the "out" directory; create it up front so the
# export does not fail on a fresh checkout.
os.makedirs("out", exist_ok=True)

for neuron_type, neuron_subtypes in NEURON_TAXONOMY.items():
    
    traces = []
    
    # numpy.infty was removed in NumPy 2.0; numpy.inf works on every version.
    max_value = -numpy.inf

    # Bar widths are scaled against the largest count of any label; this does
    # not change inside the loops below.
    max_cell_type_count = max(cell_type_counts.values())
        
    for cell_type in neuron_subtypes[::-1]:
        
        # Subtypes with too few cells overall are not plotted.
        if cell_type_counts[cell_type] < CELL_COUNT_THRESHOLD:
            continue

        for virus_name in VIRUS_NAMES:

            virus_mask = transduction_rate_df["Virus"] == virus_name

            x_value = "%s %s" % (virus_name, cell_type)
            y_values = []

            for cell_set_name in sorted(CELL_SET_NAMES):

                cell_set_mask = transduction_rate_df["Cell Set"] == cell_set_name

                row_mask = virus_mask & cell_set_mask

                # Filter the frame once per (virus, cell set) pair.
                sample_rows = transduction_rate_df[row_mask]

                # Skip combinations that do not map to exactly one row.
                if len(sample_rows) != 1:
                    continue
                
                num_cells = sample_rows["%s Num Cells" % cell_type].values[0]
                transduction_rate = sample_rows["%s Transduction Rate" % cell_type].values[0]

                fraction_transduced = num_cells * transduction_rate / virus_cell_set_num_transduced[virus_name][cell_set_name]

                y_values.append(fraction_transduced*100)

            # Without any usable cell set there is nothing to draw for this
            # virus, and max() of an empty list would raise.
            if not y_values:
                continue

            # Every marker of this group shares the same categorical x position.
            x_values = [x_value] * len(y_values)

            trace = graph_objects.Bar(
                x=[x_value],
                y=[numpy.mean(y_values)],
                name=x_value,
                marker={
                    "line": {
                        "width": 2,
                        "color": "rgba(0, 0, 0, 1)"
                    }
                },
                width=cell_type_counts[cell_type]/(max_cell_type_count + 100)
            )
            
            print(cell_type, virus_name)
            for x in y_values:
                print(x)
            
            max_value = max(max(y_values), max_value)

            traces.append(trace)

            trace = graph_objects.Scatter(
                x=x_values,
                y=y_values,
                name=x_value,
                mode="markers",
                marker={
                    "size": 10,
                    "color": "rgba(0, 0, 0, 0.5)"
                }

            )

            traces.append(trace)

    # If every subtype was filtered out, max_value is still -inf and the tick
    # computation below would fail; there is no figure to draw for this class.
    if not traces:
        print("No %s subtypes to plot; skipping figure" % neuron_type)
        continue

    # Round the axis maximum up to a multiple of 6 so the four evenly spaced
    # ticks below land on integers.
    max_value = numpy.ceil(max_value/6)*6

    tick_vals = [int(x) for x in numpy.linspace(0, max_value, 4)]
    
    layout = {
        "paper_bgcolor": "rgba(255, 255, 255, 0)",
        "plot_bgcolor": "rgba(255, 255, 255, 0)",
        "yaxis": {
            "gridcolor": "rgba(0, 0, 0, 0.25)",
            "zerolinecolor": "rgba(0, 0, 0, 1)",
            "tickvals": tick_vals,
            "range": [0, max_value*1.1]
        },
        "xaxis": {
            "zerolinecolor": "rgba(0, 0, 0, 1)",
            "rangemode": "tozero"
        },
        "showlegend": False,
        "width": 500/4*6
    }

    figure = graph_objects.Figure(data=traces, layout=layout)

    plotly.iplot(figure)

    figure.write_image(os.path.join("out", "CAP-B10_PHP-eB_%s.svg" % neuron_type))

In [None]:
# Single summary figure: for every broad neuron class and virus, the percentage
# of that virus's transduced cells that belong to the class (all of its
# subtypes combined). A bar shows the mean across cell sets and one marker is
# drawn per cell set.

# The SVG export below targets the "out" directory; create it up front so the
# export does not fail on a fresh checkout.
os.makedirs("out", exist_ok=True)

traces = []
    
# numpy.infty was removed in NumPy 2.0; numpy.inf works on every version.
max_value = -numpy.inf
        
for neuron_type, neuron_subtypes in NEURON_TAXONOMY.items():

    # Bar widths are scaled against the largest count of any label; this does
    # not change inside the loops below.
    max_cell_type_count = max(cell_type_counts.values())

    for virus_name in VIRUS_NAMES:

        virus_mask = transduction_rate_df["Virus"] == virus_name

        x_value = "%s %s" % (virus_name, neuron_type)
        y_values = []

        for cell_set_name in sorted(CELL_SET_NAMES):

            cell_set_mask = transduction_rate_df["Cell Set"] == cell_set_name

            row_mask = virus_mask & cell_set_mask

            # Filter the frame once per (virus, cell set) pair.
            sample_rows = transduction_rate_df[row_mask]

            # Skip combinations that do not map to exactly one row.
            if len(sample_rows) != 1:
                continue
            
            num_transduced = 0
            
            for neuron_subtype in neuron_subtypes:

                num_cells = sample_rows["%s Num Cells" % neuron_subtype].values[0]
                transduction_rate = sample_rows["%s Transduction Rate" % neuron_subtype].values[0]
                
                num_transduced += num_cells * transduction_rate

            fraction_transduced = num_transduced / virus_cell_set_num_transduced[virus_name][cell_set_name]

            y_values.append(fraction_transduced*100)

        # Without any usable cell set there is nothing to draw for this virus,
        # and max() of an empty list would raise.
        if not y_values:
            continue

        # Every marker of this group shares the same categorical x position.
        x_values = [x_value] * len(y_values)

        trace = graph_objects.Bar(
            x=[x_value],
            y=[numpy.mean(y_values)],
            name=x_value,
            marker={
                "line": {
                    "width": 2,
                    "color": "rgba(0, 0, 0, 1)"
                }
            },
            width=cell_type_counts[neuron_type]/(max_cell_type_count + 100)
        )

        max_value = max(max(y_values), max_value)

        traces.append(trace)

        trace = graph_objects.Scatter(
            x=x_values,
            y=y_values,
            name=x_value,
            mode="markers",
            marker={
                "size": 10,
                "color": "rgba(0, 0, 0, 0.5)"
            }

        )

        traces.append(trace)

# With no traces max_value is still -inf and the tick computation below would
# fail with an unrelated-looking conversion error; report the real cause.
if not traces:
    raise ValueError("No (virus, cell set) rows matched VIRUS_NAMES and CELL_SET_NAMES; nothing to plot")

# Round the axis maximum up to a multiple of 6 so the four evenly spaced ticks
# below land on integers.
max_value = numpy.ceil(max_value/6)*6

tick_vals = [int(x) for x in numpy.linspace(0, max_value, 4)]

layout = {
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "yaxis": {
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "zerolinecolor": "rgba(0, 0, 0, 1)",
        "tickvals": tick_vals,
        "range": [0, max_value*1.1]
    },
    "xaxis": {
        "zerolinecolor": "rgba(0, 0, 0, 1)",
        "rangemode": "tozero"
    },
    "showlegend": False,
    "width": 250/4*6
}

figure = graph_objects.Figure(data=traces, layout=layout)

plotly.iplot(figure)

figure.write_image(os.path.join("out", "CAP-B10_PHP-eB_neuron.svg"))

In [None]:
# Parameters of the count -> width mapping evaluated in the cells below:
#   width = sqrt(count) / sqrt(largest count) * WIDTH_RANGE + MINIMUM_WIDTH
MINIMUM_WIDTH = 0.01  # constant offset added to every width
WIDTH_RANGE = 0.2  # multiplier applied to the normalized square-root count

In [None]:
# Print, for every label in cell_type_counts, its total cell count and the
# width it receives under the square-root scaling defined by WIDTH_RANGE and
# MINIMUM_WIDTH.
for label, total in cell_type_counts.items():

    largest_total = max(cell_type_counts.values())

    # Square-root count relative to the square root of the largest count.
    relative_sqrt = numpy.sqrt(total) / numpy.sqrt(largest_total)

    print(label, total, relative_sqrt * WIDTH_RANGE + MINIMUM_WIDTH)

In [None]:
# Print the width that a few fixed reference counts (one per order of
# magnitude) receive under the same square-root scaling.
largest_count_sqrt = numpy.sqrt(max(cell_type_counts.values()))

for reference_count in (50000, 5000, 500, 50):

    reference_width = numpy.sqrt(reference_count) / largest_count_sqrt * WIDTH_RANGE + MINIMUM_WIDTH

    print(reference_count, reference_width)