In [None]:
import os

from aavomics import database
from aavomics import aavomics
import scipy
import anndata
import pandas
import numpy
from scipy import stats
from statsmodels.stats.multitest import multipletests

import plotly.graph_objects as graph_objects
from plotly import offline as plotly

In [None]:
# Input files; both are joined onto database.DATA_PATH when they are loaded.
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"
TRANSDUCTION_RATE_FILE_NAME = "aavomics_cell_type_transduction_rates.csv"

# Virus analysed in this notebook. Rows of the transduction rate table are
# looked up with labels of the form "<cell set name>-<VIRUS_NAME>".
VIRUS_NAME = "PHP.eB"

In [None]:
# Subtype names grouped under their major cell type, in display order.
_SUBTYPE_NAMES_BY_CELL_TYPE = {
    "Astrocytes": [
        "Myoc- Astrocytes",
        "Myoc+ Astrocytes",
    ],
    "Vascular Cells": [
        "Endothelial Cells",
        "Pericytes",
        "Red Blood Cells",
        "Vascular SMCs",
        "VLMCs",
    ],
    "Immune Cells": [
        "Perivascular Macrophages",
        "Microglia",
        "Leukocytes",
    ],
    "Oligodendrocytes": [
        "OPCs",
        "Committed Oligodendrocytes",
        "Mature Oligodendrocytes",
    ],
    "Neurons": [
        "L2",
        "L2/3",
        "L3",
        "L4/5",
        "L5",
        "L5/6",
        "L6",
        "Lamp5",
        "Pax6",
        "Sncg",
        "Vip",
        "Sst",
        "Pvalb",
    ],
}

# Two-level hierarchy: major cell type -> subtype -> {} (each subtype maps to
# its own empty dict; no deeper levels are defined).
CELL_TYPE_HIERARCHY = {
    major_name: {subtype_name: {} for subtype_name in subtype_names}
    for major_name, subtype_names in _SUBTYPE_NAMES_BY_CELL_TYPE.items()
}

# Flat list of every subtype name, in hierarchy order.
cell_types = []

for cell_type, cell_subtypes in CELL_TYPE_HIERARCHY.items():
    cell_types += list(cell_subtypes)

In [None]:
# Read the transduction rate table (first CSV column becomes the row index)
# and the AnnData object, both located under database.DATA_PATH.
transduction_rate_path = os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME)
anndata_path = os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)

transduction_rate_df = pandas.read_csv(transduction_rate_path, index_col=0)
adata = anndata.read_h5ad(anndata_path)

# Gather the names that have a "<name> Transduction Rate" column: the name is
# the column header with its last two whitespace-separated words removed.
all_cell_types = set()

for column_name in transduction_rate_df.columns:
    if not column_name.endswith("Transduction Rate"):
        continue
    header_words = column_name.split()
    all_cell_types.add(" ".join(header_words[:-2]))

In [None]:
# Cell sets (samples) to summarise, matched against adata.obs["Cell Set"].
names_9_samples = [
    "20181127_TC1",
    "20190319_TC2",
    "20190111_BC1",
    "20190321_BC2",
    "20190712_TC5",
    "20190713_TC6",
    "20190713_TC7",
    "20200720_BC4_1",
    "20200720_BC4_2",
]

# Parallel lists: samples_9[k] is the summary table for cell set names_9[k].
names_9, samples_9 = [], []

for cell_set_name in names_9_samples:

    # Only the per-cell annotations are read below, so subset .obs directly
    # instead of copying the whole AnnData object for every sample.
    cell_set_obs = adata.obs[adata.obs["Cell Set"] == cell_set_name]

    # A cell counts towards a major type when its "Cell Type" annotation is
    # one of that type's subtypes.
    cell_type_counts = {}

    for cell_type_name, cell_subtypes in CELL_TYPE_HIERARCHY.items():
        cell_type_mask = cell_set_obs["Cell Type"].isin(list(cell_subtypes))
        cell_type_counts[cell_type_name] = cell_type_mask.sum()

    # One row per major cell type, in CELL_TYPE_HIERARCHY order.
    df = pandas.DataFrame.from_dict(cell_type_counts, orient="index", columns=["Num Cells"])
    df["% of cells"] = df["Num Cells"] / df["Num Cells"].sum() * 100

    # Label the table with its cell set name before displaying it.
    print(cell_set_name)
    display(df)

    names_9.append(cell_set_name)
    samples_9.append(df)

In [None]:
# Major cell types to compare; this also fixes the key order of `fracs`.
all_cell_types = ['Oligodendrocytes', 'Immune Cells', 'Astrocytes', 'Neurons', 'Vascular Cells']

# fracs[cell type][0]: per sample, this type's cell count divided by the sample total.
# fracs[cell type][1]: per sample, this type's transduced count divided by the
#                      sample's total transduced count.
# Rebuilt from scratch on every run so re-executing the cell does not append twice.
fracs = {cell_type_name: [[], []] for cell_type_name in all_cell_types}

for cell_set_name, sample_df in zip(names_9, samples_9):

    # Row label of this sample's entry for VIRUS_NAME in transduction_rate_df.
    entry_index = "%s-%s" % (cell_set_name, VIRUS_NAME)

    # Sample-wide totals accumulated over every subtype that has a cell count.
    tot, tot_tr = 0, 0

    # Transduced-cell count per major type, in CELL_TYPE_HIERARCHY order.
    num_transduced = []

    for cell_type_name, cell_subtypes in CELL_TYPE_HIERARCHY.items():

        cell_type_num_transduced = 0

        for cell_subtype in cell_subtypes:

            transduction_rate = transduction_rate_df.loc[entry_index, "%s Transduction Rate" % (cell_subtype)]
            num_cells = transduction_rate_df.loc[entry_index, "%s Num Cells" % (cell_subtype)]

            # Subtypes with no recorded cell count for this sample are skipped.
            if numpy.isnan(num_cells):
                continue

            # Rate times count gives the (possibly fractional) number of
            # transduced cells of this subtype.
            subtype_num_transduced = transduction_rate * num_cells

            cell_type_num_transduced += subtype_num_transduced
            tot += num_cells
            tot_tr += subtype_num_transduced

        num_transduced.append(cell_type_num_transduced)

    # Positional assignment: the rows of sample_df are expected to be in
    # CELL_TYPE_HIERARCHY order, the same order num_transduced was built in.
    # sample_df is the very object stored in samples_9, so the column is added there.
    sample_df['# Transduced'] = num_transduced

    # NOTE(review): 'Num Cells' in sample_df was counted from the AnnData
    # annotations, while `tot` sums the table's "Num Cells" columns - confirm
    # the two sources agree, otherwise the first fractions need not sum to 1.
    for ct in all_cell_types:
        fracs[ct][0].append(sample_df.loc[ct, 'Num Cells'] / tot)
        fracs[ct][1].append(sample_df.loc[ct, '# Transduced'] / tot_tr)

In [None]:
# Sanity check: show the collected fractions for one cell type. The key is
# written out explicitly instead of relying on a loop variable left behind by
# another cell, so this cell gives the same output regardless of run order.
fracs["Vascular Cells"]

In [None]:

# Major cell types to plot; each gets its own scatter trace.
all_cell_types = ['Oligodendrocytes', 'Immune Cells', 'Astrocytes', 'Neurons', 'Vascular Cells']
traces = []

# Points pooled over all cell types, used for the single regression line.
all_x = []
all_y = []

for ct in all_cell_types:

    # Convert the per-sample fractions to percentages.
    x = [n * 100 for n in fracs[ct][0]]
    y = [n * 100 for n in fracs[ct][1]]

    all_x.extend(x)
    all_y.extend(y)

    traces.append(graph_objects.Scatter(x=x, y=y, mode="markers", name=ct))

# Linear least-squares fit of % transduced against % of total cells.
regression = scipy.stats.linregress(all_x, all_y)
intercept = regression.intercept
slope = regression.slope

# The fitted line is drawn from x = 0 to 10% beyond the largest x value.
x1 = 0
y1 = slope * x1 + intercept
x2 = max(all_x) * 1.1
y2 = slope * x2 + intercept

# linregress reports the correlation coefficient r; square it so the number
# shown actually matches the "r^2" legend label.
r_squared = regression.rvalue ** 2

traces.append(
    graph_objects.Scatter(
        x=[x1, x2],
        y=[y1, y2],
        name="r^2 = %.2f" % r_squared,
        mode="lines"
    )
)

layout = {
    "height": 600,
    "width": 800,
    # Fully transparent plot and paper backgrounds.
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "yaxis": {
        "title": {
            "text": "Percentage of Transduced Cells",
        },
        # Leave 10% headroom above the largest y value.
        "range": [0, max(all_y) * 1.1],
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "zerolinecolor": "rgba(0, 0, 0, 0.25)",
        "rangemode": "tozero"
    },
    "xaxis": {
        "title": "Percentage of Total Cells",
        "zerolinecolor": "rgba(0, 0, 0, 0.25)",
        "rangemode": "tozero"
    },
    "title": {
        "text": "Percentage of Transduced Cells vs. Percentage of Total Cells"
    }
}

figure = graph_objects.Figure(data=traces, layout=layout)
plotly.iplot(figure)

# Make sure the output directory exists so that saving the figure does not
# fail on a fresh checkout.
os.makedirs("out", exist_ok=True)
figure.write_image(os.path.join("out", "PHP-eB_fraction_transduced_vs_cell_type_distribution.svg"))