In [None]:
import os
import numpy
import pandas

from aavomics import database
import anndata

from plotly import offline as plotly
from plotly import graph_objects

In [None]:
# File name of the AnnData (.h5ad) dataset; joined with database.DATA_PATH when loaded.
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"

In [None]:
# Read the annotated dataset named by ANNDATA_FILE_NAME from the aavomics data directory.
adata = anndata.read_h5ad(
    os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)
)

In [None]:
# Cell sets to summarize, one per line. Each entry is compared against the
# values of adata.obs["Cell Set"] when per-sample counts are computed.
CELL_SET_NAMES = [
    "20181127_TC1",
    "20190319_TC2",
    "20190111_BC1",
    "20190321_BC2",
    "20190711_TC4",
    "20190712_TC5",
    "20190713_TC6",
    "20190713_TC7",
    "20200720_BC4_1",
    "20200720_BC4_2",
    "20210726_TC11",
    "20200907_C3",
    "20201119_C4",
    "20210728_C5",
    "20210728_TC12",
    "20200903_TC8",
    "20200904_TC9",
    "20201120_TC10",
]

In [None]:
# Two-level hierarchy: major cell type -> {subtype name -> {}}.
# Each subtype maps to its own empty dict. The mapping is built from
# (major type, subtypes) pairs so the insertion order of both levels is explicit.
CELL_TYPE_HIERARCHY = {
    major_type: {subtype: {} for subtype in subtypes}
    for major_type, subtypes in (
        ("Astrocytes", (
            "Myoc- Astrocytes",
            "Myoc+ Astrocytes",
        )),
        ("Vascular Cells", (
            "Endothelial Cells",
            "Pericytes",
            "Red Blood Cells",
            "Vascular SMCs",
            "VLMCs",
        )),
        ("Immune Cells", (
            "Perivascular Macrophages",
            "Microglia",
            "Leukocytes",
        )),
        ("Oligodendrocytes", (
            "OPCs",
            "Committed Oligodendrocytes",
            "Mature Oligodendrocytes",
        )),
        ("Neurons", (
            "Neurons",
        )),
    )
}

In [None]:
# adata.obs columns holding cell type labels; a cell is counted for a major
# type if its label in ANY of these columns is one of that type's subtypes.
TAXONOMY_NAMES = ["CCN202105051", "CCN202105041"]

# Parallel lists: names_all[i] is the cell set summarized by samples_all[i].
names_all, samples_all = [], []

for cell_set_name in CELL_SET_NAMES:

    # Restrict to the cells belonging to this cell set.
    cell_set_adata = adata[adata.obs["Cell Set"] == cell_set_name].copy()

    cell_type_counts = {}

    for cell_type_name in CELL_TYPE_HIERARCHY:

        # Use the builtin `bool` dtype: the `numpy.bool` alias was deprecated
        # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        cell_type_mask = numpy.zeros((cell_set_adata.shape[0], ), dtype=bool)

        # Subtype labels that roll up into this major cell type.
        subtype_names = list(CELL_TYPE_HIERARCHY[cell_type_name])

        for taxonomy_name in TAXONOMY_NAMES:
            cell_type_mask = cell_type_mask | cell_set_adata.obs[taxonomy_name].isin(subtype_names)

        cell_type_counts[cell_type_name] = cell_type_mask.sum()

    print(cell_set_name)
    df = pandas.DataFrame.from_dict(cell_type_counts, orient="index", columns=["Num Cells"])
    # Percentages are relative to the cells counted in the table above (the sum
    # of "Num Cells"), not to every cell in the cell set. If no cell matched any
    # type the denominator is zero and the column is NaN.
    df["% of cells"] = df["Num Cells"]/df["Num Cells"].sum()*100
    display(df)
    names_all.append(cell_set_name)
    samples_all.append(df)

In [None]:
# Per major cell type: the "% of cells" value from every plotted cell set, and
# the matching cell set names (used as hover text). Key order sets trace order.
types = {'Oligodendrocytes':[],'Vascular Cells':[],'Astrocytes':[],'Immune Cells':[],'Neurons':[]}
sample_names = {'Oligodendrocytes':[],'Vascular Cells':[],'Astrocytes':[],'Immune Cells':[],'Neurons':[]}

for name, df in zip(names_all,samples_all):
    # Skip any summarized cell set that is not (or no longer) in CELL_SET_NAMES.
    if name not in CELL_SET_NAMES:
        continue
    for index, row in df.iterrows():
        types[index].append(row['% of cells'])
        sample_names[index].append(name)

# One box per major cell type, with every sample drawn as a point beside it.
fig = graph_objects.Figure()
for k in types.keys():
    fig.add_trace(graph_objects.Box(y=types[k],x=[k]*len(types[k]),name=k,text=sample_names[k]))

# Y-axis ticks every 10% from 0 to 60; computed once and reused for the labels.
y_tick_values = numpy.linspace(0, 60, 7)

layout = {}

layout["plot_bgcolor"] = "rgba(255, 255, 255, 0)"
layout["paper_bgcolor"] = "rgba(255, 255, 255, 0)"
layout["yaxis"] = {
    "title": {
        "text": "% of Cells",
    },
    "tickvals": y_tick_values,
    "ticktext": ["%i" % x for x in y_tick_values],
    "rangemode": "tozero",
    "gridcolor": "rgba(0, 0, 0, 0.25)",
    "zerolinecolor": "rgba(0, 0, 0, 0.25)"
}
layout["title"] = "Cell Type Distribution Across Major Cell Types"

fig.update_traces(boxpoints='all', jitter=0.3, pointpos=-1.8)
fig.update_layout(layout)
fig.show()

# write_image does not create missing parent directories, so make sure the
# output folder exists before exporting the figure.
os.makedirs("out", exist_ok=True)
fig.write_image(os.path.join("out", "major_cell_type_distribution_study_samples_only.svg"))