In [None]:
import os
import numpy
import pandas

from aavomics import database
import anndata

from plotly import offline as plotly
from plotly import graph_objects

In [None]:
# File name of the .h5ad dataset; it is joined with database.DATA_PATH when the
# file is read into `adata`.
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"

In [None]:
# Resolve the dataset location inside the aavomics data directory, then load it.
anndata_path = os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)
adata = anndata.read_h5ad(anndata_path)

In [None]:
# Names of the two adata.obs columns that are tallied per cell set below:
# the first feeds the neuronal tables (samples_neu), the second the
# non-neuronal tables (samples_nn).
TAXONOMY_NAME_1, TAXONOMY_NAME_2 = "CCN202105050", "CCN202105051"

In [None]:
# Labels that never count as a real cell type in either taxonomy.
EXCLUDED_CELL_TYPES = ["nan", "Debris", "Multiplets"]


def is_countable(cell_type):
    """Return True unless the label is missing or one of EXCLUDED_CELL_TYPES.

    The missing-value test runs first so that NaN / pandas.NA labels are
    rejected before the list membership comparison is attempted.
    """
    return not (pandas.isna(cell_type) or cell_type in EXCLUDED_CELL_TYPES)


def summarize_cell_types(obs, taxonomy_name, cell_types):
    """Tabulate how many rows of `obs` carry each label of one taxonomy column.

    Parameters
    ----------
    obs : pandas.DataFrame
        Per-cell annotations (a subset of ``adata.obs``).
    taxonomy_name : str
        Name of the column in `obs` that holds the labels.
    cell_types : iterable
        Candidate labels to tabulate. Labels rejected by ``is_countable`` are
        skipped; labels that do not occur in `obs` are kept with a count of 0.

    Returns
    -------
    pandas.DataFrame
        Indexed by cell type, with columns "Num Cells" and "% of cells"
        (the share of the counted cells, in percent).
    """
    taxonomy_labels = obs[taxonomy_name]

    cell_type_counts = {
        cell_type: (taxonomy_labels == cell_type).sum()
        for cell_type in cell_types
        if is_countable(cell_type)
    }

    df = pandas.DataFrame.from_dict(cell_type_counts, orient="index", columns=["Num Cells"])
    df["% of cells"] = df["Num Cells"] / df["Num Cells"].sum() * 100

    return df


names_neu, samples_neu = [], []
names_nn, samples_nn = [], []

# The first taxonomy is tabulated against the labels found in the WHOLE
# dataset, so every cell set gets the same rows (zero where a type is absent).
# The list does not depend on the cell set, so it is computed once.
all_taxonomy_1_types = adata.obs[TAXONOMY_NAME_1].unique()

for cell_set in database.CELL_SETS:

    # Only the per-cell annotations are needed, so subset `obs` directly
    # instead of copying the full AnnData object for every cell set.
    cell_set_obs = adata.obs[adata.obs["Cell Set"] == cell_set.name]

    print(cell_set.name)
    df = summarize_cell_types(cell_set_obs, TAXONOMY_NAME_1, all_taxonomy_1_types)
    display(df)
    names_neu.append(cell_set.name)
    samples_neu.append(df)

    # The second taxonomy only lists the labels that occur in this cell set.
    print(cell_set.name)
    df = summarize_cell_types(
        cell_set_obs,
        TAXONOMY_NAME_2,
        cell_set_obs[TAXONOMY_NAME_2].unique()
    )
    display(df)
    names_nn.append(cell_set.name)
    samples_nn.append(df)

In [None]:
# Neurons Counts
#
# Pool the per-sample neuronal tables into one total per cell type. Every
# type is seeded with 0 so that each key exists even when no cell of that
# type was observed.

counts = {'L2':0,'L2/3':0,'L3':0,'L4/5':0,'L5':0,'L5/6':0,'L6':0,'Pvalb':0,'Sst':0,'Vip':0,'Sncg':0,'Pax6':0,'Lamp5':0}
for df in samples_neu:
    # Iterate the integer column directly: DataFrame.iterrows() would upcast
    # each row to float because the frame also holds the float "% of cells"
    # column, turning the cell counts into floats.
    for cell_type, num_cells in df["Num Cells"].items():
        if cell_type not in counts:
            # Still a KeyError, as before, but one that says what to fix.
            raise KeyError(
                "Unexpected neuronal cell type %r: add it to `counts` before pooling" % (cell_type,)
            )
        counts[cell_type] += int(num_cells)

In [None]:
# Non-neuronal Counts
#
# Pool the per-sample non-neuronal tables into one total per cell type. Only
# types that occur in at least one sample end up as keys.

counts_nn = {}
for df in samples_nn:
    # Iterate the integer column directly: DataFrame.iterrows() would upcast
    # each row to float because the frame also holds the float "% of cells"
    # column, turning the cell counts into floats.
    for cell_type, num_cells in df["Num Cells"].items():
        counts_nn[cell_type] = counts_nn.get(cell_type, 0) + int(num_cells)

In [None]:
# Build the cell type sunburst: a root node, two levels of grouping nodes, and
# one leaf per cell type sized by its pooled cell count.

labels = []
parent_labels = []
values = []


def add_node(label, parent, value=0):
    """Append one node to the parallel lists passed to graph_objects.Sunburst.

    Grouping nodes keep the default value of 0: the trace below uses Plotly's
    default branch handling, where a parent's own value is extra on top of its
    children, so 0 makes each grouping sector the sum of its leaves.
    """
    labels.append(label)
    parent_labels.append(parent)
    values.append(value)


# Root node: number of samples and total number of counted cells.
center = "%d samples<BR>%i cells" % (len(samples_neu), sum(counts.values()) + sum(counts_nn.values()))
add_node(center, "")

add_node('Neurons', center)
add_node('Non-Neuronal Cells', center)

# Spelling corrected from "Glutaminergic"; the same string must be used for the
# node itself and as the parent of its leaves, hence the shared name.
glutamatergic_label = 'Glutamatergic'
add_node(glutamatergic_label, 'Neurons')
add_node('GABAergic', 'Neurons')

nnc = ['Astrocytes', 'Vascular Cells', 'Immune Cells', 'Oligodendrocytes']
for n in nnc:
    add_node(n, 'Non-Neuronal Cells')

glut = ['L2', 'L2/3', 'L3', 'L4/5', 'L5', 'L5/6', 'L6']

gaba = ['Pvalb', 'Sst', 'Vip', 'Sncg', 'Pax6', 'Lamp5']

astro = ["Myoc- Astrocytes", "Myoc+ Astrocytes"]

vasc = ["Endothelial Cells",
        "Pericytes",
        "Red Blood Cells",
        "Vascular SMCs",
        "VLMCs"]

immun = ["Perivascular Macrophages",
         "Microglia",
         "Leukocytes"]

oligo = ["OPCs",
         "Committed Oligodendrocytes",
         "Mature Oligodendrocytes"]

# (parent label, leaf cell types, pooled counts to read the leaf values from)
leaf_groups = [
    (glutamatergic_label, glut, counts),
    ('GABAergic', gaba, counts),
    ('Astrocytes', astro, counts_nn),
    ('Vascular Cells', vasc, counts_nn),
    ('Immune Cells', immun, counts_nn),
    ('Oligodendrocytes', oligo, counts_nn),
]

for parent, cell_types, pooled_counts in leaf_groups:
    for g in cell_types:
        # counts_nn only holds types that were actually observed, so a listed
        # type that is absent from every sample is drawn with 0 cells instead
        # of raising KeyError.
        add_node(g, parent, pooled_counts.get(g, 0))

# NOTE: this palette is defined but not passed to the trace below, so the
# figure is drawn with Plotly's default colours.
colors = ['#d62728','#2ca02c','#17becf','#8c564b','#bcbd22']

sunburst = graph_objects.Sunburst(
    labels=labels,
    parents=parent_labels,
    values=values
)

layout = graph_objects.Layout(
    title="Cell Type Distribution"
)

figure = graph_objects.Figure([sunburst], layout=layout)

plotly.iplot(figure)

# Make sure the output directory exists before exporting the static image.
output_dir = "out"
os.makedirs(output_dir, exist_ok=True)
figure.write_image(os.path.join(output_dir, "cell_type_sunburst.svg"))