In [None]:
from aavomics import database
import os
import pandas
import numpy
import anndata
import scanpy

from plotly import offline as plotly
from plotly import graph_objects
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly import graph_objects
from statsmodels.distributions.empirical_distribution import ECDF
import matplotlib.pyplot as plt
import dc_stat_think as dcst
from plotly.subplots import make_subplots

In [None]:
# Input files, both resolved relative to the aavomics database data directory.
TRANSDUCTION_RATE_FILE_NAME = "aavomics_cell_type_transduction_rates.csv"
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"

transduction_rate_path = os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME)
anndata_path = os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)

# The first CSV column is used as the row index of the transduction rate table.
transduction_rate_df = pandas.read_csv(transduction_rate_path, index_col=0)
adata = anndata.read_h5ad(anndata_path)

In [None]:
# Collapse the low level cell type labels into a high level hierarchy.
# Only excitatory neuron, inhibitory neuron and OPC subtypes are merged;
# every other label is carried over unchanged.
HIGH_LEVEL_CELL_TYPE_MEMBERS = {
    "Excitatory Neurons": ("L4/5", "L2", "L3", "L6", "L2/3", "L5", "L5/6"),
    "Inhibitory Neurons": ("Sncg", "Lamp5", "Pvalb", "Vip", "Sst", "Pax6"),
    "OPCs": ("Top2a+ OPCs", "Top2a- OPCs"),
}

# Invert the grouping into a flat "low level label -> high level label" lookup.
LOW_TO_HIGH_LEVEL_CELL_TYPE = {
    low_level_label: high_level_label
    for high_level_label, members in HIGH_LEVEL_CELL_TYPE_MEMBERS.items()
    for low_level_label in members
}

low_level_cell_types = adata.obs["Cell Type"].values

# Labels without an entry in the lookup map to themselves.
high_level_cell_types = [
    LOW_TO_HIGH_LEVEL_CELL_TYPE.get(label, label)
    for label in low_level_cell_types
]

adata.obs['High Level Cell Type'] = high_level_cell_types

In [None]:
# High level cell type labels used for the per-cell-type masks.
# The order matters: `cell_masks` is built in this order and later looked up
# through `CELL_TYPES.index(...)`.
CELL_TYPES = [
    "Committed Oligodendrocytes",
    "Excitatory Neurons",
    "Pericytes",
    "VLMCs",
    "Myoc+ Astrocytes",
    "Endothelial Cells",
    "Mature Oligodendrocytes",
    "Inhibitory Neurons",
    "Vascular SMCs",
    "Perivascular Macrophages",
    "OPCs",
    "Myoc- Astrocytes",
    "Microglia",
]

In [None]:
# Work on a copy so that later changes to `immune_data` leave `adata` untouched.
immune_data = adata.copy()

# Cell set identifiers belonging to each experimental condition.
CONTROL_SAMPLES = ['20200907_C3','20201119_C4','20210728_C5']
THREE_DPI_SAMPLES = ['20200903_TC8','20200904_TC9','20210728_TC12']
TWENTYFIVE_DPI_SAMPLES = ['20190713_TC7','20201120_TC10','20210726_TC11']

# Per-cell labels as plain numpy arrays (one entry per row of `immune_data.obs`).
immune_data_samples = numpy.array(immune_data.obs['Cell Set'].values)
immune_data_cells = numpy.array(immune_data.obs["High Level Cell Type"].values)

# Create masks: one boolean array per condition, True where the cell's
# 'Cell Set' is one of that condition's samples. Built with numpy.isin instead
# of a per-cell Python loop, so the masks are boolean arrays that can be
# combined directly with the cell type masks using `&`.
control_mask = numpy.isin(immune_data_samples, CONTROL_SAMPLES)
three_dpi_mask = numpy.isin(immune_data_samples, THREE_DPI_SAMPLES)
twentyfive_dpi_mask = numpy.isin(immune_data_samples, TWENTYFIVE_DPI_SAMPLES)

# One boolean mask per entry of CELL_TYPES, in the same order as CELL_TYPES.
cell_masks = [immune_data_cells == cell for cell in CELL_TYPES]
    
    



In [None]:
# Scale every cell to `target_sum` total counts, then apply log1p.
target_sum = 10000

# Normalize from `adata` rather than from `immune_data`: `immune_data.X` is
# overwritten below, so normalizing it in place of `adata` would re-normalize
# and re-log already transformed values whenever this cell is run again.
# `immune_data` was created as a copy of `adata`, so the first run is unchanged.
# NOTE(review): assumes `adata.X` still holds the values loaded from disk
# (no cell visible here modifies it) - confirm if other cells touch `adata.X`.
X_norm_new = scanpy.pp.normalize_total(adata,target_sum=target_sum,inplace=False)['X']
immune_data.X = X_norm_new
scanpy.pp.log1p(immune_data)

In [None]:
# 3 DPI
#
# Overlaid histograms of the expression of GENE in CELL_TYPE cells, control
# samples (blue) versus 3 DPI samples (red). Cells with a value of exactly zero
# are drawn as a single bar in a narrow left subplot; non-zero values are
# binned into NUM_BINS shared bins in the right subplot. Bar heights are the
# percentage of all cells of that condition (zero and non-zero together).

GENE = 'Slfn2'
CELL_TYPE = 'Microglia'
NUM_BINS = 50
title_histogram_control = 'Control'
title_histogram_condition = '3DPI'
title = GENE+" "+title_histogram_condition+" "+title_histogram_control

# Resolve the gene name to its var index entry and its column position in X.
gene_ensg_map = immune_data.var
# Not used in this cell; retained in case other cells read it.
gene_list = numpy.array(gene_ensg_map['Gene Name'].values)
matching_ensgs = gene_ensg_map.index[gene_ensg_map['Gene Name']==GENE]
if len(matching_ensgs) == 0:
    # Fail with a readable message instead of an IndexError from `[0]`.
    raise ValueError("Gene %r not found in immune_data.var['Gene Name']" % GENE)
corresponding_ensg = numpy.array(matching_ensgs)[0]
corresponding_ensg_loc = list(immune_data.var.index).index(corresponding_ensg)

# Select the cells of the requested type within each condition.
cell_type_mask = cell_masks[CELL_TYPES.index(CELL_TYPE)]

control_cell_mask = (cell_type_mask & numpy.asarray(control_mask, dtype=bool))
control_transcripts = immune_data.X[control_cell_mask,corresponding_ensg_loc].todense()
control_transcripts = numpy.array(control_transcripts).flatten()

three_dpi_cell_mask = (cell_type_mask & numpy.asarray(three_dpi_mask, dtype=bool))
three_dpi_transcripts = immune_data.X[three_dpi_cell_mask,corresponding_ensg_loc].todense()
three_dpi_transcripts = numpy.array(three_dpi_transcripts).flatten()


# Code adapted from https://www.nature.com/articles/s41598-020-77073-3

control_nonzero = control_transcripts[control_transcripts != 0]
three_dpi_nonzero = three_dpi_transcripts[three_dpi_transcripts != 0]

# Shared bin edges from the pooled non-zero values, so both conditions are
# binned identically. (Computed once; the original cell computed this twice.)
bin_counts, bin_edges = numpy.histogram(
    numpy.concatenate((control_nonzero, three_dpi_nonzero)),
    bins=NUM_BINS
)

# numpy.histogram returns NUM_BINS + 1 edges for NUM_BINS counts. Plotly
# centres each bar on its x value, so the bars are placed at the bin centres;
# using the edges directly would shift the histogram left by half a bin.
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

figure = make_subplots(rows=1, cols=2, column_widths=[0.1, 0.9])

control_bin_counts, _ = numpy.histogram(control_nonzero, bins=bin_edges)
three_dpi_bin_counts, _ = numpy.histogram(three_dpi_nonzero, bins=bin_edges)

# Legend labels report the mean over all cells (zeros included) and cell count.
control_trace_name = title_histogram_control+" (u=%.3e, %i cells)" % (control_transcripts.mean(), len(control_transcripts))
three_dpi_trace_name = title_histogram_condition+" (u=%.3e, %i cells)" % (three_dpi_transcripts.mean(), len(three_dpi_transcripts))

# Non-zero value histogram
control_histogram = graph_objects.Bar(
    x=bin_centers,
    y=control_bin_counts/control_transcripts.shape[0] * 100,
    opacity=0.5,
    name=control_trace_name,
    marker={
        "color": 'blue'
    }
)

three_dpi_histogram = graph_objects.Bar(
    x=bin_centers,
    y=three_dpi_bin_counts/three_dpi_transcripts.shape[0] * 100,
    opacity=0.5,
    name=three_dpi_trace_name,
    marker={
        "color": 'red'
    }
)

# Zero value bar (hidden from the legend; it shares the histogram's name/color)
control_zero_histogram = graph_objects.Bar(
    x=[0],
    y=[control_transcripts[control_transcripts==0].shape[0]/control_transcripts.shape[0] * 100],
    opacity=0.5,
    showlegend=False,
    name=control_trace_name,
    marker={
        "color": 'blue'
    }
)

threedpi_zero_histogram = graph_objects.Bar(
    x=[0],
    y=[three_dpi_transcripts[three_dpi_transcripts==0].shape[0]/three_dpi_transcripts.shape[0] * 100],
    opacity=0.5,
    showlegend=False,
    name=three_dpi_trace_name,
    marker={
        "color": 'red'
    }
)

# Traces are added in the same order as before: control first, then 3 DPI.
figure.add_trace(control_histogram, row=1, col=2)
figure.add_trace(control_zero_histogram, row=1, col=1)
figure.add_trace(three_dpi_histogram, row=1, col=2)
figure.add_trace(threedpi_zero_histogram, row=1, col=1)

plot_title = title

figure.update_layout(
    {
        "barmode": "overlay",
        "title": plot_title,
        "plot_bgcolor": "rgba(255, 255, 255, 0)",
        "paper_bgcolor": "rgba(255, 255, 255, 0)",
        "xaxis": {
            "title": "Gene Abundance",
        },
        "yaxis": {
            "title": "% of Cells",
        },
        "bargap": 0
    }
)

# The zero-value subplot always spans the full 0-100 % range ...
figure.update_yaxes(
    {
        "range": [0, 100]
    },
    row=1,
    col=1
)

# ... and shows a single tick at 0.
figure.update_xaxes(
    {
        "tickvals": [0]
    },
    row=1,
    col=1
)

figure.show()

# write_image does not create missing directories, so make sure 'out' exists.
os.makedirs('out', exist_ok=True)
figure.write_image('out/'+title+'.svg')

In [None]:
# 25DPI
#
# Overlaid histograms of the expression of GENE in CELL_TYPE cells, control
# samples (blue) versus 25 DPI samples (red). GENE and CELL_TYPE are not set
# here; they are read from the values already defined earlier in the notebook.
# Cells with a value of exactly zero are drawn as a single bar in a narrow left
# subplot; non-zero values are binned into NUM_BINS shared bins in the right
# subplot. Bar heights are the percentage of all cells of that condition.


NUM_BINS = 50
title_histogram_control = 'Control'
title_histogram_condition = '25DPI'
title = GENE+" "+title_histogram_condition+" "+title_histogram_control

# Resolve the gene name to its var index entry and its column position in X.
gene_ensg_map = immune_data.var
# Not used in this cell; retained in case other cells read it.
gene_list = numpy.array(gene_ensg_map['Gene Name'].values)
matching_ensgs = gene_ensg_map.index[gene_ensg_map['Gene Name']==GENE]
if len(matching_ensgs) == 0:
    # Fail with a readable message instead of an IndexError from `[0]`.
    raise ValueError("Gene %r not found in immune_data.var['Gene Name']" % GENE)
corresponding_ensg = numpy.array(matching_ensgs)[0]
corresponding_ensg_loc = list(immune_data.var.index).index(corresponding_ensg)

# Select the cells of the requested type within each condition.
cell_type_mask = cell_masks[CELL_TYPES.index(CELL_TYPE)]

control_cell_mask = (cell_type_mask & numpy.asarray(control_mask, dtype=bool))
control_transcripts = immune_data.X[control_cell_mask,corresponding_ensg_loc].todense()
control_transcripts = numpy.array(control_transcripts).flatten()

twentyfive_dpi_cell_mask = (cell_type_mask & numpy.asarray(twentyfive_dpi_mask, dtype=bool))
twentyfive_dpi_transcripts = immune_data.X[twentyfive_dpi_cell_mask,corresponding_ensg_loc].todense()
twentyfive_dpi_transcripts = numpy.array(twentyfive_dpi_transcripts).flatten()

# Code adapted from https://www.nature.com/articles/s41598-020-77073-3

control_nonzero = control_transcripts[control_transcripts != 0]
twentyfive_dpi_nonzero = twentyfive_dpi_transcripts[twentyfive_dpi_transcripts != 0]

# Shared bin edges from the pooled non-zero values, so both conditions are
# binned identically. (Computed once; the original cell computed this twice.)
bin_counts, bin_edges = numpy.histogram(
    numpy.concatenate((control_nonzero, twentyfive_dpi_nonzero)),
    bins=NUM_BINS
)

# numpy.histogram returns NUM_BINS + 1 edges for NUM_BINS counts. Plotly
# centres each bar on its x value, so the bars are placed at the bin centres;
# using the edges directly would shift the histogram left by half a bin.
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

figure = make_subplots(rows=1, cols=2, column_widths=[0.1, 0.9])

control_bin_counts, _ = numpy.histogram(control_nonzero, bins=bin_edges)
twentyfive_dpi_bin_counts, _ = numpy.histogram(twentyfive_dpi_nonzero, bins=bin_edges)

# Legend labels report the mean over all cells (zeros included) and cell count.
control_trace_name = title_histogram_control+" (u=%.3e, %i cells)" % (control_transcripts.mean(), len(control_transcripts))
twentyfive_dpi_trace_name = title_histogram_condition+" (u=%.3e, %i cells)" % (twentyfive_dpi_transcripts.mean(), len(twentyfive_dpi_transcripts))

# Non-zero value histogram
control_histogram = graph_objects.Bar(
    x=bin_centers,
    y=control_bin_counts/control_transcripts.shape[0] * 100,
    opacity=0.5,
    name=control_trace_name,
    marker={
        "color": 'blue'
    }
)

twentyfive_dpi_histogram = graph_objects.Bar(
    x=bin_centers,
    y=twentyfive_dpi_bin_counts/twentyfive_dpi_transcripts.shape[0] * 100,
    opacity=0.5,
    name=twentyfive_dpi_trace_name,
    marker={
        "color": 'red'
    }
)

# Zero value bar (hidden from the legend; it shares the histogram's name/color)
control_zero_histogram = graph_objects.Bar(
    x=[0],
    y=[control_transcripts[control_transcripts==0].shape[0]/control_transcripts.shape[0] * 100],
    opacity=0.5,
    showlegend=False,
    name=control_trace_name,
    marker={
        "color": 'blue'
    }
)

twentyfivedpi_zero_histogram = graph_objects.Bar(
    x=[0],
    y=[twentyfive_dpi_transcripts[twentyfive_dpi_transcripts==0].shape[0]/twentyfive_dpi_transcripts.shape[0] * 100],
    opacity=0.5,
    showlegend=False,
    name=twentyfive_dpi_trace_name,
    marker={
        "color": 'red'
    }
)

# Traces are added in the same order as before: control first, then 25 DPI.
figure.add_trace(control_histogram, row=1, col=2)
figure.add_trace(control_zero_histogram, row=1, col=1)
figure.add_trace(twentyfive_dpi_histogram, row=1, col=2)
figure.add_trace(twentyfivedpi_zero_histogram, row=1, col=1)

plot_title = title

figure.update_layout(
    {
        "barmode": "overlay",
        "title": plot_title,
        "plot_bgcolor": "rgba(255, 255, 255, 0)",
        "paper_bgcolor": "rgba(255, 255, 255, 0)",
        "xaxis": {
            "title": "Gene Abundance",
        },
        "yaxis": {
            "title": "% of Cells",
        },
        "bargap": 0
    }
)

# The zero-value subplot always spans the full 0-100 % range ...
figure.update_yaxes(
    {
        "range": [0, 100]
    },
    row=1,
    col=1
)

# ... and shows a single tick at 0.
figure.update_xaxes(
    {
        "tickvals": [0]
    },
    row=1,
    col=1
)

figure.show()

# write_image does not create missing directories, so make sure 'out' exists.
os.makedirs('out', exist_ok=True)
figure.write_image('out/'+title+'.svg')