In [None]:
import os

import anndata
import numpy
import pandas
import scipy
import scipy.stats
from diffxpy import api as diffxpy
from plotly import graph_objects
from plotly import offline as plotly

from aavomics import database
from aavomics import aavomics

In [None]:
# Number of random draws taken from each distribution per simulated condition
NUM_SAMPLES = 1000

# Names of the cell sets (keys of database.CELL_SETS_DICT) whose cells labelled
# "Debris" are used to fit the background distributions
DEBRIS_SAMPLES = [
    "20190711_TC4",
    "20190712_TC5",
    "20190713_TC6",
    "20190713_TC7",
]

# Alignment whose AnnData file carries the taxonomy labels in .obs
ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A"

# Alignment whose AnnData file carries the per-vector count columns in .obs
VIRUS_ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A_AAVomics"

# Name of the .obs column that holds the cell type label for each cell
TAXONOMY_NAME = "CCN202105041"

In [None]:
def plot_negative_binomial_overlay(
    values,
    dist=None,
    title=None
):
    """Plot a histogram of integer counts, optionally overlaid with a fitted distribution.

    The values are rounded to the nearest integer and counted in unit-wide
    bins starting at 0, one bin per integer up to and including the largest
    value. The figure is displayed inline via plotly's offline ``iplot``.

    Args:
        values: Sequence/array of counts to histogram. Values below 0 are not
            shown because the bins start at 0.
        dist: Optional distribution object exposing ``pmf(k)`` and ``args``
            (at least two entries, reported as ``a`` and ``b`` in the legend).
            When given, ``pmf(k) * len(values)`` is drawn as a line over the
            histogram so it is on the same scale as the bar heights.
        title: Optional figure title.
    """

    values = numpy.round(values)
    num_data_points = len(values)

    figure_traces = []

    min_value = 0
    bin_size = 1

    # Fall back to min_value for empty input so that a single (empty) bin is
    # still produced instead of max() raising on an empty sequence
    max_value = min_value
    if num_data_points > 0:
        max_value = max(max_value, max(values))

    # One bin [k, k + 1) per integer k in [min_value, max_value]. The right
    # edge at max_value + 1 is required: numpy.histogram ignores samples
    # beyond the last edge and treats the last bin as closed, so stopping the
    # edges early would drop the largest count and merge the two counts below it.
    bin_edges = numpy.arange(min_value, max_value + 2 * bin_size, bin_size)
    bin_counts, _ = numpy.histogram(values, bins=bin_edges)

    # Left edge of each bin == the integer count that bin represents; this has
    # the same length as bin_counts
    bin_starts = bin_edges[:-1]

    histogram = graph_objects.Bar(
        x=bin_starts,
        y=bin_counts,
        name="Counts out of %i" % num_data_points
    )

    figure_traces.append(histogram)

    if dist is not None:
        # Scale the probability mass to expected counts for comparison with the bars
        expected_counts = [dist.pmf(value) * num_data_points for value in bin_starts]

        negative_binomial = graph_objects.Scatter(
            x=list(bin_starts),
            y=expected_counts,
            mode="lines",
            name="Negative Binomial a=%.2f b=%.2f" % (dist.args[0], dist.args[1])
        )

        figure_traces.append(negative_binomial)

    layout_parameters = {
        "hovermode": "closest",
        "bargap": 0,
        "height": 800,
        "width": 800,
        # Fully transparent backgrounds
        "plot_bgcolor": "rgba(255, 255, 255, 0)",
        "paper_bgcolor": "rgba(255, 255, 255, 0)"
    }

    if title is not None:
        layout_parameters["title"] = title

    layout = graph_objects.Layout(layout_parameters)

    figure = graph_objects.Figure(data=figure_traces, layout=layout)

    plotly.iplot(figure)

In [None]:
# Fit one background ("debris") count distribution per vector for every debris
# sample. debris_dists[i] was fitted on cell set
# DEBRIS_SAMPLES[debris_dist_cell_set_indices[i]].
debris_dists = []
debris_dist_cell_set_indices = []

for cell_set_index, cell_set_name in enumerate(DEBRIS_SAMPLES):

    cell_set = database.CELL_SETS_DICT[cell_set_name]

    virus_adata_file_path = cell_set.get_anndata_file_path(alignment_name=VIRUS_ALIGNMENT_NAME, transcript_type="transcriptome")
    adata_file_path = cell_set.get_anndata_file_path(alignment_name=ALIGNMENT_NAME, transcript_type="transcriptome")

    adata = anndata.read_h5ad(adata_file_path)
    virus_adata = anndata.read_h5ad(virus_adata_file_path)

    # Keep only the cells that the taxonomy labels as debris.
    # NOTE(review): the mask is built from `adata` but applied to `virus_adata`;
    # this presumably relies on both files listing the same cells in the same
    # order - confirm.
    debris_mask = adata.obs[TAXONOMY_NAME] == "Debris"
    virus_debris_adata = virus_adata[debris_mask]

    # Collect every delivery vehicle and vector injected into the source animal.
    # `viruses` is gathered alongside `vectors` but is not used further below.
    viruses = set()
    vectors = set()

    for injection in cell_set.source_tissue.animal.injections:
        for vector in injection.vector_pool.vectors:
            viruses.add(vector.delivery_vehicle.name)
            vectors.add(vector.name)

    # Iterate in sorted order: plain set iteration order can differ between
    # kernel sessions, which would make the order of debris_dists (and of
    # everything derived from it) irreproducible. Assumes vector names are
    # mutually comparable (e.g. all strings).
    for vector in sorted(vectors):
        vector_counts = virus_debris_adata.obs[vector].values
        dist = aavomics.get_neg_binomial_diffxpy(vector_counts)

        # Append to both parallel lists together so they cannot get out of
        # sync if plotting fails
        debris_dists.append(dist)
        debris_dist_cell_set_indices.append(cell_set_index)

        # Include the vector name so plots of the same sample are distinguishable
        plot_negative_binomial_overlay(
            vector_counts,
            dist,
            title="%s Debris (%s)" % (cell_set_name, vector)
        )

In [None]:
# Grid of negative binomial parameters (r, p) for the simulated signal
R_RANGE = numpy.linspace(0.1, 10, 40)
P_RANGE = numpy.linspace(0.001, 0.99, 25)

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same draws
SIMULATION_SEED = 0

# Number of steps used to scan candidate rates in [0, 1]; shared by the
# aavomics estimator and the simple histogram fit below
RATE_RESOLUTION = 1000


def _estimate_rate_by_histogram_fit(observed_samples, background_samples, background_fractions):
    """Estimate the non-background fraction of ``observed_samples`` by a grid search.

    For every candidate fraction f in ``background_fractions`` the background
    histogram is scaled by f (and by the ratio of sample sizes) and compared
    against the histogram of the observed samples. The candidate with the
    smallest summed absolute difference over all count values wins (the first
    one in case of ties).

    Args:
        observed_samples: 1-D array of counts drawn from background + signal.
        background_samples: 1-D array of counts drawn from background only.
        background_fractions: 1-D array of candidate background fractions.

    Returns:
        ``1 - f_best``, where ``f_best`` is the best fitting background fraction.
    """

    # Every count value that occurs in either sample, sorted ascending
    unique_counts = numpy.union1d(observed_samples, background_samples)

    # Occurrences of each unique count value in each sample
    observed_histogram = numpy.bincount(
        numpy.searchsorted(unique_counts, observed_samples),
        minlength=unique_counts.shape[0]
    )
    background_histogram = numpy.bincount(
        numpy.searchsorted(unique_counts, background_samples),
        minlength=unique_counts.shape[0]
    )

    # Rows: unique count values; columns: candidate background fractions
    expected_histogram = (background_histogram[:, None] * background_fractions[None, :]) /\
        background_samples.shape[0] * observed_samples.shape[0]

    scores = numpy.abs(expected_histogram - observed_histogram[:, None]).sum(axis=0)

    return 1 - background_fractions[numpy.argmin(scores)]


# scipy.stats distributions draw from NumPy's global random state when no
# random_state is passed. NOTE(review): this assumes the entries of
# debris_dists are scipy.stats frozen distributions as well - confirm.
numpy.random.seed(SIMULATION_SEED)

true_transduction_rates = []
texts = []
transduction_rates_simple = []
transduction_rates = []

# Candidate background fractions; identical for every simulated condition
p_values = numpy.linspace(0, 1, RATE_RESOLUTION + 1)

for debris_dist_index, debris_dist in enumerate(debris_dists):

    background_dist = debris_dist
    background_r = background_dist.args[0]
    p = background_dist.args[1]
    cell_set_index = debris_dist_cell_set_indices[debris_dist_index]

    # Probability of a non-zero count from background alone; depends only on
    # the background distribution, so compute it once per distribution
    background_transduction_rate = 1 - background_dist.pmf(0)

    for signal_r in R_RANGE:

        for signal_p in P_RANGE:

            signal_dist = scipy.stats.nbinom(signal_r, signal_p)

            # Ground truth: probability that the signal alone yields a non-zero count
            signal_transduction_rate = 1 - signal_dist.pmf(0)

            true_transduction_rates.append(signal_transduction_rate)

            # The background-only reference and the background component of
            # the mixed sample are drawn independently
            background_samples = background_dist.rvs(NUM_SAMPLES)
            signal_samples = signal_dist.rvs(NUM_SAMPLES)
            background_plus_signal_samples = background_dist.rvs(NUM_SAMPLES) + signal_samples

            texts.append("%s<BR>background_rate=%.2f, signal_rate=%.2f<BR>p=%.2f, signal_r=%.2f, background_r=%.2f" % 
                        (DEBRIS_SAMPLES[cell_set_index], background_transduction_rate, signal_transduction_rate, p, signal_r, background_r))

            transduction_rate = aavomics.get_transcript_presence_rate(
                background_plus_signal_samples,
                method=aavomics.Infection_Rate_Method.COUNTING,
                background_transcript_counts=background_samples,
                resolution=RATE_RESOLUTION
            )

            transduction_rates.append(transduction_rate[0])

            estimated_infectivity_rate = _estimate_rate_by_histogram_fit(
                background_plus_signal_samples,
                background_samples,
                p_values
            )

            transduction_rates_simple.append(estimated_infectivity_rate)

In [None]:
# Compare the estimated transduction rate against the simulated ground truth
# and save the figure as an SVG.
traces = []

# One marker per simulated condition; hover text describes the condition
scatter = graph_objects.Scatter(
    x=true_transduction_rates,
    y=transduction_rates,
    mode="markers",
    text=texts,
    name="Cumulative error",
    marker={
        "size": 3
    }
)

traces.append(scatter)

# Identity line marking a perfect estimate. It carries no hover text: the
# per-condition descriptions in `texts` do not apply to its two end points.
scatter = graph_objects.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode="lines",
    name="x=y"
)

traces.append(scatter)

layout_parameters = {
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "xaxis": {
        "title": "Transduction Rate (Simulated Ground Truth)",
        "range": [0, 1],
        "linecolor": "rgba(0, 0, 0, 1)",
        "gridcolor": "rgba(0, 0, 0, 0.25)"
    },
    "yaxis": {
        "title": "Estimated Transduction Rate",
        "linecolor": "rgba(0, 0, 0, 1)",
        "range": [0, 1],
        "gridcolor": "rgba(0, 0, 0, 0.25)"
    },
    "showlegend": True
}

figure = graph_objects.Figure(data=traces, layout=layout_parameters)

plotly.iplot(figure)

# Create the output directory first so that the export does not fail on a
# fresh checkout where "out" does not exist yet
os.makedirs("out", exist_ok=True)

figure.write_image(os.path.join("out", "simulated_debris_recovery.svg"))