In [None]:
import os

import anndata
import numpy
import pandas
import scipy
import scipy.stats
from plotly import graph_objects
from plotly import offline as plotly
from sklearn.linear_model import LinearRegression

from aavomics import aavomics
from aavomics import database

In [None]:
# Names of the cell sets iterated over by the analysis cells below.
CELL_SET_NAMES = [
    "20190711_TC4",
    "20190712_TC5",
    "20190713_TC6",
    "20190713_TC7",
]

# Virus names; combined with a cell set name as "<cell set>-<virus>" to look
# up rows of the transduction-rate table.
VIRUS_NAMES = [
    "CAP-B10",
    "PHP.eB",
]

# Gene names matched against adata.var["Gene Name"].
MARKER_GENES = [
    "Rbfox3",
    "S100b",
    "Olig2",
]

# Input files, resolved relative to database.DATA_PATH when loaded.
TRANSDUCTION_RATE_FILE_NAME = "aavomics_marker_gene_transduction_rates.csv"
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"

In [None]:
# Load the transduction-rate table; its first column becomes the row index,
# which later cells address as "<cell set>-<virus>".
transduction_rate_df = pandas.read_csv(
    os.path.join(database.DATA_PATH, TRANSDUCTION_RATE_FILE_NAME),
    index_col=0,
)

# Load the AnnData object; later cells read adata.var["Gene Name"] from it.
adata = anndata.read_h5ad(
    os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME),
)

In [None]:
# Build two nested dicts keyed as [cell set name][virus name][marker gene]:
#   * cell_set_virus_marker_fraction_of_transduced: number of transduced
#     marker-positive cells divided by the number of transduced cells overall
#     (a fraction, not a percentage).
#   * scRNAseq_gene_marker_transduction_rate: the marker's transduction rate
#     exactly as stored in the transduction-rate table.
# Cell set / virus pairs without a row in the table get no virus-level entry.

# Resolve each marker gene name to its adata.var index entry once, up front:
# the lookup depends only on the gene, not on the cell set or virus. The index
# entry is what prefixes the per-gene columns of the transduction-rate table.
marker_gene_ensembl_ids = {}

for gene_name in MARKER_GENES:

    matching_ids = adata.var.index[adata.var["Gene Name"] == gene_name]

    if len(matching_ids) == 0:
        raise KeyError("Marker gene %s not found in adata.var['Gene Name']" % gene_name)

    # If several entries share the gene name, keep the first one.
    marker_gene_ensembl_ids[gene_name] = matching_ids[0]

cell_set_virus_marker_fraction_of_transduced = {}
scRNAseq_gene_marker_transduction_rate = {}

for cell_set_name in CELL_SET_NAMES:

    cell_set_virus_marker_fraction_of_transduced[cell_set_name] = {}
    scRNAseq_gene_marker_transduction_rate[cell_set_name] = {}

    for virus_name in VIRUS_NAMES:

        row_index = "%s-%s" % (cell_set_name, virus_name)

        # Not every virus has a row for every cell set; skip missing pairs.
        if row_index not in transduction_rate_df.index:
            continue

        cell_set_virus_marker_fraction_of_transduced[cell_set_name][virus_name] = {}
        scRNAseq_gene_marker_transduction_rate[cell_set_name][virus_name] = {}

        # Recover the count of transduced cells from the stored rate and cell count.
        total_transduction_rate = transduction_rate_df.loc[row_index, "All Cells Transduction Rate"]
        total_num_cells = transduction_rate_df.loc[row_index, "All Cells Num Cells"]
        total_num_transduced = total_transduction_rate * total_num_cells

        for gene_name in MARKER_GENES:

            ensembl_id = marker_gene_ensembl_ids[gene_name]

            transduction_rate = transduction_rate_df.loc[row_index, "%s Transduction Rate" % ensembl_id]
            num_cells = transduction_rate_df.loc[row_index, "%s Num Cells" % ensembl_id]

            num_transduced = transduction_rate * num_cells

            cell_set_virus_marker_fraction_of_transduced[cell_set_name][virus_name][gene_name] = num_transduced / total_num_transduced
            scRNAseq_gene_marker_transduction_rate[cell_set_name][virus_name][gene_name] = transduction_rate

In [None]:
# Plot the data: for every cell set / virus pair, the percentage of all
# transduced cells that belongs to each marker-gene-positive population.
fig = graph_objects.Figure()

x_values = []
y_values = []
colors = []
color_lines = []

for sample_name in CELL_SET_NAMES:

    sample_fractions = cell_set_virus_marker_fraction_of_transduced[sample_name]

    # Markers from cell sets with more than one virus get a black outline;
    # markers from single-virus cell sets get a fully transparent one.
    if len(sample_fractions) > 1:
        color_idx_line = 'black'
    else:
        color_idx_line = 'rgba(0, 0, 0, 0)'

    for virus_index, virus_name in enumerate(VIRUS_NAMES):

        if virus_name not in sample_fractions:
            continue

        # The first virus in VIRUS_NAMES is drawn in red and shifted right by
        # 0.25 so the two viruses sit side by side; any other virus is blue.
        if virus_index == 0:
            x_adjust = 0.25
            color_idx_fill = 'red'
        else:
            x_adjust = 0
            color_idx_fill = 'blue'

        for marker_gene_index, marker_gene in enumerate(MARKER_GENES):
            x_values.append(0.5 + marker_gene_index + x_adjust)
            # Stored values are fractions; convert to percent for the axis.
            y_values.append(sample_fractions[virus_name][marker_gene] * 100)
            colors.append(color_idx_fill)
            color_lines.append(color_idx_line)

fig.add_trace(graph_objects.Scatter(
    x=x_values,
    y=y_values,
    mode='markers',
    marker=dict(
        size=10,
        color=colors,
        line=dict(
            color=color_lines,
            width=2
        )
    )
))

layout = {
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "yaxis": {
        "title":"Fraction of transduced cells<BR> in scRNA-seq (%)",
        "zerolinecolor": "rgba(0, 0, 0, 1)",
        "zeroline": True,
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "gridwidth": 2
    },
    "width": 400,
    "height": 400,
    "showlegend": False,
    "xaxis": {
        "title":"Cell Types",
        # 0.625 is the midpoint between the two per-virus x positions
        # (0.5 and 0.75) within each marker gene's slot.
        "tickvals": numpy.arange(len(MARKER_GENES)) + 0.625,
        "ticktext": ["%s+" % (x) for x in MARKER_GENES],
        "zerolinecolor": "rgba(0, 0, 0, 1)",
        "zeroline": True,
        "rangemode": "tozero"
    }
}

fig.update_layout(layout)
fig.show()

# write_image does not create missing directories, so make sure "out" exists.
os.makedirs("out", exist_ok=True)
fig.write_image(os.path.join("out", "%s_%s_fraction_transduced.svg" % (VIRUS_NAMES[0].replace(".","-"),VIRUS_NAMES[1].replace(".","-"))))

In [None]:
# Display the nested {cell set: {virus: {marker gene: fraction}}} dict; values
# are fractions of all transduced cells (the plot multiplies them by 100).
cell_set_virus_marker_fraction_of_transduced

In [None]:
# Results from IHC
# Nested as {cell set name: {virus name: {marker gene: value}}}. The values
# are plotted unscaled on the "IHC - Transduction rate (%)" axis, i.e. they
# are already percentages. Not every cell set has an entry for both viruses.
imaging_gene_marker_transduction_rate = {
    '20190711_TC4': {
        'CAP-B10': {'Rbfox3': 57.7, 'S100b': 12.7, 'Olig2': 7.8},
    },
    '20190712_TC5': {
        'PHP.eB': {'Rbfox3': 47.6, 'S100b': 39.8, 'Olig2': 9.8},
        'CAP-B10': {'Rbfox3': 56.6, 'S100b': 19.5, 'Olig2': 4.4},
    },
    '20190713_TC6': {
        'PHP.eB': {'Rbfox3': 40.6, 'S100b': 48.9, 'Olig2': 5.3},
        'CAP-B10': {'Rbfox3': 50.6, 'S100b': 15.2, 'Olig2': 3.8},
    },
    '20190713_TC7': {
        'PHP.eB': {'Rbfox3': 13.4, 'S100b': 25.4, 'Olig2': 5.6},
    },
}

In [None]:
# Plot the data: per-marker transduction rate measured by IHC (x axis) against
# the rate measured by scRNA-seq (y axis), one point per
# cell set / virus / marker gene, with a linear fit overlaid.
fig = graph_objects.Figure()
x_values = []
y_values = []
text = []

for sample_name in CELL_SET_NAMES:
    for virus_name in VIRUS_NAMES:

        if virus_name not in scRNAseq_gene_marker_transduction_rate[sample_name]:
            continue

        # A pair can only be compared if it was measured by both methods;
        # skip pairs that have scRNA-seq rates but no IHC entry instead of
        # failing with a KeyError.
        imaging_rates = imaging_gene_marker_transduction_rate.get(sample_name, {}).get(virus_name)
        if imaging_rates is None:
            continue

        scRNAseq_rates = scRNAseq_gene_marker_transduction_rate[sample_name][virus_name]

        for marker_gene in MARKER_GENES:
            # IHC values are already percentages; scRNA-seq rates are
            # fractions and are scaled to percent here.
            x_values.append(imaging_rates[marker_gene])
            y_values.append(scRNAseq_rates[marker_gene] * 100)
            text.append(marker_gene)

fig.add_trace(graph_objects.Scatter(
    x=x_values,
    y=y_values,
    mode='markers',
    marker=dict(size=8, color="black"),
    text=text,
    showlegend=False
))

# Perform linear regression. Both the fit and the Pearson correlation need at
# least two points, so skip the overlay when fewer were collected.
if len(x_values) >= 2:
    model = LinearRegression()
    model.fit(numpy.asarray(x_values).reshape(-1, 1), numpy.asarray(y_values))
    x_range = numpy.linspace(numpy.min(x_values), numpy.max(x_values), 100)
    y_range = model.predict(x_range.reshape(-1, 1))
    # pearsonr returns (r, p-value); only r is reported in the legend.
    r = scipy.stats.pearsonr(x_values, y_values)[0]
    fig.add_trace(graph_objects.Scatter(
        x=x_range,
        y=y_range,
        name='Linear regression, r=%.2f' % r,
        mode='lines'
    ))

layout = {
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "xaxis": {
        "title": "IHC - Transduction rate (%)",
        "zerolinecolor": "rgba(0, 0, 0, 1)",
        "zeroline": True,
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "gridwidth": 2
    },
    "yaxis": {
        "title": "scRNA-seq - Transduction rate (%)",
        "zerolinecolor": "rgba(0, 0, 0, 1)",
        "zeroline": True,
        "rangemode": "tozero",
        "gridcolor": "rgba(0, 0, 0, 0.25)",
        "gridwidth": 2
    },
    "width": 700,
    "height": 450,
    "showlegend": True,
    "legend": {
        "yanchor": "bottom",
        "xanchor": "right",
    }
}
fig.update_layout(layout)
fig.show()

# write_image does not create missing directories, so make sure "out" exists.
os.makedirs("out", exist_ok=True)
fig.write_image(os.path.join("out", "%s_%s_imaging_vs_scRNA_seq.svg" % (VIRUS_NAMES[0].replace(".","-"),VIRUS_NAMES[1].replace(".","-"))))