In [None]:
from aavomics import database

import os

import anndata

from plotly import graph_objects
from plotly import offline as plotly
import numpy

In [None]:
# Read the droplet-classifier training AnnData from the aavomics data directory.
training_data_path = os.path.join(
    database.DATA_PATH,
    "aavomics_mouse_cortex_2021_droplet_training_data.h5ad"
)
droplet_classifier_adata = anndata.read_h5ad(training_data_path)

In [None]:
# Genes whose expression is plotted over the embedding; matched against
# var['Gene Name'] further down.
GENES_OF_INTEREST = ["Cldn5", "Cx3cr1"]

# Label of the cluster to inspect. Kept as a string because it is compared
# directly against the values of obs["leiden_scVI"].
cluster_number = "53"

# Per-droplet boolean mask: True where the droplet belongs to that cluster.
cluster_mask = (droplet_classifier_adata.obs["leiden_scVI"] == cluster_number)

In [None]:
# Crop the t-SNE embedding to a padded rectangular window around the cluster
# of interest. The result is `cluster_adata` (every droplet inside the window,
# whatever its cluster) and `coordinates` (its t-SNE positions).

# Lower/upper percentiles of the cluster's coordinates that bound the window
# before padding is applied.
WINDOW_PERCENTILES = (5, 95)
# Margin added on every side, as a fraction of the cluster's full (min-to-max)
# extent along that axis.
WINDOW_PADDING = .1

cluster_coordinates = numpy.asarray(droplet_classifier_adata[cluster_mask].obsm["X_tsne"])

# Fail early with a readable message: the reductions below would otherwise
# raise numpy's opaque "zero-size array" ValueError for an empty cluster.
if cluster_coordinates.shape[0] == 0:
    raise ValueError("No droplets found in leiden_scVI cluster %r" % cluster_number)

# Unpacking into two values assumes the embedding has exactly two columns.
x_range, y_range = cluster_coordinates.max(axis=0) - cluster_coordinates.min(axis=0)

# One call with both percentiles returns a (2, n_columns) array:
# row 0 holds the lower bounds, row 1 the upper bounds.
(min_x, min_y), (max_x, max_y) = numpy.percentile(
    cluster_coordinates, q=WINDOW_PERCENTILES, axis=0
)

all_coordinates = numpy.asarray(droplet_classifier_adata.obsm["X_tsne"])

x_mask = (
    (all_coordinates[:, 0] >= min_x - x_range * WINDOW_PADDING)
    & (all_coordinates[:, 0] <= max_x + x_range * WINDOW_PADDING)
)
y_mask = (
    (all_coordinates[:, 1] >= min_y - y_range * WINDOW_PADDING)
    & (all_coordinates[:, 1] <= max_y + y_range * WINDOW_PADDING)
)

# Copy so the cropped data is an independent AnnData rather than a view.
cluster_adata = droplet_classifier_adata[x_mask & y_mask].copy()
# `coordinates` is the name the plotting code further down reads.
coordinates = cluster_adata.obsm["X_tsne"]

# For each gene of interest, colour the cropped t-SNE window by
# log2(count + 1), show the figure inline and save it as
# out/<gene>_expression.png.

# write_image does not create missing directories, so make sure the output
# folder exists before the first figure is saved.
os.makedirs("out", exist_ok=True)

# Loop-invariant values: neither the embedding nor the cluster size depends
# on the gene being plotted.
coordinates = numpy.array(coordinates)
num_cluster_cells = cluster_mask.sum()

for gene_of_interest in GENES_OF_INTEREST:

    # Translate the gene name into the var index entry used to slice the
    # AnnData (presumably an Ensembl ID, going by the variable name).
    matching_ids = cluster_adata.var.loc[cluster_adata.var['Gene Name'] == gene_of_interest].index
    if len(matching_ids) == 0:
        raise ValueError("Gene %r not found in var['Gene Name']" % gene_of_interest)
    ensembl_id = matching_ids[0]

    # X is expected to be sparse here (.todense()); flatten the single-gene
    # column into a 1-D vector with one value per droplet in the window.
    gene_counts = numpy.log2(
        numpy.array(cluster_adata[:, ensembl_id].X.todense()).reshape((-1,)) + 1
    )

    # Hover text shows the plotted value for each droplet.
    text = gene_counts

    # The title statistic covers only droplets in the cluster itself, not
    # every droplet in the plotted window.
    num_non_zero = (droplet_classifier_adata[cluster_mask, ensembl_id].X > 0).sum()
    title = "Cluster %s %s Expression<BR>%i/%i (%.2f%%) > 0" % \
        (cluster_number, gene_of_interest, num_non_zero, num_cluster_cells, num_non_zero/num_cluster_cells*100)

    scatter_trace = graph_objects.Scatter(
        x=coordinates[:, 0],
        y=coordinates[:, 1],
        marker={
            "color": gene_counts,
            "showscale": False,
            "size": 3
        },
        mode="markers",
        text=text
    )

    traces = [scatter_trace]

    # Fixed-size square figure with transparent backgrounds and no grid.
    layout = graph_objects.Layout({
        "height": 500,
        "width": 500,
        "plot_bgcolor": "rgba(255, 255, 255, 0)",
        "paper_bgcolor": "rgba(255, 255, 255, 0)",
        "title": title,
        "xaxis": {
            "zeroline": False,
            "showgrid": False
        },
        "yaxis": {
            "zeroline": False,
            "showgrid": False
        }
    })

    figure = graph_objects.Figure(
        data=traces,
        layout=layout
    )

    plotly.iplot(figure)

    figure.write_image("out/%s_expression.png" % gene_of_interest, scale=4)
    
# Plot the cropped t-SNE window coloured by doublet status (red = doublet,
# blue = not), show it inline and save it as out/doublets.png.

# write_image does not create missing directories.
os.makedirs("out", exist_ok=True)

# Truthiness of obs["doublet"] decides the class of each droplet in the window.
doublet_flags = [bool(flag) for flag in cluster_adata.obs["doublet"]]

# Hover text describes what this plot shows. Previously it reused the `text`
# variable left over from the gene-expression loop, which labelled points with
# the last gene's expression (and was undefined if no genes were plotted).
text = ["doublet" if flag else "not doublet" for flag in doublet_flags]

scatter_trace = graph_objects.Scatter(
    x=coordinates[:, 0],
    y=coordinates[:, 1],
    marker={
        "color": ["red" if flag else "blue" for flag in doublet_flags],
        "size": 3
    },
    mode="markers",
    text=text
)

traces = [scatter_trace]

# The title statistic covers only droplets in the cluster itself, not every
# droplet in the plotted window.
num_cluster_cells = cluster_mask.sum()
num_doublets = droplet_classifier_adata.obs.loc[cluster_mask, "doublet"].sum()

# Fixed-size square figure with transparent backgrounds and no grid.
layout = graph_objects.Layout({
    "height": 500,
    "width": 500,
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "title": "Cluster %s doublets<BR>%i/%i (%.2f%%)" % (cluster_number, num_doublets, num_cluster_cells, num_doublets/num_cluster_cells*100),
    "xaxis": {
        "zeroline": False,
        "showgrid": False
    },
    "yaxis": {
        "zeroline": False,
        "showgrid": False
    }
})

figure = graph_objects.Figure(
    data=traces,
    layout=layout
)

plotly.iplot(figure)

figure.write_image("out/doublets.png", scale=4)

# Plot the cropped t-SNE window with one trace per leiden_scVI cluster present
# in it, show it inline and save it as out/doublet_cluster.png.

# write_image does not create missing directories.
os.makedirs("out", exist_ok=True)

window_clusters = cluster_adata.obs["leiden_scVI"]

traces = []

for cluster in window_clusters.unique():

    # Compute the membership mask once per cluster and hand numpy a plain
    # boolean array rather than a pandas Series.
    in_cluster = (window_clusters == cluster).values

    scatter_trace = graph_objects.Scatter(
        x=coordinates[in_cluster, 0],
        y=coordinates[in_cluster, 1],
        marker={
            "size": 3
        },
        mode="markers",
        name=cluster
    )

    traces.append(scatter_trace)

# Fixed-size square figure with transparent backgrounds and no grid. The cell
# count in the title is the size of the cluster of interest, not of the window.
layout = graph_objects.Layout({
    "height": 500,
    "width": 500,
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "title": "Cluster %s, %i cell(s)" % (cluster_number, cluster_mask.sum()),
    "xaxis": {
        "zeroline": False,
        "showgrid": False
    },
    "yaxis": {
        "zeroline": False,
        "showgrid": False
    }
})

figure = graph_objects.Figure(
    data=traces,
    layout=layout
)

plotly.iplot(figure)

figure.write_image("out/doublet_cluster.png", scale=4)