In [None]:
from aavomics import database
import anndata
import os
import scanpy
import numpy
from aavomics import aavomics
import scvi
from plotly import graph_objects
from plotly import offline as plotly

In [None]:
# Value of the "Cell Set" obs column selecting the cells analyzed in this notebook.
CELL_SET = "20190712_TC5"
# File name of the AnnData file; joined with database.DATA_PATH when it is loaded.
ANNDATA_FILE_NAME = "aavomics_mouse_cortex_2021.h5ad"
# Random seed passed to scVI training and to the t-SNE embedding.
SEED = 1042

In [None]:
# Read the full dataset from the project's data directory.
anndata_file_path = os.path.join(database.DATA_PATH, ANNDATA_FILE_NAME)
adata = anndata.read_h5ad(anndata_file_path)

In [None]:
# Keep only the cells whose "Cell Set" matches CELL_SET. The copy makes the
# subset an independent AnnData object rather than a view of `adata`.
in_cell_set = adata.obs["Cell Set"] == CELL_SET
cell_set_adata = adata[in_cell_set].copy()

In [None]:
# Register the subset with scvi-tools, using the "Cell Set" obs column as the batch key.
scvi.data.setup_anndata(cell_set_adata, batch_key="Cell Set")

In [None]:
# Build the scVI model on the selected cell set.
vae = scvi.model.SCVI(
    cell_set_adata,
    n_latent=20,
    n_layers=2,
    n_hidden=256
)

# KL warmup length, in training iterations. The expression is the value
# suggested in the scvi-tools trainer documentation:
# https://www.scvi-tools.org/en/stable/api/reference/scvi.core.trainers.UnsupervisedTrainer.html
# It is cast to int because `/` produces a float (1600.0), whereas the
# parameter is documented as an integer iteration count.
kl_warmup_iterations = int(128 * 5000 / 400)

vae.train(
    frequency=1,
    n_epochs=100,
    # Epoch-based warmup is explicitly unset; the iteration-based value is supplied instead.
    n_epochs_kl_warmup=None,
    n_iter_kl_warmup=kl_warmup_iterations,
    seed=SEED
)

In [None]:
# Model-normalized expression for every cell; read by the per-gene cells further down.
normalized_gene_expression = vae.get_normalized_expression(vae.adata)

# Store the scVI latent representation on the AnnData object, then embed it with t-SNE.
# The two model queries are deliberately kept in this order.
latent_representation = vae.get_latent_representation(vae.adata)
vae.adata.obsm["X_scVI"] = latent_representation
scanpy.tl.tsne(vae.adata, use_rep="X_scVI", random_state=SEED, n_jobs=8)

# Note: neighbor-graph construction and Leiden clustering (previously sketched
# here at resolution 2, to distinguish between doublet clusters) are not run
# in this notebook.

In [None]:
# For each variant column, add a boolean "<name>>0" column that is True for
# cells whose value in that column is above zero.
for variant_name in ("CAP-B10", "PHP.eB"):
    vae.adata.obs[f"{variant_name}>0"] = vae.adata.obs[variant_name] > 0

In [None]:
# t-SNE scatter of the cell set, split by the boolean "PHP.eB>0" obs column.
transduced_mask = vae.adata.obs["PHP.eB>0"]

# Index the embedding directly rather than slicing the AnnData object once per
# coordinate; the selected points are the same.
tsne_coordinates = vae.adata.obsm["X_tsne"]
is_transduced = transduced_mask.values

transduced_scatter = graph_objects.Scatter(
    x=tsne_coordinates[is_transduced, 0],
    y=tsne_coordinates[is_transduced, 1],
    name="Transduced",
    mode="markers",
    marker={
        "size": 2,
        "color": "blue",
        "opacity": 0.8
    }
)

untransduced_scatter = graph_objects.Scatter(
    x=tsne_coordinates[~is_transduced, 0],
    y=tsne_coordinates[~is_transduced, 1],
    name="Untransduced",
    mode="markers",
    marker={
        "size": 2,
        "color": "orange",
        "opacity": 0.8
    }
)

layout = {
    "xaxis": {
        "title": "t-SNE 1"
    },
    # Label the second embedding axis as well so the figure stands on its own.
    "yaxis": {
        "title": "t-SNE 2"
    },
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "showlegend": False,
    "width": 800,
    "height": 600
}

figure = graph_objects.Figure(data=[transduced_scatter, untransduced_scatter], layout=layout)

plotly.iplot(figure)

# Create the output directory if needed so the export works on a fresh checkout.
output_directory = "out"
os.makedirs(output_directory, exist_ok=True)
figure.write_image(os.path.join(output_directory, "PHP-eB_viral_transcripts_tSNE.svg"))

In [None]:
# t-SNE scatter of the cell set, split by the boolean "CAP-B10>0" obs column.
transduced_mask = vae.adata.obs["CAP-B10>0"]

# Index the embedding directly rather than slicing the AnnData object once per
# coordinate; the selected points are the same.
tsne_coordinates = vae.adata.obsm["X_tsne"]
is_transduced = transduced_mask.values

transduced_scatter = graph_objects.Scatter(
    x=tsne_coordinates[is_transduced, 0],
    y=tsne_coordinates[is_transduced, 1],
    name="Transduced",
    mode="markers",
    marker={
        "size": 2,
        "color": "blue",
        "opacity": 0.8
    }
)

untransduced_scatter = graph_objects.Scatter(
    x=tsne_coordinates[~is_transduced, 0],
    y=tsne_coordinates[~is_transduced, 1],
    name="Untransduced",
    mode="markers",
    marker={
        "size": 2,
        "color": "orange",
        "opacity": 0.8
    }
)

layout = {
    "xaxis": {
        "title": "t-SNE 1"
    },
    # Label the second embedding axis as well so the figure stands on its own.
    "yaxis": {
        "title": "t-SNE 2"
    },
    "plot_bgcolor": "rgba(255, 255, 255, 0)",
    "paper_bgcolor": "rgba(255, 255, 255, 0)",
    "showlegend": False,
    "width": 800,
    "height": 600
}

figure = graph_objects.Figure(data=[transduced_scatter, untransduced_scatter], layout=layout)

plotly.iplot(figure)

# Create the output directory if needed so the export works on a fresh checkout.
output_directory = "out"
os.makedirs(output_directory, exist_ok=True)
figure.write_image(os.path.join(output_directory, "CAP-B10_viral_transcripts_tSNE.svg"))

In [None]:
# S100b: find the var index entry (used as the Ensembl ID) via the "Gene Name" column.
matching_genes = vae.adata.var[vae.adata.var["Gene Name"] == "S100b"]
ensembl_id = matching_genes.index[0]

# Per-cell model-normalized expression for this gene, flattened to 1-D.
normalized_gene_counts = numpy.array(normalized_gene_expression[ensembl_id].values).ravel()

# Per-cell values of X for this gene, densified and flattened to 1-D.
raw_gene_counts = numpy.array(vae.adata[:, ensembl_id].X.todense()).ravel()

# Show log2(count + 1) of the X values on the t-SNE embedding.
log2_raw_gene_counts = numpy.log2(raw_gene_counts + 1)
aavomics.plot_gene_expression(vae.adata.obsm["X_tsne"], log2_raw_gene_counts)

In [None]:
# Olig2: find the var index entry (used as the Ensembl ID) via the "Gene Name" column.
matching_genes = vae.adata.var[vae.adata.var["Gene Name"] == "Olig2"]
ensembl_id = matching_genes.index[0]

# Per-cell model-normalized expression for this gene, flattened to 1-D.
normalized_gene_counts = numpy.array(normalized_gene_expression[ensembl_id].values).ravel()

# Per-cell values of X for this gene, densified and flattened to 1-D.
raw_gene_counts = numpy.array(vae.adata[:, ensembl_id].X.todense()).ravel()

# Show log2(count + 1) of the X values on the t-SNE embedding.
log2_raw_gene_counts = numpy.log2(raw_gene_counts + 1)
aavomics.plot_gene_expression(vae.adata.obsm["X_tsne"], log2_raw_gene_counts)

In [None]:
# Rbfox3: find the var index entry (used as the Ensembl ID) via the "Gene Name" column.
matching_genes = vae.adata.var[vae.adata.var["Gene Name"] == "Rbfox3"]
ensembl_id = matching_genes.index[0]

# Per-cell model-normalized expression for this gene, flattened to 1-D.
normalized_gene_counts = numpy.array(normalized_gene_expression[ensembl_id].values).ravel()

# Per-cell values of X for this gene, densified and flattened to 1-D.
raw_gene_counts = numpy.array(vae.adata[:, ensembl_id].X.todense()).ravel()

# Show log2(count + 1) of the X values on the t-SNE embedding.
log2_raw_gene_counts = numpy.log2(raw_gene_counts + 1)
aavomics.plot_gene_expression(vae.adata.obsm["X_tsne"], log2_raw_gene_counts)