In [None]:
# This notebook performs clustering and DE analysis for Ted's scRNAseq data
import pandas as pd
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import torch
import scvi
import anndata as ad
from collections import Counter

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Scanpy logging level, followed by scanpy's header printout.
sc.settings.verbosity = 2
sc.logging.print_header()

# Default look for every scanpy figure produced in this notebook.
sc.set_figure_params(
    dpi=150,
    facecolor='white',
    figsize=(8, 8),
    vector_friendly=False,
    fontsize=12,
)

# Torch float32 matmul precision and a fixed scvi-tools seed for repeatable runs.
torch.set_float32_matmul_precision("high")
scvi.settings.seed = 0

In [None]:
# Folder that holds the per-sample .h5ad files loaded in the next cells.
# NOTE: both paths already end with "/", and later cells insert their own
# separator when building file names from them.
input_folder = '.../Desktop/BasicResults/'
# Folder that receives the saved scVI model, the DE gene CSV and the final .h5ad.
out_folder = '.../Desktop/Integration/'

In [None]:
# Read the E12.5 sample and label every observation with its sample and group.
adata_e12 = sc.read_h5ad(filename=f"{input_folder}/E12p5.h5ad")
adata_e12.obs["sample"] = "E12p5"
adata_e12.obs["group"] = "Palate"
adata_e12

In [None]:
# Quick inspection: show which container type holds the 'spatial' entry of obsm.
type(adata_e12.obsm['spatial'])

In [None]:
# Read the E13.5 sample and label every observation with its sample and group.
adata_e13 = sc.read_h5ad(filename=f"{input_folder}/E13p5.h5ad")
adata_e13.obs["sample"] = "E13p5"
adata_e13.obs["group"] = "Palate"
adata_e13

In [None]:
# Read the E15.5 sample and label every observation with its sample and group.
adata_e15 = sc.read_h5ad(filename=f"{input_folder}/E15p5.h5ad")
adata_e15.obs["sample"] = "E15p5"
adata_e15.obs["group"] = "Palate"
adata_e15

In [None]:
# Stack the three per-sample objects into one AnnData (merge strategy "same").
# NOTE(review): observation names are taken as-is from each sample — confirm
# they are unique across samples.
adata = ad.concat(
    [adata_e12, adata_e13, adata_e15],
    merge="same",
)
adata

In [None]:
# Store the current state of the concatenated object in .raw; the dot plot
# further down reads from it via use_raw=True.
adata.raw = adata

In [None]:
# Keep an independent copy of X as the 'counts' layer that scVI is set up on
# below (layer="counts"). Copying means a later in-place change to adata.X
# cannot silently alter the counts the model was registered with.
# NOTE(review): this presumes X still holds raw counts at this point — confirm.
adata.layers['counts'] = adata.X.copy()

In [None]:
# Register with scVI which parts of adata to use: the 'counts' layer as input,
# 'sample' as the batch label, and two obs columns as continuous covariates.
scvi.model.SCVI.setup_anndata(
    adata,
    layer="counts",
    batch_key="sample",
    continuous_covariate_keys=["pct_counts_mt", "total_counts"],
)

In [None]:
# Build the SCVI model on the registered adata with 2 layers and 30 hidden units.
model = scvi.model.SCVI(adata, n_layers=2, n_hidden=30)

In [None]:
# Train the model, requesting GPU training.
# NOTE(review): newer scvi-tools releases replace `use_gpu` with the
# `accelerator` / `devices` arguments — confirm against the installed version.
model.train(use_gpu=True)

In [None]:
# Key under adata.obsm where the scVI latent representation is stored.
SCVI_LATENT_KEY = "X_scVI"
# Key under adata.layers where the scVI-normalized expression is stored.
SCVI_NORMALIZED_KEY = "scvi_normalized"
# Name of the sub-directory the trained model is saved into.
SCVI_MODEL_PATH = "scvi_model"

In [None]:
# Save the trained model under <out_folder>/scvi_integration/<SCVI_MODEL_PATH>.
# "/" is used as the separator, matching the cells that read the inputs and
# write the final .h5ad; a hard-coded backslash is only a path separator on
# Windows and would become part of the directory name elsewhere.
outpath = "{}/scvi_integration".format(out_folder)
model.save("{}/{}".format(outpath, SCVI_MODEL_PATH), overwrite=True)

In [None]:
# Save the latent representation from the trained model into adata.obsm.
adata.obsm[SCVI_LATENT_KEY] = model.get_latent_representation()
# Save the model's normalized expression values as a layer.
# The literal 10e4 equals 1e5 (100000).
# NOTE(review): confirm this scale is intended rather than 1e4.
adata.layers[SCVI_NORMALIZED_KEY] = model.get_normalized_expression(library_size=10e4)

In [None]:
# Build the neighbour graph on the scVI latent representation (20 neighbours),
# then compute the UMAP embedding from that graph.
sc.pp.neighbors(adata, use_rep=SCVI_LATENT_KEY, n_neighbors=20)
sc.tl.umap(adata, min_dist=0.3)

In [None]:
# UMAP coloured by sample of origin.
sc.pl.umap(
    adata,
    color=["sample"],
    palette=sc.pl.palettes.default_102,
    frameon=True,
    ncols=1,
    size=1.0,
)

In [None]:
# UMAP coloured by three keratin genes, read from the scVI-normalized layer.
sc.pl.umap(
    adata,
    color=["Krt14", "Krt5", "Krt15"],
    layer='scvi_normalized',
    color_map='hot',
    frameon=True,
    ncols=4,
    size=1.5,
)

In [None]:
# Leiden clustering at resolution 0.5; labels are stored in adata.obs under
# SCVI_CLUSTERS_KEY.
# neighbors were already computed using scVI
SCVI_CLUSTERS_KEY = "leiden_scVI"
sc.tl.leiden(adata, key_added=SCVI_CLUSTERS_KEY, resolution=0.5)

In [None]:
# UMAP coloured by the Leiden cluster labels.
sc.pl.umap(
    adata,
    color=[SCVI_CLUSTERS_KEY],
    palette=sc.pl.palettes.default_102,
    size=1.0,
    frameon=True,
)

In [None]:
# Differential expression from the trained model, grouped by Leiden cluster;
# show the first rows of the resulting table.
de_df = model.differential_expression(groupby="leiden_scVI")
de_df.head()

In [None]:
# Pick up to two marker genes per Leiden cluster from the DE table.
# markers maps each cluster label to a list of gene names (de_df index values).
markers = {}
cats = adata.obs['leiden_scVI'].cat.categories
for c in cats:
    # Rows of de_df whose comparison label is "<cluster> vs Rest".
    cid = f"{c} vs Rest"
    cell_type_df = de_df.loc[de_df.comparison == cid]

    # One combined filter: positive mean log fold change, Bayes factor above
    # 2.5, and non_zeros_proportion1 above 0.1.
    keep = (
        (cell_type_df.lfc_mean > 0)
        & (cell_type_df["bayes_factor"] > 2.5)
        & (cell_type_df["non_zeros_proportion1"] > 0.1)
    )
    cell_type_df = cell_type_df[keep]

    # The first two remaining rows, in de_df's existing order, become the
    # markers; the list may be shorter (or empty) if fewer rows pass the filter.
    markers[c] = cell_type_df.index.tolist()[:2]

In [None]:
# Compute the dendrogram of Leiden clusters from the scVI latent representation;
# the dot plot and heatmap below request it with dendrogram=True.
sc.tl.dendrogram(adata, groupby="leiden_scVI", use_rep="X_scVI")

In [None]:
# Dot plot of the selected markers per Leiden cluster, read from adata.raw,
# scaled per gene, and saved with the '_DE_dotplot' suffix.
sc.pl.dotplot(
    adata,
    markers,
    groupby="leiden_scVI",
    use_raw=True,
    standard_scale="var",
    dendrogram=True,
    swap_axes=False,
    color_map="Blues",
    save='_DE_dotplot',
)

In [None]:
# Heatmap of the selected markers per Leiden cluster, read from the
# scVI-normalized layer, scaled per gene, and saved with the '_DE_heatmap' suffix.
sc.pl.heatmap(
    adata,
    markers,
    groupby="leiden_scVI",
    layer="scvi_normalized",
    standard_scale="var",
    dendrogram=True,
    swap_axes=True,
    show_gene_labels=True,
    cmap='inferno',
    figsize=(16, 10),
    save='_DE_heatmap',
)

In [None]:
import csv

# Write the marker dictionary to <out_folder>/DE_genes.csv, one row per cluster.
# - "/" is used as the separator, matching the cells that read the inputs and
#   write the final .h5ad (a hard-coded backslash only works on Windows).
# - newline='' hands line-ending control to the csv module, as its
#   documentation requires; without it extra blank rows appear on Windows.
with open('{}/DE_genes.csv'.format(out_folder), 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in markers.items():
        # key is the cluster label; value is its list of marker genes, which
        # the writer stores as a single cell using the list's string form.
        writer.writerow([key, value])

In [None]:
# Display the per-observation annotation table (includes 'sample', 'group'
# and the Leiden cluster labels added above).
adata.obs

In [None]:
# Split the integrated object back into one view per sample for spatial plots.
# The labels must match the values written to obs['sample'] when each sample
# was loaded ('E12p5', 'E13p5', 'E15p5'); any other label selects zero
# observations and leaves the spatial plots empty.
adata12 = adata[adata.obs['sample'].isin(['E12p5'])]
adata13 = adata[adata.obs['sample'].isin(['E13p5'])]
adata15 = adata[adata.obs['sample'].isin(['E15p5'])]

In [None]:
# Spatial plot of the E12.5 subset coloured by Leiden cluster.
sc.pl.spatial(
    adata12,
    color=['leiden_scVI'],
    palette=sc.pl.palettes.default_102,
    spot_size=72,
    show=True,
)

In [None]:
# Spatial plot of the E13.5 subset coloured by Leiden cluster.
sc.pl.spatial(
    adata13,
    color=['leiden_scVI'],
    palette=sc.pl.palettes.default_102,
    spot_size=72,
    show=True,
)

In [None]:
# Spatial plot of the E15.5 subset coloured by Leiden cluster
# (this sample uses spot_size=75 rather than 72).
sc.pl.spatial(
    adata15,
    color=['leiden_scVI'],
    palette=sc.pl.palettes.default_102,
    spot_size=75,
    show=True,
)

In [None]:
# Persist the integrated object (latent space, clusters, layers) as gzip-compressed h5ad.
adata.write_h5ad(
    f"{out_folder}/240625_Palate_integrate_3.h5ad",
    compression='gzip',
)