## Integrate MERFISH experiments after QC filtering

In [None]:
import scanpy as sc
import scvi
from tqdm.notebook import tqdm
import os
import numpy as np
import pandas as pd
from scvi.model.utils import mde
import matplotlib.pyplot as plt
import pickle
import glob

In [None]:
# Set scanpy's default figure size to (4, 4) for the plots drawn in this notebook.
sc.set_figure_params(figsize=(4, 4))

Add the paths to all folders for experiments to be integrated

In [None]:
# One entry per experiment: every path matching "day*" is integrated.
# glob makes no ordering guarantee, so sort the matches to keep the batch order
# (and with it the row order of the concatenated AnnData) reproducible.
input_folders = sorted(glob.glob("D:/amonell/timecourse_final/day*"))

In [None]:
# Destination directory for the integrated AnnData file and the pickled model.
# Forward slashes need no escaping, so a plain string literal is sufficient.
output_folder = "D:/amonell/timecourse_final/analysis/cleaned"

Run scvi integration

In [None]:
# Load the tissue-cleared AnnData of every experiment and tag each cell with
# the name of the folder it came from; that label is used as the scVI batch key.
adata_list = []
for input_file in tqdm(input_folders):
    experiment = sc.read(os.path.join(input_file, "adatas", "04_tissue_cleared.h5ad"))
    experiment.obs["batch"] = os.path.basename(input_file)
    # `experiment` is a freshly loaded object that is rebound on every
    # iteration, so it can be appended directly without an extra copy.
    adata_list.append(experiment)

# Fail early with a clear message instead of an obscure error inside sc.concat.
if not adata_list:
    raise FileNotFoundError(
        "input_folders is empty: no experiment folders were found to integrate"
    )

concatenated = sc.concat(adata_list)

# Observation names may repeat across experiments; make the index unique.
concatenated.obs_names_make_unique()
# Store an independent copy as the "counts" layer. Assigning `concatenated.X`
# itself would alias the same matrix, so any later in-place change to X would
# silently alter the counts as well.
# NOTE(review): assumes X holds raw counts at this point, as the "nb"
# likelihood below suggests; confirm against the upstream QC notebook.
concatenated.layers["counts"] = concatenated.X.copy()
scvi.model.SCVI.setup_anndata(concatenated, layer="counts", batch_key="batch")
vae = scvi.model.SCVI(concatenated, n_layers=2, n_latent=30, gene_likelihood="nb")
vae.train()

Use the integrated scVI latent space to compute the neighbor graph (clustering is run on it further below)

In [None]:
# Store the trained model's latent representation of the cells in obsm.
concatenated.obsm["X_scVI"] = vae.get_latent_representation()
# Build the neighbor graph on the scVI latent representation (use_rep) rather than on X.
sc.pp.neighbors(concatenated, use_rep="X_scVI")

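Use MDE as the new visualization. MDE can occasionally produce extreme outlier coordinates; an optional filter that keeps only cells within 8 units of the origin along each axis is provided, commented out, in a cell further below.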

In [None]:
# Embed the scVI latent representation with scvi's `mde` helper and keep the
# result in obsm for plotting.
concatenated.obsm["X_mde"] = mde(concatenated.obsm["X_scVI"])
# Duplicate the batch labels under "new_batch".
concatenated.obs["new_batch"] = concatenated.obs.batch.values

In [None]:
# Pull the first two MDE columns (plot x / y) and the per-cell batch labels
# out of the AnnData so the plotting code can work on plain arrays.
xcoords, ycoords = concatenated.obsm["X_mde"][:, 0], concatenated.obsm["X_mde"][:, 1]
category_names = concatenated.obs["new_batch"].values

In [None]:
# Batches in the order they should appear in the legend; a batch's position in
# this list also selects its color.
unique_categories = [
    "day6_SI",
    "day7_SI_DMSO",
    "day7_SI_RARi",
    "day8_SI_Ctrl",
    "day8_SI_Xcr1DTR",
    "day30_SI",
    "day90_SI",
]

# One tab10 color per category (tab10 provides ten colors, enough for the seven above).
colors = plt.cm.tab10.colors[: len(unique_categories)]

# Shuffle the points so that no batch is systematically drawn on top of another.
np.random.seed(42)  # fixed seed keeps the draw order reproducible
shuffle_indices = np.random.permutation(len(xcoords))
xcoords = xcoords[shuffle_indices]
ycoords = ycoords[shuffle_indices]
category_names = category_names[shuffle_indices]

# Translate every point's batch label into a color. A dict lookup replaces a
# linear `list.index` search per cell, and labels that are missing from
# `unique_categories` are reported together up front.
category_to_index = {name: position for position, name in enumerate(unique_categories)}
unexpected = sorted(set(category_names) - set(category_to_index), key=str)
if unexpected:
    raise ValueError(
        f"Batches present in the data but missing from unique_categories: {unexpected}"
    )
new_colors = np.array(colors)[[category_to_index[name] for name in category_names]]

plt.figure(figsize=(8, 6), dpi=300)

# Legend entries use a large marker so the colors stay readable even though
# the scattered points themselves are tiny.
legend_handles = [
    plt.Line2D(
        [0],
        [0],
        marker="o",
        color="w",
        label=category,
        markerfacecolor=color,
        markersize=10,
    )
    for category, color in zip(unique_categories, colors)
]

# Draw all points in one call; the legend is built from the explicit handles
# above, so the scatter itself needs no label.
plt.scatter(xcoords, ycoords, c=new_colors, s=0.05, linewidths=0.1)

plt.legend(handles=legend_handles, loc="best")

plt.title("Combined Timecourse")
plt.grid(False)
plt.axis("off")

# Save next to the experiment folders. `exist_ok=True` tolerates an existing
# directory while still surfacing real failures (e.g. permissions), which a
# bare `except` would hide.
figures_dir = os.path.join(os.path.dirname(input_folders[0]), "figures")
os.makedirs(figures_dir, exist_ok=True)

plt.savefig(os.path.join(figures_dir, "combined.png"))
plt.show()

In [None]:
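# Optional outlier filter (currently disabled): uncomment the two lines below to keep
# only cells whose MDE coordinates lie strictly between -8 and 8 on both axes.
# indices_new = np.where((concatenated.obsm["X_mde"][:, 0] > -8) & (concatenated.obsm["X_mde"][:, 0] < 8) & (concatenated.obsm["X_mde"][:, 1] > -8) & (concatenated.obsm["X_mde"][:, 1] < 8))[0]
# concatenated = concatenated[indices_new, :]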

In [None]:
# Cluster the cells with scanpy's Leiden implementation using its default settings.
sc.tl.leiden(concatenated)

In [None]:
# Create the output directory together with any missing parent directories.
# exist_ok=True makes re-running this cell safe when the directory already exists.
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Write the concatenated AnnData to "concatenated_integrated.h5ad" in the output folder.
concatenated.write(os.path.join(output_folder, "concatenated_integrated.h5ad"))

## Save our SCVI VAE

In [None]:
import pickle

# Serialize the trained model into the output folder as "vae_object.pkl".
# NOTE(review): scvi-tools models also expose their own `save` method; confirm
# whether downstream code depends on this pickle before switching to it.
file_path = os.path.join(output_folder, "vae_object.pkl")

with open(file_path, "wb") as handle:
    pickle.dump(vae, handle)