In [None]:
# import libraries
import scanpy as sc
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import imageio as io
import cv2
import matplotlib.patches as patches
import os
import json

In [None]:
# Name of the sample to plot; it is matched against the "batch" column of the combined adata
experiment_name = "day8_SI_r2"

# Xenium output folder for this sample
xenium_output_path = "../data/xenium_output/day8_r2"

# Temporary adata from along the processing pipeline that has the transcripts saved
path_to_adata_with_transcripts = "../data/adata/day8_r2_with_transcripts.h5ad"

# H&E and IF images, already generated and saved
path_to_h_and_e = "../data/images/day8_r2_h_and_e_alignment_gan.npy"
path_to_if = "../data/images/day8_r2_IF_alignment.npy"

# Load the combined adata and keep only the observations whose batch equals experiment_name
whole_adata = sc.read("../data/adata/timecourse.h5ad")
is_current_batch = whole_adata.obs["batch"] == experiment_name
finalized_adata = whole_adata[is_current_batch]

In [None]:
# load in the h and e, DAPI, and IF images
def import_image(path: str):
    """
    Load the max projected DAPI staining stored in a xenium output folder

    Args:
        path (str): path to the xenium folder

    Returns:
        img (np.array): contents of "morphology_mip.ome.tif" as returned by imageio
    """
    return io.imread(os.path.join(path, "morphology_mip.ome.tif"))


# transform the transcript coordinates from microns to pixels
def get_pixel_size(path: str) -> float:
    """
    Get the pixel size for micron to pixel transform from the provided xenium output folder

    Args:
        path (str): path to the xenium folder

    Returns:
        pixel_size (float): pixel size in microns

    Raises:
        FileNotFoundError: if "experiment.xenium" does not exist in the folder
        KeyError: if the file has no "pixel_size" entry
    """

    # Use a context manager so the file handle is closed as soon as the JSON
    # has been parsed, even if parsing raises.
    with open(os.path.join(path, "experiment.xenium")) as file:
        experiment = json.load(file)
    return experiment["pixel_size"]

In [None]:
# Read in the max-projected DAPI image ("morphology_mip.ome.tif") from the xenium output folder
xenium_dapi = import_image(xenium_output_path)

In [None]:
# Read in the adata holding the transcripts; the transcripts table is stored under uns["points"]
transcripts = sc.read(path_to_adata_with_transcripts)
points = transcripts.uns["points"]

In [None]:
## Save the different parts of the transcripts df as arrays for fast indexing.
# Cast split_cell to int BEFORE caching it, so that the cached array and the
# DataFrame column hold the same integer values. Taking the array first would
# leave points_split_cell with the original, un-cast dtype.
points["split_cell"] = points["split_cell"].values.astype(int)

points_x = points.x.values
points_y = points.y.values
points_z = points.z.values
points_gene = points.gene.values
points_cell = points.cell.values
points_split_cell = points.split_cell.values

In [None]:
# Convert the transcript x and y coordinates from microns to pixels
pixel_size = get_pixel_size(xenium_output_path)
pixels_per_micron = 1 / pixel_size
transformed_x = points_x * pixels_per_micron
transformed_y = points_y * pixels_per_micron

In [None]:
# Build a downscaled overview of the DAPI image. The factor is 50x here; it can be
# changed with no side effects other than runtime in the next cell.
down_factor = 50

full_height, full_width = xenium_dapi.shape[:2]
new_width = int(full_width / down_factor)
new_height = int(full_height / down_factor)

# The target size is passed to cv2.resize as (width, height)
thumbnail = cv2.resize(xenium_dapi, (new_width, new_height))

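Define the area to zoom in on (given in microns and converted to pixels) and subset the transcripts to those lying in this region. Note that `min_x`/`max_x` bound the image rows and `min_y`/`max_y` bound the image columns.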

In [None]:
# Bounds of the zoom box, divided by the pixel size and truncated to whole pixels.
# NOTE: despite the names, min_x/max_x are used downstream to slice image rows and
# min_y/max_y to slice image columns.
min_x, max_x = (int(bound / pixel_size) for bound in (2120, 2600))
min_y, max_y = (int(bound / pixel_size) for bound in (1420, 1915))

In [None]:
# Find the transcripts lying strictly inside the box created above.
# transformed_x is tested against the y bounds and transformed_y against the x bounds,
# matching how the box bounds are applied to the image (x bounds -> rows, y bounds -> columns).
inside_columns = (transformed_x > min_y) & (transformed_x < max_y)
inside_rows = (transformed_y > min_x) & (transformed_y < max_x)
subsetted_indices = np.nonzero(inside_columns & inside_rows)[0]

# One row per selected transcript, indexed by the value of its "cell" column
selected_columns = zip(
    transformed_x[subsetted_indices],
    transformed_y[subsetted_indices],
    points_gene[subsetted_indices],
    points_split_cell[subsetted_indices],
)
transcripts_df = pd.DataFrame(
    selected_columns,
    index=points_cell[subsetted_indices],
    columns=["x", "y", "gene", "split_cell"],
)

Plotting TGFb isoforms

In [None]:
# Plot the Tgfb1/2/3 transcripts as coloured circles on top of the cropped DAPI image.
plt.figure(figsize=(10, 4), dpi=200)
ax1 = plt.gca()

# Settings and per-cell lookup table. They are not used by the plotting below;
# kept because other cells of the notebook may rely on them (TODO confirm).
segmentation_face_color = "leiden"
inside_alpha = 0.34
outside_alpha = 0.34

celltypes = []
# The numeric id is the part of each obs index entry after the last underscore
ids = np.array(
    [obs_name.split("_")[-1] for obs_name in finalized_adata.obs.index.values]
).astype(int)
id_df = pd.DataFrame(
    zip(ids, finalized_adata.obs[segmentation_face_color].values),
    columns=["id", segmentation_face_color],
)

# Crop the DAPI image to the zoom box: the x bounds slice rows, the y bounds slice columns
img_cropped = xenium_dapi[min_x:max_x, min_y:max_y]
# Clip the display range at 1.5x the 99.9th percentile of the crop
ax1.imshow(img_cropped, vmax=np.percentile(img_cropped, 99.9) * 1.5, cmap="Greys_r")


tgfb = "#FD04EE"  # pink
tgfb2 = "#1FFD04"  # green
tgfb3 = "#ea871e"  # orange

transcript_colors = [tgfb, tgfb2, tgfb3]

pt_size = 11  # circle radius, in data (pixel) units of the cropped image
gene_subset = ["Tgfb1", "Tgfb2", "Tgfb3"]

for gene, color in zip(gene_subset, transcript_colors):
    gene_transcripts = transcripts_df[transcripts_df["gene"] == gene]

    # Shift the coordinates so they are relative to the top-left corner of the crop
    crop_x = gene_transcripts.x.values - min_y
    crop_y = gene_transcripts.y.values - min_x
    for x, y in zip(crop_x, crop_y):
        circle = patches.Circle(
            (x, y),
            radius=pt_size,
            edgecolor="black",
            linewidth=0.15,
            facecolor=color,
            alpha=1,
            zorder=2,
        )
        ax1.add_patch(circle)

    # Empty scatter that only serves as the legend entry for this gene
    ax1.scatter([], [], c=color, label=gene)

# Draw the legend; without this call the labelled entries above are never shown
ax1.legend(loc="upper right")

ax1.set_xlim(0, max_y - min_y)
ax1.set_ylim(0, max_x - min_x)
ax1.invert_yaxis()
ax1.axis("off")
plt.show()