In [None]:
# Install the extra third-party packages imported by this notebook (imageio, cv2 via opencv-python, alphashape).
%pip install imageio opencv-python alphashape

In [None]:
# import libraries
import scanpy as sc
import pandas as pd
import numpy as np
import os
from tqdm.notebook import tqdm
import geopandas as gpd
import imageio as io
import shapely.affinity as sa
import cv2
import json
import matplotlib.patches as patches

### Things that we may want to overlay:
##### Transcripts, aligned IF, Xenium DAPI, H&E, cell segmentation, and transcripts/cell masks colored by cell type

In [None]:
# Batch label that selects this experiment inside the combined AnnData object
experiment_name = "day8_SI_r2"
whole_adata = sc.read("../data/adata/timecourse.h5ad")
# Subsetting returns a view; take an explicit copy because later cells write to
# `.obs` and `.uns` of this object, which would otherwise force an implicit
# copy (with a warning) at the first assignment.
finalized_adata = whole_adata[whole_adata.obs.batch == experiment_name].copy()

# the following has the transcripts saved. It is a temporary adata along the processing pipeline
path_to_adata_with_transcripts = "../data/adata/day8_r2_with_transcripts.h5ad"

# all h and e and IF are generated and saved
path_to_h_and_e = "../data/images/day8_r2_h_and_e_alignment_gan.npy"
path_to_if = "../data/images/day8_r2_IF_alignment.npy"

# Directory that holds the Xenium run output (morphology image, experiment.xenium)
xenium_output_path = "../data/xenium_output/day8_r2"

In [None]:
def import_image(path: str, filename: str = "morphology_mip.ome.tif"):
    """Read an image stored inside a Xenium output directory.

    Parameters
    ----------
    path
        Directory that contains the image file.
    filename
        Name of the image file inside ``path``. Defaults to
        ``"morphology_mip.ome.tif"``, the file this notebook has always read.

    Returns
    -------
    The array returned by ``imageio``'s ``imread`` for that file.
    """
    # `io` is the imageio module here (imported as `io` at the top of the notebook).
    return io.imread(os.path.join(path, filename))


# load in H&E, DAPI, and IF images
xenium_dapi = import_image(xenium_output_path)

# Only a missing file means "this modality does not exist for the experiment".
# Any other failure (corrupt array, permission problem, ...) is allowed to
# surface instead of being silently replaced by the DAPI image.
try:
    IF_image = np.load(path_to_if)
except FileNotFoundError:
    print("No IF for this experiment")
    # NOTE(review): later cells index IF_image with a third (channel) index;
    # confirm the DAPI fallback is usable there.
    IF_image = xenium_dapi

try:
    h_an_e = np.load(path_to_h_and_e)
except FileNotFoundError:
    print("No H&E for this experiment")
    # NOTE(review): later cells slice h_an_e with a channel axis as well.
    h_an_e = xenium_dapi

In [None]:
# Read in the adata holding the transcripts
transcripts = sc.read(path_to_adata_with_transcripts)
# Per-transcript table stored under `.uns["points"]`; the next cell reads its
# x, y, z, gene, cell and split_cell columns.
points = transcripts.uns["points"]

In [None]:
# Cast the cell-assignment column to integers FIRST, so that the array cached
# below carries the same integer dtype as the cell ids it is merged against in
# the plotting cells (previously the array was taken before the cast).
points["split_cell"] = points["split_cell"].values.astype(int)

## save the different parts of the transcripts df for fast indexing
points_x = points.x.values
points_y = points.y.values
points_z = points.z.values
points_gene = points.gene.values
points_cell = points.cell.values
points_split_cell = points.split_cell.values

In [None]:
def get_pixel_size(path: str) -> float:
    """Return the ``pixel_size`` entry of a Xenium run's metadata file.

    Parameters
    ----------
    path
        Directory containing the JSON file ``experiment.xenium``.

    Returns
    -------
    The value stored under the ``"pixel_size"`` key.

    Raises
    ------
    FileNotFoundError
        If ``experiment.xenium`` does not exist in ``path``.
    KeyError
        If the file has no ``"pixel_size"`` entry.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(os.path.join(path, "experiment.xenium")) as file:
        experiment = json.load(file)
    return experiment["pixel_size"]


# Convert the transcript coordinates from microns to image pixels:
# one micron spans (1 / pixel_size) pixels.
pixel_size = get_pixel_size(xenium_output_path)
pixels_per_micron = 1 / pixel_size
transformed_x = points_x * pixels_per_micron
transformed_y = points_y * pixels_per_micron

In [None]:
# Downscaling the dapi overview by 50x (you can change this with no side effects other than runtime in the next cell)

down_factor = 50

# Array shape is (rows, columns) = (height, width)
dapi_height, dapi_width = xenium_dapi.shape[:2]
new_width = int(dapi_width / down_factor)
new_height = int(dapi_height / down_factor)

# cv2.resize takes the target size as (width, height)
thumbnail = cv2.resize(xenium_dapi, (new_width, new_height))

In [None]:
# Region-of-interest box, expressed in coordinates of the `down_factor`-times
# downscaled thumbnail (the next cell multiplies these values by `down_factor`).
# NOTE: the naming is swapped relative to the usual convention: the "y" bounds
# are compared against transcript x coordinates and used as image columns,
# while the "x" bounds are compared against transcript y coordinates and used
# as image rows.
min_y = 555
max_y = 600

min_x = 368
max_x = 379

In [None]:
# Get the transcripts falling in the box you created

# Scale the box from thumbnail coordinates up to full-resolution pixels.
# NOTE: this rescales the variables in place, so running this cell a second
# time without re-running the previous cell multiplies by down_factor again.
min_x, max_x = min_x * down_factor, max_x * down_factor
min_y, max_y = min_y * down_factor, max_y * down_factor

# Strictly-inside test; the "y" bounds limit transcript x and vice versa.
subsetted_indices = np.flatnonzero(
    (transformed_x > min_y)
    & (transformed_x < max_y)
    & (transformed_y > min_x)
    & (transformed_y < max_x)
)

box_x = transformed_x[subsetted_indices]
box_y = transformed_y[subsetted_indices]
box_gene = points_gene[subsetted_indices]
box_split_cell = points_split_cell[subsetted_indices]

# One row per transcript inside the box, indexed by its `cell` value.
transcripts_df = pd.DataFrame(
    zip(box_x, box_y, box_gene, box_split_cell),
    index=points_cell[subsetted_indices],
    columns=["x", "y", "gene", "split_cell"],
)

### Specify regions

In [None]:
# Maps each `Subtype` label to the tissue region it is assigned to.
# The string "None" (not the Python None) marks subtypes without a region.
region_dict = {
    "B-Cell": "LP",
    "Cd4_T-Cell": "LP",
    "Cd8_T-Cell_P14": "LP",
    "Cd8_T-Cell_aa+": "Epithelial",
    "Cd8_T-Cell_ab+": "Epithelial",
    "Complement_Fibroblast": "Muscularis",
    "DC2": "LP",
    "Early_Enterocyte": "Epithelial",
    "Enterocyte_1": "Epithelial",
    "Enterocyte_2": "Epithelial",
    "Enterocyte_3": "Epithelial",
    "Enteroendocrine": "Epithelial",
    "Eosinophil": "LP",
    "Fibroblast": "Muscularis",
    "Fibroblast_Ncam1": "Muscularis",
    "Fibroblast_Pdgfra+": "Muscularis",
    # NOTE(review): the original key carries a trailing space. It is kept, and
    # the unpadded spelling is mapped too, so the label resolves either way —
    # confirm which spelling the `Subtype` column actually uses.
    "Fibroblast_Pdgfrb+ ": "Muscularis",
    "Fibroblast_Pdgfrb+": "Muscularis",
    "Goblet": "Epithelial",
    "ILC": "LP",
    "ISC": "Crypt",
    "Lymphatic": "Muscularis",
    "MAIT": "LP",
    "Macrophage": "LP",
    "MegakaryocytePlatelet": "None",
    "Monocyte": "None",
    "Myofibroblast": "Muscularis",
    "NK-Cell": "LP",
    "Neuron": "Muscularis",
    "Paneth": "Crypt",
    "Resting Fibroblast": "Crypt",
    "T-Cell": "Epithelial",
    "T-Cell gd": "Epithelial",
    "Transit_Amplifying": "Crypt",
    "Tuft": "Epithelial",
    "Vascular Endothelial": "Muscularis",
    "cDC1": "LP",
}

In [None]:
# Look up the region of every cell from its subtype; subtypes missing from
# region_dict yield None.
all_regions = [region_dict.get(subtype) for subtype in finalized_adata.obs["Subtype"]]

finalized_adata.obs["Region"] = pd.Categorical(all_regions)

# One colour per Region category.
# NOTE(review): presumably listed in the order of the categorical's categories — confirm.
finalized_adata.uns["Region_colors"] = [
    "#FFB6C1",
    "#ADD8E6",
    "#FDFA72",
    "#90EE90",
    "#D3D3D3",
]

## Money shot

In [None]:
# Integer factor by which the H&E image is downscaled for the Part 1 overview
plot_down = 4

### Part 1: H&E overview with the region of interest outlined

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [None]:
# Downscale the H&E image by `plot_down` in both directions.
# cv2.resize takes the target size as (width, height), i.e. (columns, rows);
# the array shape is (rows, columns, ...), so the two entries must be swapped.
# Passing them in shape order distorts any non-square image and misplaces the
# rectangle drawn on top of it.
h_an_e_height, h_an_e_width = np.shape(h_an_e)[:2]
thumbnail = cv2.resize(
    h_an_e, (h_an_e_width // plot_down, h_an_e_height // plot_down)
)
# Define the RGB value for black
black_color = [0, 0, 0]

# Create a mask for black pixels
black_pixels = np.all(thumbnail[:, :, :3] == black_color, axis=-1)

# Replace black pixels with white. Only the first three channels are written,
# so an extra (e.g. alpha) channel does not break the assignment.
thumbnail[black_pixels, :3] = [255, 255, 255]

In [None]:
fig, ax0 = plt.subplots(figsize=(10, 10))

# Show the downscaled H&E overview, cropping away the left 300 columns and the
# top 400 rows (the y limits are given high-to-low so row 0 stays at the top).
ax0.imshow(thumbnail)
thumb_height, thumb_width = np.shape(thumbnail)[:2]
ax0.set_xlim(300, thumb_width)
ax0.set_ylim(thumb_height, 400)

# Outline the region of interest, converted to thumbnail coordinates.
box_left = min_y // plot_down
box_top = min_x // plot_down
rectangle = Rectangle(
    xy=(box_left, box_top),
    width=max_y // plot_down - box_left,
    height=max_x // plot_down - box_top,
    linewidth=2,
    edgecolor="black",
    facecolor="none",
)
ax0.add_patch(rectangle)
ax0.axis("off")
plt.show()

### Part 2: H&E crop of the region of interest with the zoom box outlined

In [None]:
# Zoom box, given as pixel offsets from the top-left corner of the first box
second_min_y = 1440
second_max_y = 1600

second_min_x = 150
second_max_x = 450

# Absolute (full-resolution) bounds of the zoom box
side1 = min_y + second_min_y
side2 = min_y + second_max_y
side3 = min_x + second_min_x
side4 = min_x + second_max_x

# Strictly-inside test; sides 1/2 limit transcript x, sides 3/4 limit transcript y.
subsetted_indices_second = np.flatnonzero(
    (transformed_x > side1)
    & (transformed_x < side2)
    & (transformed_y > side3)
    & (transformed_y < side4)
)

zoom_x = transformed_x[subsetted_indices_second]
zoom_y = transformed_y[subsetted_indices_second]
zoom_gene = points_gene[subsetted_indices_second]
zoom_split_cell = points_split_cell[subsetted_indices_second]

# One row per transcript inside the zoom box, indexed by its `cell` value.
transcripts_df_second = pd.DataFrame(
    zip(zoom_x, zoom_y, zoom_gene, zoom_split_cell),
    index=points_cell[subsetted_indices_second],
    columns=["x", "y", "gene", "split_cell"],
)

In [None]:
fig, ax3 = plt.subplots(figsize=(10, 4), dpi=300)

# Full-resolution H&E restricted to the first box (rows min_x:max_x, columns min_y:max_y)
img_cropped = h_an_e[min_x:max_x, min_y:max_y]
ax3.imshow(img_cropped)

# Outline the zoom box; its offsets are already relative to the crop.
rectangle = Rectangle(
    xy=(second_min_y, second_min_x),
    width=second_max_y - second_min_y,
    height=second_max_x - second_min_x,
    linewidth=4,
    edgecolor="black",
    facecolor="none",
)
ax3.add_patch(rectangle)
ax3.axis("off")
plt.show()

### Part 3: IF crop of the region of interest with the zoom box outlined

In [None]:
fig, ax4 = plt.subplots(figsize=(10, 4), dpi=300)

# IF channels shown as the first and second plane of the composite image
if_channels = [2, 1]
if_crop = IF_image[min_x:max_x, min_y:max_y]

mapped_ims = []
for channel in if_channels:
    image = if_crop[:, :, channel]

    # Min-max scale the channel over the crop
    low = np.min(image)
    high = np.max(image)
    normalized_image = (image - low) / (high - low)

    if channel == 2:
        # Add the top-hat and subtract the black-hat response computed with a
        # 30x30 elliptical kernel, then double the intensities.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (30, 30))
        topHat = cv2.morphologyEx(normalized_image, cv2.MORPH_TOPHAT, kernel)
        blackHat = cv2.morphologyEx(normalized_image, cv2.MORPH_BLACKHAT, kernel)
        normalized_image = (normalized_image + topHat - blackHat) * 2

    mapped_ims.append(normalized_image)

# The third plane of the composite is left empty (all zeros).
mapped_ims.append(np.zeros(np.shape(if_crop[:, :, if_channels[-1]])))
full_im = np.dstack(mapped_ims)
ax4.imshow(full_im)

# Outline the zoom box in white
rectangle2 = Rectangle(
    xy=(second_min_y, second_min_x),
    width=second_max_y - second_min_y,
    height=second_max_x - second_min_x,
    linewidth=4,
    edgecolor="white",
    facecolor="none",
)
ax4.add_patch(rectangle2)
ax4.axis("off")
plt.show()

### Part 4: DAPI crop with cell outlines colored by Leiden cluster

In [None]:
import alphashape


def make_alphashape(points: pd.DataFrame, alpha: float):
    """Build the alpha shape of a set of 2-D points.

    ``points`` is converted to a numpy array and handed to
    ``alphashape.alphashape`` together with ``alpha``; whatever that call
    returns is passed straight back to the caller.
    """
    coordinates = np.array(points)
    return alphashape.alphashape(coordinates, alpha=alpha)


plt.figure(figsize=(10, 4), dpi=300)
ax1 = plt.gca()

segmentation_face_color = "leiden"
inside_alpha = 0.34
outside_alpha = 0.34

# The integer cell id is the last "_"-separated token of each obs name; pair it
# with the obs column used for colouring so it can be joined onto transcripts.
ids = np.array([i.split("_")[-1] for i in finalized_adata.obs.index.values]).astype(int)
id_df = pd.DataFrame(
    zip(ids, finalized_adata.obs[segmentation_face_color].values),
    columns=["id", segmentation_face_color],
)
transcripts_with_obs = transcripts_df.merge(
    id_df, left_on="split_cell", right_on="id", how="left"
)
# Rows left with missing values by the left join (no matching cell id) are dropped.
transcripts_with_obs = transcripts_with_obs.dropna(axis=0)


print("Making Shapes")
# split_cell values 0 and -1 are excluded
# (presumably "not assigned to a cell" — TODO confirm).
gby = transcripts_with_obs[
    (transcripts_with_obs.split_cell != 0) & (transcripts_with_obs.split_cell != -1)
].groupby("split_cell")

# Loop-invariant lookups: category order and the matching colour palette.
categories = finalized_adata.obs[segmentation_face_color].cat.categories
palette = finalized_adata.uns[f"{segmentation_face_color}_colors"]

shapes = []
celltypes = []
for _, cell_transcripts in tqdm(gby):
    shapes.append(make_alphashape(cell_transcripts[["x", "y"]].values, alpha=0.05))
    ctype = cell_transcripts[segmentation_face_color].values[0]
    cell_location = np.where(categories == ctype)[0]
    # Index with a scalar position: this works whether the palette is stored
    # as a list or as a numpy array, so no try/except fallback is needed.
    celltypes.append(palette[cell_location[0]])
shapes = gpd.GeoSeries(shapes)
colors = celltypes


img_cropped = xenium_dapi[min_x:max_x, min_y:max_y]
ax1.imshow(img_cropped, vmax=np.percentile(img_cropped, 99.9), cmap="Greys_r")

# Shift every shape from full-resolution coordinates into crop coordinates.
adjusted_shapes = gpd.GeoSeries(
    [sa.translate(original_polygon, -min_y, -min_x) for original_polygon in shapes]
)

# Flatten to single polygons. Multi-part geometries are walked through `.geoms`
# because iterating a MultiPolygon directly is not supported by Shapely 2.
# Geometry types other than Polygon / MultiPolygon are skipped.
colored_polygons = []
for geometry, color in zip(adjusted_shapes, colors):
    if geometry.geom_type == "Polygon":
        colored_polygons.append((geometry, color))
    elif geometry.geom_type == "MultiPolygon":
        colored_polygons.extend((poly, color) for poly in geometry.geoms)

# Filled interiors
for poly, color in colored_polygons:
    patch = plt.Polygon(
        list(zip(*poly.exterior.xy)),
        facecolor=color,
        edgecolor="none",
        alpha=inside_alpha,
        zorder=1,
    )
    ax1.add_patch(patch)

# Polygon edges, coloured like the interiors
for poly, color in colored_polygons:
    ax1.plot(*poly.exterior.xy, color=color, alpha=outside_alpha)


# Outline the zoom box in white
rectangle2 = Rectangle(
    (second_min_y, second_min_x),
    second_max_y - second_min_y,
    second_max_x - second_min_x,
    linewidth=4,
    edgecolor="white",
    facecolor="none",
    zorder=2,
)
ax1.add_patch(rectangle2)
ax1.set_xlim(0, max_y - min_y)
# Limits given high-to-low so that row 0 is drawn at the top (image orientation).
ax1.set_ylim(max_x - min_x, 0)
ax1.axis("off")
plt.show()

### Part 5: Zoomed DAPI view with cell outlines and transcripts

In [None]:
import random

plt.figure(figsize=(5, 10), dpi=300)
ax1 = plt.gca()

segmentation_face_color = "leiden"
inside_alpha = 0.34
outside_alpha = 0.8

# The integer cell id is the last "_"-separated token of each obs name; pair it
# with the obs column so it can be joined onto the transcripts of the zoom box.
ids = np.array([i.split("_")[-1] for i in finalized_adata.obs.index.values]).astype(int)
id_df = pd.DataFrame(
    zip(ids, finalized_adata.obs[segmentation_face_color].values),
    columns=["id", segmentation_face_color],
)
transcripts_with_obs = transcripts_df_second.merge(
    id_df, left_on="split_cell", right_on="id", how="left"
)
# Rows left with missing values by the left join (no matching cell id) are dropped.
transcripts_with_obs = transcripts_with_obs.dropna(axis=0)


print("Making Shapes")
# split_cell values 0 and -1 are excluded
# (presumably "not assigned to a cell" — TODO confirm).
gby = transcripts_with_obs[
    (transcripts_with_obs.split_cell != 0) & (transcripts_with_obs.split_cell != -1)
].groupby("split_cell")

# Loop-invariant lookups: category order and the matching colour palette.
categories = finalized_adata.obs[segmentation_face_color].cat.categories
palette = finalized_adata.uns[f"{segmentation_face_color}_colors"]

shapes = []
# Per-cell cluster colours are still collected in `celltypes`, but every shape
# in this panel is drawn in light grey (see `colors` below).
celltypes = []
for _, cell_transcripts in tqdm(gby):
    shapes.append(make_alphashape(cell_transcripts[["x", "y"]].values, alpha=0.05))
    ctype = cell_transcripts[segmentation_face_color].values[0]
    cell_location = np.where(categories == ctype)[0]
    # Scalar index works for both list and numpy-array palettes.
    celltypes.append(palette[cell_location[0]])
shapes = gpd.GeoSeries(shapes)
colors = ["#D3D3D3" for s in range(len(shapes))]


# DAPI restricted to the first box, then to the zoom box inside it
img_cropped = xenium_dapi[min_x:max_x, min_y:max_y][
    second_min_x:second_max_x, second_min_y:second_max_y
]
ax1.imshow(
    img_cropped,
    vmax=np.percentile(img_cropped, 99.9),
    vmin=np.percentile(img_cropped, 30),
    cmap="Greys_r",
)

# Shift every shape from full-resolution coordinates into zoom-box coordinates.
adjusted_shapes = gpd.GeoSeries(
    [
        sa.translate(original_polygon, -min_y - second_min_y, -min_x - second_min_x)
        for original_polygon in shapes
    ]
)

# Flatten to single polygons. Multi-part geometries are walked through `.geoms`
# because iterating a MultiPolygon directly is not supported by Shapely 2.
# Geometry types other than Polygon / MultiPolygon are skipped.
colored_polygons = []
for geometry, color in zip(adjusted_shapes, colors):
    if geometry.geom_type == "Polygon":
        colored_polygons.append((geometry, color))
    elif geometry.geom_type == "MultiPolygon":
        colored_polygons.extend((poly, color) for poly in geometry.geoms)

# Filled interiors
for poly, color in colored_polygons:
    patch = plt.Polygon(
        list(zip(*poly.exterior.xy)),
        facecolor=color,
        edgecolor="none",
        alpha=inside_alpha,
        zorder=1,
    )
    ax1.add_patch(patch)

# Thick polygon edges
for poly, color in colored_polygons:
    ax1.plot(*poly.exterior.xy, color=color, linewidth=4, alpha=outside_alpha)

transcripts_genes_only = transcripts_df_second


# Function to generate a random color in RGB format
def random_color():
    """Return a random colour as a "#rrggbb" hex string."""
    return "#{:02x}{:02x}{:02x}".format(
        random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)
    )


pt_size = 1.2
gene_subset = finalized_adata.var.index.values
# One random colour per gene. At least 500 are generated, as before, but the
# list now grows with the panel so larger gene sets cannot run past its end.
transcript_colors = [random_color() for _ in range(max(500, len(gene_subset)))]

# Draw every transcript as a small circle (radius in data units), shifted into
# zoom-box coordinates and coloured by gene.
for gene, gene_color in zip(gene_subset, transcript_colors):
    transcripts_genes_only_current = transcripts_genes_only[
        transcripts_genes_only["gene"] == gene
    ]
    for x, y in zip(
        transcripts_genes_only_current.x.values, transcripts_genes_only_current.y.values
    ):
        circle = patches.Circle(
            (x - (min_y + second_min_y), y - (min_x + second_min_x)),
            radius=pt_size,
            edgecolor="black",
            linewidth=0.01,
            facecolor=gene_color,
            alpha=1,
            zorder=2,
        )
        ax1.add_patch(circle)

# Empty scatter artists that only carry a (colour, label) pair per gene
for gene, gene_color in zip(gene_subset, transcript_colors):
    plt.scatter([], [], c=gene_color, label=gene)

# imshow already draws row 0 at the top. Toggling the axis unconditionally
# would flip the panel upside down, so only invert when it is not inverted yet.
if not ax1.yaxis_inverted():
    ax1.invert_yaxis()
ax1.axis("off")
plt.show()

In [None]:
# Binary flag per cell: 1 when the subtype is "Cd8_T-Cell_P14", 0 otherwise
p14s = [
    1 if subtype == "Cd8_T-Cell_P14" else 0
    for subtype in finalized_adata.obs["Subtype"].values
]
finalized_adata.obs["p14"] = p14s

In [None]:
plt.figure(figsize=(5, 10), dpi=300)
ax1 = plt.gca()

segmentation_face_color = "p14"
inside_alpha = 0.2
outside_alpha = 0.8
celltypes = []

# The integer cell id is the last "_"-separated token of each obs name; pair it
# with the obs column so it can be joined onto the transcripts of the zoom box.
ids = np.array([i.split("_")[-1] for i in finalized_adata.obs.index.values]).astype(int)
id_df = pd.DataFrame(
    zip(ids, finalized_adata.obs[segmentation_face_color].values),
    columns=["id", segmentation_face_color],
)
transcripts_with_obs = transcripts_df_second.merge(
    id_df, left_on="split_cell", right_on="id", how="left"
)
# Rows left with missing values by the left join (no matching cell id) are dropped.
transcripts_with_obs = transcripts_with_obs.dropna(axis=0)


print("Making Shapes")
# split_cell values 0 and -1 are excluded
# (presumably "not assigned to a cell" — TODO confirm).
gby = transcripts_with_obs[
    (transcripts_with_obs.split_cell != 0) & (transcripts_with_obs.split_cell != -1)
].groupby("split_cell")


# NOTE(review): the shapes built here are translated further down but are never
# added to the axes in this cell; every one is assigned the same grey.
shapes = []
colors = []
for group in tqdm(gby):
    shapes.append(make_alphashape(group[1][["x", "y"]].values, alpha=0.05))
    colors.append("#D3D3D3")
shapes = gpd.GeoSeries(shapes)

# IF channels shown as the first and second plane of the composite image
if_channels = [2, 1]
# IF restricted to the first box, then to the zoom box inside it
if_zoom = IF_image[min_x:max_x, min_y:max_y][
    second_min_x:second_max_x, second_min_y:second_max_y
]

mapped_ims = []
for channel in if_channels:
    image = if_zoom[:, :, channel]

    # Min-max scale the channel over the zoom box
    min_val = np.min(image)
    max_val = np.max(image)
    normalized_image = (image - min_val) / (max_val - min_val)

    if channel == 2:
        # Add the top-hat and subtract the black-hat response computed with a
        # 50x50 elliptical kernel, then square the intensities.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (50, 50))
        topHat = cv2.morphologyEx(normalized_image, cv2.MORPH_TOPHAT, kernel)
        blackHat = cv2.morphologyEx(normalized_image, cv2.MORPH_BLACKHAT, kernel)
        normalized_image = normalized_image + topHat - blackHat
        normalized_image = normalized_image**2

    mapped_ims.append(normalized_image)

# The third plane of the composite is left empty (all zeros).
mapped_ims.append(np.zeros(np.shape(if_zoom[:, :, if_channels[-1]])))
full_im = np.dstack(mapped_ims)

img_cropped = full_im
# The composite is a 3-plane (RGB) image; matplotlib ignores vmin / vmax / cmap
# for such input, so they are not passed (and the percentiles not computed).
ax1.imshow(img_cropped)

# Shift every shape from full-resolution coordinates into zoom-box coordinates.
adjusted_shapes = [
    sa.translate(original_polygon, -min_y - second_min_y, -min_x - second_min_x)
    for original_polygon in shapes
]

transcripts_genes_only = transcripts_df_second

# Fixed colour per marker gene
xist = "#00FFFF"  # cyan
cd8a = "#FEFDFD"  # near-white
cd8b1 = "#a7a7a7"  # grey
gzmb = "#FF00FF"  # magenta

transcript_colors = [cd8a, cd8b1, gzmb, xist]

pt_size = 1.5
gene_subset = ["Cd8a", "Cd8b1", "Gzmb", "Xist"]

# Draw every transcript of the marker genes as a small circle (radius in data
# units), shifted into zoom-box coordinates.
for gene, gene_color in zip(gene_subset, transcript_colors):
    transcripts_genes_only_current = transcripts_genes_only[
        transcripts_genes_only["gene"] == gene
    ]
    for x, y in zip(
        transcripts_genes_only_current.x.values, transcripts_genes_only_current.y.values
    ):
        circle = patches.Circle(
            (x - (min_y + second_min_y), y - (min_x + second_min_x)),
            radius=pt_size,
            edgecolor="black",
            linewidth=0.01,
            facecolor=gene_color,
            alpha=1,
            zorder=2,
        )
        ax1.add_patch(circle)

# Empty scatter artists provide one legend entry per gene
for gene, gene_color in zip(gene_subset, transcript_colors):
    plt.scatter([], [], c=gene_color, label=gene)

ax1.axis("off")
ax1.legend()
plt.show()