In [None]:
import scanpy as sc
import numpy as np
from tqdm.notebook import tqdm
import scipy.stats as stats
import pandas as pd
import json
import os
import seaborn as sns
import matplotlib.pyplot as plt
import shapely
from sklearn.neighbors import NearestNeighbors
from PIL import Image, ImageDraw

## Unrolling the adata with the best morphology (Reference)

Put in the path to the SI adata to be unrolled

In [None]:
# Root directory of the sample to unroll. This notebook reads
# <path_adata>/adatas/04_tissue_cleared.h5ad and writes into
# <path_adata>/unrolling, <path_adata>/figures and <path_adata>/adatas.
path_adata = "D:/amonell/merscope_final/SI-Ctrl-L-RAR-R-dist-1-VS120-NP_Beta8"

In [None]:
# Load the tissue-cleared AnnData file stored under <path_adata>/adatas.
adata_file = os.path.join(path_adata, "adatas", "04_tissue_cleared.h5ad")
adata = sc.read(adata_file)

In [None]:
# Spatial coordinates of every cell under a short name; later cells index this
# array by row position (e.g. all_spatial[dont_remove]).
all_spatial = adata.obsm["X_spatial"]

In [None]:
# Densify the expression matrix when it is stored sparse.
# The explicit check replaces a bare `except`, which reported every failure
# (including e.g. running out of memory) as "already in array format".
# `.toarray()` is the non-deprecated spelling of the sparse `.A` attribute.
if hasattr(adata.X, "toarray"):
    adata.X = adata.X.toarray()
else:
    print("Adata already in array format")

Identifying cells on the bottom of the villi by neighborhood. Put the bottom neighborhoods in the topics_contain list

In [None]:
def plot_topic_scatter(adata, topic_column):
    """Show one spatial highlight map per topic.

    For every distinct value in ``adata.obs[topic_column]`` a small figure is
    drawn with all cells in light gray and the cells of that topic in red on
    top. Each figure is shown and then closed; nothing is written to disk.

    Parameters
    ----------
    adata
        Object providing ``obs[topic_column]`` and ``obsm["X_spatial"]``.
    topic_column
        Name of the ``obs`` column that holds the topic labels.
    """
    topic_labels = adata.obs[topic_column]
    xy = adata.obsm["X_spatial"]

    for current_topic in topic_labels.unique():
        in_topic = topic_labels == current_topic

        fig, ax = plt.subplots(figsize=(2, 2), dpi=100)

        # Background layer: every cell, regardless of topic
        ax.scatter(
            xy[:, 0],
            xy[:, 1],
            color="lightgray",
            label="Other Cells",
            s=0.1,
            linewidths=0.1,
        )
        # Foreground layer: only the cells of the current topic
        ax.scatter(
            xy[in_topic, 0],
            xy[in_topic, 1],
            color="red",
            label=f"Topic: {current_topic}",
            alpha=0.8,
            s=0.1,
            linewidths=0.1,
        )

        ax.set_xlabel("X Spatial")
        ax.set_ylabel("Y Spatial")
        ax.set_title(f"Spatial Distribution of Topic: {current_topic}")
        ax.legend()
        ax.grid(False)

        # Display, then release the figure so that many topics do not pile up
        plt.show()
        plt.close(fig)

In [None]:
# Draw one highlight map per distinct value of adata.obs["topic"].
plot_topic_scatter(adata, topic_column="topic")

Using the above topic highlight maps, put all neighborhoods appearing at the base of the swiss roll in the following list

In [None]:
# Topic labels of the neighborhoods found at the base of the roll. They are
# compared with adata.obs["topic"] through .isin() in the next cell.
topics_contain = ["2", "12"]

In [None]:
# Split the cells into "base" neighborhoods (listed in topics_contain) and all
# the others, keeping only their spatial coordinates.
is_base_topic = adata.obs["topic"].isin(topics_contain)
spatial_points = np.array(adata[is_base_topic, :].obsm["X_spatial"])
other_spatial = np.array(adata[~is_base_topic, :].obsm["X_spatial"])

Further remove outlier points that lie outside the general spiral shape

In [None]:
# Step 1: Query the nn + 1 nearest neighbors of every base point.
# The query set equals the fitted set, so each point finds itself at distance 0.
# One extra neighbor is requested so that exactly `nn` real neighbors remain
# after the self-match is dropped, which is what the plot labels state.
nn = 100
nbrs = NearestNeighbors(n_neighbors=nn + 1, algorithm="kd_tree").fit(spatial_points)
distances, _ = nbrs.kneighbors(spatial_points)

# Step 2: For each point, remove the distance to itself (which will be 0)
distances = distances[:, 1:]

# Step 3: Calculate the average distance to the nn nearest neighbors for each point
avg_distances = np.mean(distances, axis=1)

# Step 4: Create a histogram to visualize the distribution of average distances
plt.hist(avg_distances, bins=100, edgecolor="black")
plt.xlabel(f"Average Distance to {nn} Nearest Neighbors")
plt.ylabel("Frequency")
plt.title(f"Histogram of Average Distances to {nn} Nearest Neighbors")
plt.grid(True)
plt.show()

Make a manual cutoff to remove outliers

In [None]:
# Threshold on avg_distances: only points strictly below it are kept.
cutoff = 100

In [None]:
# Keep only the base points whose average neighbor distance is below the cutoff.
# NOTE: this overwrites spatial_points. Once points were removed, avg_distances
# no longer matches its length, so re-run from the cell that builds
# spatial_points instead of re-running this cell alone.
spatial_points = spatial_points[avg_distances < cutoff]

See if the spiral looks clean

In [None]:
# Quick visual check of the filtered base points.
plt.scatter(spatial_points[:, 0], spatial_points[:, 1], s=1)

Write the spiral points to an image so that they can be labeled

In [None]:
# Factor by which spatial coordinates are divided to obtain image pixels.
# It is stored on the AnnData object under adata.uns["unrolling_downsize"].
downsize = 10
adata.uns["unrolling_downsize"] = downsize

# Base points and all remaining cells, in downsized (pixel) coordinates
points = spatial_points / downsize
other_points = other_spatial / downsize

# Size of the image in pixels (adjust as needed); downsized coordinates beyond
# this extent fall outside the canvas.
image_width = 2000
image_height = 2000

# Create a white canvas as the base image
base_image = Image.new("RGB", (image_width, image_height), (255, 255, 255))

draw = ImageDraw.Draw(base_image)
point_size = 1  # Half-width of each drawn dot, in pixels

# Blue = base points (drawn first), red = every other cell (drawn on top).
# A single loop replaces two copies of the same drawing code; the counters the
# original incremented were never read and are gone.
for coords, fill_color in ((points, "blue"), (other_points, "red")):
    for point in coords:
        draw.ellipse(
            (
                point[0] - point_size,
                point[1] - point_size,
                point[0] + point_size,
                point[1] + point_size,
            ),
            fill=fill_color,
        )

Save the image to an output path

In [None]:
# Bare expression: the notebook displays the rendered image inline.
base_image

In [None]:
# Create <path_adata>/unrolling if it does not exist yet. exist_ok=True only
# tolerates an already-existing directory; any other failure (wrong path,
# permissions) now raises here instead of being printed as "already exists"
# and surfacing later as a confusing save error.
unrolling_dir = os.path.join(path_adata, "unrolling")
os.makedirs(unrolling_dir, exist_ok=True)
file_path = os.path.join(unrolling_dir, "roll_image.png")
base_image.save(file_path)

In [None]:
# Store the chosen base neighborhoods next to the roll image.
# NOTE(review): "base_topicas.npy" looks like a typo of "base_topics.npy"; the
# name is kept as-is because other code may load the file by this name.
base_topics_file = os.path.join(path_adata, "unrolling", "base_topicas.npy")
np.save(base_topics_file, np.array(topics_contain))

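Open labelme (install it with `pip install labelme`, then run `labelme`) > Open > select roll_image.png > Create Polygons > click along the base of the blue spiral, starting from the outside and catching as little red as possible. When finished clicking, press Enter, enter `point` as the label with group ID 0, and save to roll_image.json in the same directory. When the file is read below, shapes labeled `roll` or `point` are used as the spiral, shapes labeled `reference` are collected separately, and a shape with any other label is treated as a region whose cells are removed.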

Read in the json

In [None]:
json_file_path = os.path.join(path_adata, "unrolling", "roll_image.json")

# Parse the labelme annotation file
with open(json_file_path, "r") as json_file:
    data = json.load(json_file)

# Top-level fields of the annotation file
image_height = data["imageHeight"]
image_width = data["imageWidth"]
image_path = data["imagePath"]
shapes = data["shapes"]

# Sort the annotated shapes into three buckets by label:
#   "roll" / "point" -> points      (the drawn spiral)
#   "reference"      -> use_points
#   anything else    -> removals    (regions whose cells are removed)
removals, points, use_points = [], [], []
for shape in shapes:
    label = shape["label"]
    if label in ("roll", "point"):
        bucket = points
    elif label == "reference":
        bucket = use_points
    else:
        bucket = removals
    bucket.append(shape["points"])

In [None]:
# For every removal polygon, collect the row positions of the cells inside it.
total_indices = []
for ir in removals:
    # Polygon vertices come from the downsized image; scale them back up
    ir_ = np.array(ir) * downsize
    poly = shapely.Polygon(ir_)
    # Vectorized point-in-polygon test. poly.contains(point) is the same
    # predicate as point.within(poly), evaluated for all cells in one call
    # instead of building one shapely.Point per cell in a Python loop.
    inside = shapely.contains_xy(poly, all_spatial[:, 0], all_spatial[:, 1])
    # .tolist() keeps plain Python ints, as the per-cell loop produced
    indices = np.flatnonzero(inside).tolist()
    total_indices.append(indices)

In [None]:
# Flatten the per-polygon index lists into one de-duplicated list.
# NOTE: total_indices changes from a list of lists to a flat list of ints here,
# so this cell cannot be re-run on its own output.
total_indices = list(set([element for sublist in total_indices for element in sublist]))

In [None]:
# Row positions of all cells that are NOT inside any removal polygon, in
# ascending order. A set is used so each membership test is cheap.
index_set = set(total_indices)
dont_remove = [i for i in tqdm(range(len(all_spatial))) if i not in index_set]

In [None]:
# Transform file inside the sample's images folder. It is built from path_adata
# so the sample directory is configured in exactly one place; the previous
# hard-coded absolute path silently kept pointing at the old sample whenever
# path_adata was changed.
micron_to_mosaic_file = os.path.join(
    path_adata, "images", "micron_to_mosaic_pixel_transform.csv"
)

In [None]:
# Read the whitespace-separated, header-less transform file.
# sep=r"\s+" is the documented replacement for the deprecated
# delim_whitespace=True and parses the file the same way.
# NOTE: the name is rebound from the path string to the DataFrame, so the cell
# that defines the path must be re-run before this cell is run again.
micron_to_mosaic_file = pd.read_csv(micron_to_mosaic_file, header=None, sep=r"\s+")

In [None]:
# Convert each annotated polyline to an array and multiply by `downsize`,
# undoing the division applied when the image was drawn.
spiral_main = [(np.array(i) * downsize) for i in points]

In [None]:
# Overlay the drawn spiral line(s) on the cells that are kept (red).
plt.figure(figsize=(4, 4))
for polyline in spiral_main:
    plt.plot(polyline[:, 0], polyline[:, 1])
kept_spatial = all_spatial[dont_remove]
plt.scatter(kept_spatial[:, 0], kept_spatial[:, 1], s=1, color="red")
plt.show()

## Evenly space points along drawn spiral

In [None]:
%matplotlib inline
x_points = []
y_points = []
for k in spiral_main:
    x = k[:, 0]
    y = k[:, 1]
    # Number of points you want to evenly space
    num_points = int(100000 * (len(k) / np.sum([np.shape(l)[0] for l in spiral_main])))

    # Calculate the distances between consecutive points on the line
    distances = np.sqrt(np.diff(x) ** 2 + np.diff(y) ** 2)

    # Calculate the cumulative sum of distances and normalize it to [0, 1]
    cumulative_distances = np.cumsum(distances)
    normalized_distances = cumulative_distances / cumulative_distances[-1]

    # Create evenly spaced values from 0 to 1
    evenly_spaced_values = np.linspace(0, 1, num_points)

    # Use linear interpolation to calculate the x and y coordinates of the points
    x_points_sub = list(np.interp(evenly_spaced_values, normalized_distances, x[:-1]))
    y_points_sub = list(np.interp(evenly_spaced_values, normalized_distances, y[:-1]))
    x_points += x_points_sub
    y_points += y_points_sub
    # Plot the original line and the evenly spaced points
    # plt.plot(x, y, 'bo-', label='Original Line')
    # plt.plot(x_points, y_points, 'r.', label='Evenly Spaced Points')
    # plt.xlabel('X')
    # plt.ylabel('Y')
    # plt.legend()
    # plt.grid(True)
    # plt.show()

## Unroll the spiral

Specify the center of the spiral using the plot above as a reference

In [None]:
# Same overlay as before, plus a marker at the candidate center of the roll.
# The marker position is a literal; keep it equal to `center` in the next cell.
plt.figure(figsize=(4, 4))
for polyline in spiral_main:
    plt.plot(polyline[:, 0], polyline[:, 1])
kept_spatial = all_spatial[dont_remove]
plt.scatter(kept_spatial[:, 0], kept_spatial[:, 1], s=1, color="red")
plt.scatter(9700, 8800)
plt.show()

In [None]:
# [x, y] of the center of the roll, in the same coordinates as X_spatial.
# The commented-out values are presumably centers used for other samples.
# center = [3600, 3400]
center = [9700, 8800]
# D30 #center = [22000, 20000]
# center = [20000, 25000]

In [None]:
# Turn the accumulated Python lists into arrays so they support vectorized
# arithmetic and index-array lookups below.
x_points = np.array(x_points)
y_points = np.array(y_points)

In [None]:
# Distance of every resampled spiral point from the center.
# NOTE(review): `distances` is reassigned in the distal-proximal cell below
# before it is read, so this value appears to be unused.
distances = np.sqrt((x_points - center[0]) ** 2 + (y_points - center[1]) ** 2)

In [None]:
# Coordinates of the cells that were not marked for removal, in dont_remove order.
all_points = adata.obsm["X_spatial"][dont_remove]

In [None]:
# Distance of every kept cell from the center. The matching loop below compares
# it with the spiral radii to pick spiral turns that lie closer to the center.
all_distances = np.sqrt(
    (all_points[:, 0] - center[0]) ** 2 + (all_points[:, 1] - center[1]) ** 2
)

In [None]:
# Identity index over the resampled spiral points: 0 .. len(x_points) - 1.
point_index = np.arange(len(x_points))

In [None]:
# Unpack the center into scalars used by the polar-coordinate code below.
center_x = center[0]
center_y = center[1]

## This is the proper way to get distal-proximal axis

In [None]:
# Index gap (counted in resampled spiral points) above which two angle matches
# are treated as belonging to different turns of the spiral
window_size = 5000

# --- Polar coordinates of every kept cell around the center ---
delta_x = all_points[:, 0] - center_x
delta_y = all_points[:, 1] - center_y
distances = np.hypot(delta_x, delta_y)
angles = np.arctan2(delta_y, delta_x)
angles_degrees_all_points = np.degrees(angles)

# --- Polar coordinates of every resampled spiral point ---
delta_x = x_points - center_x
delta_y = y_points - center_y
# From here on `distances` holds the spiral radii that the loop reads
distances = np.hypot(delta_x, delta_y)
angles = np.arctan2(delta_y, delta_x)
angles_degrees_spiral = np.degrees(angles)

# For every kept cell: index of the assigned spiral point and the distance to it
near_points = []
near_distances = []
angle_calc = True
cd_points = [e for e in range(len(x_points))]
for k in tqdm(range(len(all_points))):

    if angle_calc:
        # Angular difference wrapped into [-180, 180). arctan2 jumps from +180
        # to -180 degrees, so a plain subtraction misses spiral points that sit
        # just across that seam from the cell.
        angle_diff = (
            angles_degrees_all_points[k] - angles_degrees_spiral + 180.0
        ) % 360.0 - 180.0

        # Spiral points within 0.1 degree of the cell's direction
        # (np.where already returns the indices in ascending order)
        values = np.where(np.abs(angle_diff) < 0.1)[0]

        # Positions where consecutive matches are more than window_size apart
        differences = np.diff(values)
        diff_locs = np.where(differences > window_size)[0]

        # Label the runs of matches: every large gap raises the label of all
        # earlier entries by one, so matches separated by a gap get different
        # labels (one label per crossing of the spiral).
        mask = np.zeros(len(values), dtype=int)
        for idx in diff_locs:
            mask[0 : idx + 1] += 1

        # Keep only the runs whose points are all closer to the center than the cell
        candidate_points = np.array([], dtype=int)
        for g in np.unique(mask):
            ids_mask = values[np.where(mask == g)[0]]
            if np.all(distances[ids_mask] < all_distances[k]):
                candidate_points = np.append(candidate_points, ids_mask)
    else:
        candidate_points = cd_points

    # No usable candidate: fall back to searching the whole spiral. This is an
    # explicit check instead of a bare `except` around argmin, so unrelated
    # errors are no longer swallowed.
    if len(candidate_points) == 0:
        candidate_points = point_index

    # Distance from the cell to each candidate spiral point
    distances_to_cell = np.sqrt(
        (x_points[candidate_points] - all_points[k][0]) ** 2
        + (y_points[candidate_points] - all_points[k][1]) ** 2
    )
    nearest_idx = np.argmin(distances_to_cell)
    near_points.append(point_index[candidate_points][nearest_idx])
    near_distances.append(distances_to_cell[nearest_idx])

## Display unrolling

In [None]:
# One slot per cell (kept or removed) for the two unrolled coordinates.
ad_unrolled_points = np.zeros(len(all_spatial))
ad_unrolled_distances = np.zeros(len(all_spatial))

In [None]:
# near_points / near_distances hold one entry per kept cell, in dont_remove
# order; scatter them back to the rows of the full cell list.
ad_unrolled_points[dont_remove] = near_points
ad_unrolled_distances[dont_remove] = near_distances

In [None]:
# Cells inside a removal polygon get the sentinel value -1 in both coordinates.
ad_unrolled_points[total_indices] = -1
ad_unrolled_distances[total_indices] = -1

In [None]:
# True for every cell whose longitudinal value is not the -1 removal sentinel.
adata.obs["not_removed_from_longitudinal"] = ad_unrolled_points != -1

In [None]:
# Two columns per cell: [index of the nearest spiral point, distance to it].
adata.obsm["X_unrolled"] = np.column_stack((ad_unrolled_points, ad_unrolled_distances))

In [None]:
# Plot the unrolled coordinates written to adata.obsm["X_unrolled"] above,
# colored by topic.
sc.set_figure_params(figsize=(10, 5), dpi=300)
sc.pl.embedding(adata, basis="unrolled", color="topic", title="Neighborhood Unrolled")

### Normalize unrolling

In [None]:
# Distribution of the second X_unrolled column (distance to the nearest spiral
# point); presumably inspected to choose the clipping bound used next.
plt.hist(adata.obsm["X_unrolled"][:, 1], bins=100)
plt.show()

In [None]:
# Clamp the distance column to [0, 700]. The lower bound also maps the -1
# sentinel of removed cells to 0.
adata.obsm["X_unrolled"][:, 1] = np.clip(adata.obsm["X_unrolled"][:, 1], 0, 700)

In [None]:
points = adata.obsm["X_unrolled"]

# Define the number of bins
num_bins = 200

# Bin edges along the x-axis (first column). The upper edge is max + 1 so that
# the largest value falls inside the last half-open bin.
bins = np.linspace(np.min(points[:, 0]), np.max(points[:, 0] + 1), num_bins + 1)

# Collect the result in a float array and assign it to adata.obs once at the
# end. The previous chained assignment adata.obs[col][indices] = ... wrote
# floats into an integer column through an intermediate Series, which pandas
# warns about and which has no effect when Copy-on-Write is enabled.
normalized_values = np.zeros(len(adata.obs.index), dtype=float)

# Iterate over each bin
for i in range(num_bins):
    # Row positions of the points within the current bin
    indices = np.where((points[:, 0] >= bins[i]) & (points[:, 0] < bins[i + 1]))[0]

    # An empty bin has no maximum; skip it explicitly instead of relying on a
    # bare `except` that also hid every other error.
    if len(indices) == 0:
        continue

    # Extract the y-values of points within the bin
    y_values = points[indices, 1]
    bin_max = np.max(y_values)

    # Normalize by the maximum of the bin. When that maximum is 0 the values
    # stay 0 rather than becoming NaN from a 0 / 0 division.
    if bin_max != 0:
        normalized_values[indices] = y_values / bin_max

adata.obs["normalized_crypt_villi"] = normalized_values

In [None]:
# Two columns per cell: [index of the nearest spiral point, normalized distance].
longitudinal_index = adata.obsm["X_unrolled"][:, 0]
normalized_height = adata.obs["normalized_crypt_villi"].values
adata.obsm["X_unrolled_norm"] = np.column_stack((longitudinal_index, normalized_height))

In [None]:
# Plot the normalized unrolled coordinates written to
# adata.obsm["X_unrolled_norm"] above, colored by topic.
sc.set_figure_params(figsize=(10, 5), dpi=300)
sc.pl.embedding(
    adata,
    basis="unrolled_norm",
    color="topic",
    title="Neighborhood Unrolled Normalized",
)

## Reordering Neighborhoods

In [None]:
topics = adata.obs["topic"]

# Fall back to the raw distance column only when the normalized column is
# really missing. The explicit membership test replaces a bare try/except that
# would also have hidden unrelated errors.
if "normalized_crypt_villi" not in adata.obs:
    adata.obs["normalized_crypt_villi"] = adata.obsm["X_unrolled"][:, 1]
normalized_crypt_villi = adata.obs["normalized_crypt_villi"]

# Calculate the mean 'normalized_crypt_villi' for each topic
mean_crypt_villi_per_topic = normalized_crypt_villi.groupby(topics).mean()

# Sort the topics based on the mean 'normalized_crypt_villi' in ascending order
sorted_topics = mean_crypt_villi_per_topic.sort_values().index

# Map every current topic label to its 1-based rank in that order
topic_mapping = {
    topic: new_topic for new_topic, topic in enumerate(sorted_topics, start=1)
}

# Update the 'topics' in the adata object
adata.obs["topic"] = topics.map(topic_mapping)

In [None]:
# Mean 'normalized_crypt_villi' of every (renumbered) topic
mean_crypt_villi_per_topic = adata.obs.groupby("topic")["normalized_crypt_villi"].mean()

# Topic labels ordered by ascending mean
sorted_topics = mean_crypt_villi_per_topic.sort_values().index

# Plain list of the ordered labels. Despite the "_str" suffix the labels keep
# their original type; no string conversion happens here.
sorted_topics_str = list(sorted_topics)

In [None]:
# Put the topic categories in ascending-mean order. The `.cat` accessor
# requires adata.obs.topic to have a categorical dtype at this point.
adata.obs.topic = adata.obs.topic.cat.reorder_categories(sorted_topics_str)

In [None]:
sc.set_figure_params(figsize=(10, 5), dpi=300)
fig = sc.pl.embedding(
    adata, basis="unrolled", color="topic", title="Reference Unrolled", return_fig=True
)
fig.tight_layout()

# Create <path_adata>/figures/reference_unrolling, including a missing
# "figures" parent. The previous bare `except` around os.mkdir printed
# "Already made unrolling directory" for any failure, e.g. when the parent
# folder did not exist, and the save below then failed.
figure_dir = os.path.join(path_adata, "figures", "reference_unrolling")
os.makedirs(figure_dir, exist_ok=True)
fig.savefig(os.path.join(figure_dir, "unrolled.png"))

In [None]:
# Convert the topic labels to strings (the column is no longer categorical
# after this cast).
adata.obs.topic = adata.obs.topic.astype(str)

In [None]:
# Use the 'order' parameter in the sc.pl.violin() function to set the order of the violins
# NOTE(review): adata.obs.topic was cast to str in the previous cell, while
# sorted_topics_str holds the labels as they were before that cast; confirm
# that the values passed to `order` match the column values.
ax = sc.pl.violin(
    adata,
    groupby="topic",
    keys="normalized_crypt_villi",
    xlabel="Neighborhood",
    ylabel="Distance from Base of Villi",
    order=sorted_topics_str,
    show=False,
)
# show=False keeps the figure open so that it can be saved here
plt.savefig(
    os.path.join(
        path_adata, "figures", "reference_unrolling", "neighborhoods_sorted.png"
    )
)

## Cutoff

Make a cutoff on the unrolled slice. It will not throw away cells, just mark them. Do not change the parameters without asking Alex

In [None]:
# Scatter of the unrolled coordinates with one color per topic.
# Topic labels are 1-based ranks after the reordering step, hence the "- 1".
# np.asarray makes the lookup work whether adata.uns["topic_colors"] is a
# NumPy array or a plain list (a list cannot be indexed with an integer array).
topic_color_table = np.asarray(adata.uns["topic_colors"])
plt.scatter(
    adata.obsm["X_unrolled"][:, 0],
    adata.obsm["X_unrolled"][:, 1],
    s=0.1,
    linewidths=0.01,
    color=topic_color_table[adata.obs["topic"].values.astype(int) - 1],
)

In [None]:
# Bounds on the longitudinal coordinate (index of the nearest resampled spiral
# point); both comparisons in the next cell are strict. The spiral is resampled
# to at most 100000 points, so the upper bound currently excludes nothing,
# while the lower bound excludes index 0 and the -1 sentinel of removed cells.
upper_cutoff = 100000
lower_cutoff = 0

In [None]:
# True where the longitudinal coordinate lies strictly between the two cutoffs.
longitudinal_position = adata.obsm["X_unrolled"][:, 0]
above_lower = lower_cutoff < longitudinal_position
below_upper = upper_cutoff > longitudinal_position
bool_array = above_lower & below_upper

In [None]:
# Store the cutoff result as a per-cell flag; no cells are dropped.
adata.obs["keep_section_unrolled"] = bool_array

In [None]:
# Expose the first X_unrolled column (nearest spiral point index, -1 for
# removed cells) as an obs column.
adata.obs["longitudinal"] = adata.obsm["X_unrolled"][:, 0]

In [None]:
# Color the cells in their spatial layout by the longitudinal coordinate.
sc.pl.embedding(adata, basis="spatial", color="longitudinal", size=2)

In [None]:
# Save the annotated object as the next numbered file in <path_adata>/adatas.
adata.write(os.path.join(path_adata, "adatas", "05_reference_unrolled.h5ad"))

## Plot crypt-villi and longitudinal axis

In [None]:
# Color the cells in their spatial layout by the normalized crypt-villi value.
sc.set_figure_params(figsize=(7, 7))
sc.pl.embedding(
    adata,
    basis="spatial",
    color="normalized_crypt_villi",
    title="Crypt-Villi Axis",
    size=2,
)

In [None]:
# Same spatial layout, colored by the longitudinal coordinate.
sc.pl.embedding(adata, basis="spatial", color="longitudinal", size=2)