In [None]:
import scanpy as sc
import numpy as np
from tqdm.notebook import tqdm
import scipy.stats as stats
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
import shapely
import glob
from sklearn.neighbors import NearestNeighbors
from PIL import Image, ImageDraw
import numpy as np
from scipy.spatial import cKDTree
import json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model
from sklearn.preprocessing import OneHotEncoder
from core_functions.unrolling import *

Enter the path to the uninfected data directory.

In [None]:
# Root directory of the uninfected dataset; edit this to point at your copy.
data_dir = r"/mnt/sata1/Analysis_Alex/uninfected"

# Every sub-directory of "<data_dir>/recalculating_axes" is treated as one
# input folder. The list is sorted so the processing order is deterministic
# (glob returns entries in arbitrary filesystem order), and stray plain files
# are dropped so they cannot be mistaken for an input folder later on.
input_folders = sorted(
    folder
    for folder in glob.glob(os.path.join(data_dir, "recalculating_axes", "*"))
    if os.path.isdir(folder)
)

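The images created in the previous script need to be annotated with labelme, producing one JSON file (`label_img.json`) per sample. Each annotation should contain polygon points marking the bottoms of several villi (label `bottom_keypoint`), segmentations around these villi (label `villus`), and Peyer's patches (label `peyers`). The following code parses the labelme JSON for every AnnData object.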

In [None]:
def _polygon_membership_mask(polygon_points, scale, spatial):
    """Return a boolean mask of the rows of ``spatial`` lying inside a polygon.

    Parameters
    ----------
    polygon_points
        Sequence of ``[x, y]`` vertices as stored in a labelme shape.
    scale
        Factor the vertices are multiplied by before the test
        (``adata.uns["unrolling_downsize"]`` in the loop below).
    spatial
        Array whose first two columns are the x / y coordinates to test.

    Returns
    -------
    numpy.ndarray
        Boolean array with one entry per row of ``spatial``. Points exactly on
        the polygon boundary are not counted as inside, matching
        ``Point.within(polygon)``.
    """
    poly = shapely.Polygon(np.asarray(polygon_points, dtype=float) * scale)
    # Vectorised point-in-polygon test; avoids creating one Point per cell.
    return shapely.contains_xy(poly, spatial[:, 0], spatial[:, 1])


def _closest_bottom_point(cell_coords, bottom_points):
    """Return the row of ``bottom_points`` closest to any row of ``cell_coords``.

    Both arguments are 2-D arrays with one point per row and the same number
    of columns. Ties are resolved in favour of the first cell, then the first
    bottom point, in row order.
    """
    pairwise = np.linalg.norm(
        cell_coords[:, None, :] - bottom_points[None, :, :], axis=2
    )
    _, bottom_idx = np.unravel_index(np.argmin(pairwise), pairwise.shape)
    return bottom_points[bottom_idx]


for input_file in input_folders:
    json_file_path = os.path.join(input_file, "label_img.json")
    print(json_file_path)

    # Parse the small annotation file first so that a missing or malformed
    # JSON fails before the more expensive AnnData read.
    with open(json_file_path, "r") as json_file:
        data = json.load(json_file)
    shapes = data["shapes"]

    adata = sc.read(os.path.join(input_file, "03_intial_neighborhoods.h5ad"))
    all_spatial = np.asarray(adata.obsm["X_spatial"])
    n_cells = len(adata.obs.index)
    # Factor applied to every annotation vertex before it is compared with
    # the cell coordinates in adata.obsm["X_spatial"].
    scale = adata.uns["unrolling_downsize"]

    # Single pass over the annotations, dispatching on the shape label.
    bottom_points = None  # reset per sample so stale values are never reused
    villis = []  # one array of cell indices per villus annotation
    peyers_mask = np.zeros(n_cells, dtype=bool)
    for shape in tqdm(shapes):
        label = shape["label"]
        if label == "bottom_keypoint":
            # If several such shapes exist, the last one wins.
            bottom_points = np.asarray(shape["points"], dtype=float) * scale
        elif label in ("villlus", "villus"):  # tolerate the common typo
            inside = _polygon_membership_mask(shape["points"], scale, all_spatial)
            villis.append(np.flatnonzero(inside))
        elif label == "peyers":
            # Union over all annotated Peyer's patches.
            peyers_mask |= _polygon_membership_mask(
                shape["points"], scale, all_spatial
            )

    if bottom_points is None or len(bottom_points) == 0:
        raise ValueError(
            f"No 'bottom_keypoint' annotation with points found in {json_file_path}"
        )

    # 1.0 for cells inside any Peyer's patch polygon, 0.0 otherwise.
    adata.obs["peyers"] = peyers_mask.astype(float)

    normalized_crypt_villi = np.zeros(n_cells)
    # NOTE(review): cells outside every villus keep the value 0, which is also
    # the index of the first villus annotation, so the two cannot be told
    # apart downstream. Kept as-is for compatibility; confirm with consumers.
    villi_number = np.zeros(n_cells)
    for villus_idx, cell_indices in enumerate(villis):
        if len(cell_indices) == 0:
            # An annotation enclosing no cells has nothing to normalise.
            print(
                f"Skipping villus annotation {villus_idx}: "
                "no cells fall inside its polygon"
            )
            continue
        villus_coords = all_spatial[cell_indices]
        # Reference = the bottom keypoint nearest to any cell of this villus.
        reference_point = _closest_bottom_point(villus_coords, bottom_points)
        distances = np.linalg.norm(villus_coords - reference_point, axis=1)
        farthest = distances.max()
        if farthest > 0:
            # Rescale to [0, 1]; skipped when every cell sits on the
            # reference point, to avoid dividing by zero.
            distances = distances / farthest
        normalized_crypt_villi[cell_indices] = distances
        villi_number[cell_indices] = villus_idx

    adata.obs["reference_crypt_villi"] = normalized_crypt_villi
    adata.obs["villi_number"] = villi_number

    sc.pl.embedding(
        adata,
        basis="spatial",
        color=["villi_number", "reference_crypt_villi", "peyers"],
    )

    adata.write(os.path.join(input_file, "04_axes_in_preparation.h5ad"))