In [1]:
import scanpy as sc
import os
import numpy as np
from tqdm.notebook import tqdm
import glob
from scipy.spatial import KDTree
import matplotlib.pyplot as plt
from tensorflow import keras

2023-12-11 17:16:37.555845: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-11 17:16:37.555875: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-11 17:16:37.556807: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-11 17:16:37.561674: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Put in the path to the reference dataset

In [2]:
# Root folder of the reference MERSCOPE dataset; the reference AnnData is read
# from its 'adatas' subfolder in the next cell. Edit this path for your setup.
merscope_path = r'/projects/2023_Spatial_Paper/Analysis_Alex/merscope_final/SI-Ctrl-L-RAR-R-dist-1-VS120-NP_Beta8'

In [3]:
# Load the reference AnnData (roll 1) from the reference dataset's 'adatas' folder.
reference_file = os.path.join(merscope_path, 'adatas', '07_axes_defined_reference_roll1.h5ad')
reference = sc.read(reference_file)




### Put in the path to the rest of the MERSCOPE datasets

In [None]:
# All MERSCOPE dataset folders to process. glob.glob returns entries in
# arbitrary (filesystem-dependent) order, so sort them to make the processing
# order reproducible between runs and machines.
input_folders = sorted(glob.glob('/projects/2023_Spatial_Paper/Analysis_Alex/merscope_final/SI*'))

##### Calculate epithelial distance for every MERSCOPE dataset

In [None]:

# Render and save every figure below at high resolution.
sc.set_figure_params(dpi=1000, dpi_save=1000)

for input_file in input_folders:
    # fig.savefig does not create missing directories, so make sure the
    # output folder exists before the first figure is written.
    figure_dir = os.path.join(input_file, 'figures', 'axes')
    os.makedirs(figure_dir, exist_ok=True)

    for roll in ['roll1', 'roll2']:
        adata_path = os.path.join(input_file, 'adatas', f'06_before_decomposition_model_{roll}.h5ad')
        ad = sc.read(adata_path)
        coords = ad.obsm['X_spatial']
        points_epi = ad[ad.obs.Class.isin(['Epithelial'])].obsm['X_spatial']

        # Distances from every cell to its 5 nearest cells of any class and to
        # its 5 nearest epithelial cells. Both trees are queried with the
        # cell's own coordinates, so a cell present in a tree is returned as
        # its own nearest neighbour at distance 0.
        all_tree = KDTree(coords)
        epi_tree = KDTree(points_epi)
        distances_all, _ = all_tree.query(coords, k=5)
        distances_epi, _ = epi_tree.query(coords, k=5)

        # Mean epithelial-neighbour distance relative to the mean
        # any-neighbour distance, i.e. corrected for local cell density.
        relative_epi_distance = np.mean(distances_epi, axis=1) / np.mean(distances_all, axis=1)

        # Scale by the 99th percentile so values are comparable across
        # datasets; the plot below clips the colour scale at 1.
        ad.obs['epithelial_distance'] = relative_epi_distance / np.percentile(relative_epi_distance, 99)

        fig = sc.pl.embedding(ad, basis = 'spatial', color='epithelial_distance', return_fig=True, show=False, vmax=1, cmap='viridis', size=4)
        fig.tight_layout()
        plt.axis('equal')
        fig.savefig(os.path.join(figure_dir, f'spatial_epithelial_{roll}.png'))
        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)

        # NOTE: this overwrites the input file in place, adding the
        # 'epithelial_distance' column to obs.
        ad.write(adata_path)

##### Use the reference crypt-villus axis labels to predict the crypt-villus axis for all other datasets, and normalize the longitudinal axis

In [5]:
# Keep only the reference cells flagged by the 'in_villi' column of obs.
villi_mask = reference.obs['in_villi']
reference = reference[villi_mask]

In [6]:
# Features: every obs column whose name contains 'Topic'.
is_topic_column = reference.obs.columns.str.contains('Topic')
training_data = reference.obs.loc[:, is_topic_column].values

# Regression target: the reference crypt-villus axis value of each cell.
training_labels = reference.obs['normalized_crypt_villi_scaled'].values

In [None]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling (and therefore the predicted axis) are reproducible across runs.
keras.utils.set_random_seed(0)

# Define model: a small fully connected regressor from the topic features to
# a single value. The sigmoid output bounds predictions to (0, 1).
# NOTE(review): this presumes the training labels are scaled to [0, 1] - confirm.
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(training_data.shape[1],)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model (silently) on the reference cells
model.fit(training_data, training_labels, epochs=10, batch_size=32, verbose= 0)

In [8]:
for input_file in input_folders:
    # fig.savefig does not create missing directories, so make sure the
    # output folder exists before the first figure is written.
    figure_dir = os.path.join(input_file, 'figures', 'axes')
    os.makedirs(figure_dir, exist_ok=True)

    for roll in ['roll1', 'roll2']:
        adata = sc.read(os.path.join(input_file, 'adatas', f'06_before_decomposition_model_{roll}.h5ad'))

        # NOTE(review): assumes the 'Topic' columns here match the columns
        # used for training in number and order - confirm.
        testing_data = adata.obs[adata.obs.columns[adata.obs.columns.str.contains('Topic')]].values

        # model.predict returns one column per output unit; flatten the single
        # output column to one value per cell before storing it in obs.
        predictions = model.predict(testing_data)
        adata.obs['crypt_villi_axis'] = predictions.ravel()

        # Use 'predicted_longitudinal' when present, otherwise fall back to
        # 'longitudinal'. An explicit column check replaces the former bare
        # `except:`, which also hid unrelated errors.
        if 'predicted_longitudinal' in adata.obs.columns:
            longitudinal_source = 'predicted_longitudinal'
        else:
            longitudinal_source = 'longitudinal'
        # -1.0 is replaced by NaN (presumably a "no value" sentinel - confirm),
        # then the axis is rescaled by a fixed factor of 100000.
        adata.obs['predicted_longitudinal'] = adata.obs[longitudinal_source].replace(-1.0, np.nan)
        adata.obs['predicted_longitudinal'] = adata.obs['predicted_longitudinal']/100000

        adata.write(os.path.join(input_file, 'adatas', f'07_axes_defined_{roll}.h5ad'))

        fig = sc.pl.embedding(adata, basis = 'spatial', color='crypt_villi_axis', return_fig=True, show=False, vmax=1, cmap='viridis', size=4)
        fig.tight_layout()
        plt.axis('equal')
        fig.savefig(os.path.join(figure_dir, f'spatial_crypt_villi_{roll}.png'))
        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)















































