In [2]:
import tensorflow as tf
import scanpy as sc
import os
from scipy.spatial import KDTree
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF
from tqdm.notebook import tqdm
from core_functions.neighborhood_decomposition import *
import glob

### Spatial decomposition is calculatedto use as a feature set in defining axes. We perform spatial decomposition in the following script

In [3]:
output_folder = r'D:/amonell/timecourse_final/analysis/cleaned'

In [4]:
input_folders = glob.glob('D:/amonell/timecourse_final/day*')

In [5]:
combined_adata = sc.read(os.path.join(output_folder, 'integrated_celltyped.h5ad'))

### Celltype classes that should remain relatively consistent at different timepoints pi. Since we calculate the dimensionality reduction on all datasets simulataneously, we expect to capture spatially conerved features across timepoints rather than variation caused by a changing infection.

In [6]:

unchanging_type_keys = ['Epithelial', 'Stromal']

In [7]:
combined_adata_no_immune = combined_adata[combined_adata.obs['Class'].isin(unchanging_type_keys)]

  if not is_categorical_dtype(df_full[k]):


In [8]:
unique_batches = np.unique(combined_adata_no_immune.obs.new_batch.values)

In [9]:
nneighbors = 10
dfs = []
for input_file in input_folders:
    adata = combined_adata_no_immune[combined_adata_no_immune.obs['batch'] == os.path.basename(input_file)]
    adata_arr = np.array(adata.X)
    celltype_cluster = adata.obs.index.values
    list_of_arrays = []
    spatial_points = np.array([adata.obsm['X_spatial'][:,0], adata.obsm['X_spatial'][:,1]]).T
    tree = KDTree(spatial_points)
    for i_bac in tqdm(range(len(celltype_cluster))):
        current_cell = celltype_cluster[i_bac]
        distances, neighbors = tree.query(spatial_points[i_bac], k=nneighbors)
        neighbors = np.array(list(neighbors))
        gene_array = np.array(np.sum(adata_arr[neighbors, :], axis=0)).squeeze()
        list_of_arrays.append(gene_array)
    
    X = pd.DataFrame(np.array(list_of_arrays))
    dfs.append(X)

  0%|          | 0/201261 [00:00<?, ?it/s]

  0%|          | 0/248659 [00:00<?, ?it/s]

  0%|          | 0/175239 [00:00<?, ?it/s]

  0%|          | 0/163953 [00:00<?, ?it/s]

  0%|          | 0/188189 [00:00<?, ?it/s]

  0%|          | 0/155762 [00:00<?, ?it/s]

  0%|          | 0/212996 [00:00<?, ?it/s]

In [10]:
X_arr = pd.concat(dfs)

In [12]:
num_neighborhoods = 15
X = X_arr
f = len(X.columns)
n = len(X.index.tolist())

model = NMF(n_components=num_neighborhoods, random_state=0)
W = model.fit_transform(X)
H = model.components_



In [57]:
for input_file in input_folders:
    if os.path.exists(os.path.join(input_file, 'adatas', '05_reference_unrolled.h5ad')):
        adata = sc.read(os.path.join(input_file, 'adatas', '05_reference_unrolled.h5ad'))
    else:
        adata = sc.read(os.path.join(input_file, 'adatas', '05_unrolled.h5ad'))
    
    superclusters = combined_adata[combined_adata.obs['batch'] == os.path.basename(input_file)].obs['Class'].values
    cluster_observations = combined_adata[combined_adata.obs['batch'] == os.path.basename(input_file)].obs[['leiden', 'Sub_leiden', 'Class', 'Type', 'Subtype', 'Immunocentric_Type']]
    cluster_observations.index = [i.split('-')[0] for i in cluster_observations.index.values]
    adata.obs = adata.obs.merge(cluster_observations, left_on='cell', right_index=True, how='left')
    celltype_cluster = adata.obs.index.values
    base_dictionary = {}
    for i in np.unique(celltype_cluster):
        base_dictionary[i] = 0
    nneighbors = 10
    list_of_arrays = []
    adata_epi = adata[adata.obs['Class'].isin(unchanging_type_keys)]
    spatial_points_epi = np.array([adata_epi.obsm['X_spatial'][:,0], adata_epi.obsm['X_spatial'][:,1]]).T
    spatial_points = np.array([adata.obsm['X_spatial'][:,0], adata.obsm['X_spatial'][:,1]]).T
    adata_epi_arr = np.array(adata_epi.X)
    
    tree = KDTree(spatial_points_epi)
    for i_bac in tqdm(range(len(celltype_cluster))):
        current_cell = celltype_cluster[i_bac]
        distances, neighbors = tree.query(spatial_points[i_bac], k=nneighbors)
        neighbors = np.array(list(neighbors))
        gene_array = np.array(np.sum(adata_epi_arr[neighbors, :], axis=0)).squeeze()
        list_of_arrays.append(gene_array)
    
    X = pd.DataFrame(np.array(list_of_arrays)).astype(H.dtype)
    W = model.transform(X)
    
    topics_frame = pd.DataFrame(W)
    
    topics_frame.columns = ['Topic '+str(i+1) for i in range(len(topics_frame.columns))]
    topics_frame.index = adata.obs.index.tolist()
    def zscore(column):
        return (column - column.mean()) / column.std()
    
    # Apply the z-score function to each column in the dataframe
    topics_frame = topics_frame.apply(zscore)
    adata.obs=adata.obs.merge(topics_frame, left_index=True, right_index=True)
    adata.obs['topic'] = pd.Categorical((np.argmax(topics_frame.values, axis = 1)+1).astype(str))

    sc.set_figure_params(dpi=300)
    figure = sc.pl.embedding(adata, basis='spatial', color='topic', vmax=1, cmap='Blues', title='Neighborhood', size=2, show=False, return_fig=True)
    try:
        os.mkdir(os.path.join(input_file,'figures', 'neighborhoods'))
    except:
        print('Figures/neighborhoods already made.')
    figure.tight_layout()
    plt.axis('equal')
    figure.savefig(os.path.join(input_file,'figures', 'neighborhoods', 'neighborhoods.png'))
    plt.close()
    adata.write(os.path.join(input_file, 'adatas', '06_before_decomposition_model.h5ad'))

  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/253155 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.


  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/325884 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.


  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/225671 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.


  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/220663 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.


  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/276641 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.


  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/222584 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.


  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


  0%|          | 0/258006 [00:00<?, ?it/s]

  if not is_categorical_dtype(values):
  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(


Figures/neighborhoods already made.
