In [1]:
# Standard-library and numerical dependencies used by the cells below.
import os
import sys
import numpy as np
import pandas as pd

# The project helper modules (io_utils, graph) are imported by putting the
# directory below on sys.path.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running the notebook on another host.
sys.path.append('/mnt/data1/spatial/tissue-alignment/traversal')
from io_utils import read_image, read_spatial_data, read_transcription_data, read_embedding_data
from graph import compute_distance_matrix, compute_path_idxs, compute_clusters, compute_path_counts

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Directory of the tissue section analysed in this notebook.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
section_path = '/mnt/data1/spatial/data/colon/UC/B'
# Passed unchanged to both readers below; presumably selects full-resolution
# image/coordinates when True — TODO confirm against io_utils.
fullres = False
# Section image as returned by the project reader.
im = read_image(section_path, fullres)
# pos_df is later indexed by spot id and sorted on its 'y' column;
# spot_radius is the second value returned by the reader.
pos_df, spot_radius = read_spatial_data(section_path, fullres)

## Path Sampling

In [3]:
# Boundary definitions for this section; the first and last entries are used
# as the start and end spot sets for path sampling.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects and can
# execute code from a crafted file — only load .npy files from a trusted source.
bounds = np.load('colon-UC-B-boundaries.npy', allow_pickle=True)

In [4]:
def _ids_sorted_by_y(spot_ids):
    """Return the given spot ids ordered by ascending 'y' value in ``pos_df``."""
    return pos_df.loc[list(spot_ids)].sort_values('y').index.tolist()


# Unique spot ids on the first and the last boundary.
aa = set(bounds[0])
bb = set(bounds[-1])

# The same ids, ordered along the y axis so that list position can serve as a
# rank when start and end spots are paired up.
aa_list = _ids_sorted_by_y(aa)
bb_list = _ids_sorted_by_y(bb)

In [5]:
# Pair every start spot with the end spots whose rank (position in the
# y-sorted list) lies within `radius` of the start spot's own rank.
radius = 15
pairs = [
    (a, b)
    for i, a in enumerate(aa_list)
    # The lower bound is clamped at 0 so a negative index cannot wrap around;
    # the upper bound needs no clamp because slicing stops at the list end.
    for b in bb_list[max(i - radius, 0):i + radius + 1]
]

# Persist the full candidate set of (start, end) pairs.
all_combos = pd.DataFrame(np.asarray(pairs), columns=['start', 'end'])
all_combos.to_csv('path_combos_all.csv', index=False)

In [6]:
n = 99
# Start and end spot of the path shown in figure 2.
fig2_start, fig2_end = 241, 3601

# Draw n candidate pairs reproducibly and renumber the rows 0..n-1.
sampled = all_combos.sample(n=n, random_state=42)
combos = sampled.reset_index(drop=True)

# The figure-2 pair is appended by hand as row n, so it must not already be
# part of the random sample.
is_fig2 = (combos['start'] == fig2_start) & (combos['end'] == fig2_end)
assert not is_fig2.any()
combos.loc[n] = [fig2_start, fig2_end]

combos.to_csv('path_combos_sampled.csv', index=False)

## Traversal and Length Normalization

In [None]:
from tqdm import tqdm

# Embedding model whose embeddings are read for this section.
model = 'triplet-gi-0999'
embeds = read_embedding_data(section_path, model)
counts = read_transcription_data(section_path)

# NOTE(review): `hex` shadows the Python builtin of the same name; the name is
# kept because cells outside this view may refer to it.
dists, hex = compute_distance_matrix(embeds, pos_df)

# Every path is resampled to this many evenly spaced positions.
normalized_length = 101
# Common resampling grid on [0, 1]. It is the same for every path, so it is
# built once instead of once per (start, end) pair.
x = np.linspace(0, 1, normalized_length)

# For each sampled (start, end) pair: compute the path, write its raw
# per-step counts, then write the counts resampled to `normalized_length` rows.
for start, end in tqdm(combos.to_numpy(), desc='COMBOS'):
    output_dir = f'sampled_paths/{start}-{end}'
    os.makedirs(output_dir, exist_ok=True)

    uc_path_idxs = compute_path_idxs(dists, hex, start, end)
    uc_clusters = compute_clusters(embeds, embeds[uc_path_idxs])
    uc_path_counts, _ = compute_path_counts(pos_df, counts, uc_path_idxs, uc_clusters, parallel=40)
    uc_path_counts.to_csv(f'{output_dir}/path_counts.csv', index=False)

    # Positions of the original path steps rescaled to [0, 1].
    # Assumes uc_path_counts has one row per entry of uc_path_idxs — np.interp
    # raises if the lengths differ.
    xp = np.linspace(0, 1, len(uc_path_idxs))

    # Convert to a NumPy array once and interpolate column by column; this
    # avoids a pandas .iloc lookup per column and the extra progress bar that
    # was previously printed for every path. The outer bar still reports
    # overall progress.
    path_values = uc_path_counts.to_numpy(dtype=float)
    out = [np.interp(x, xp, column) for column in path_values.T]

    # NOTE(review): column labels come from `counts`, which presumes
    # compute_path_counts keeps the columns of `counts` in the same order — confirm.
    normalized_path_counts = pd.DataFrame(np.asarray(out).T, columns=counts.columns)
    normalized_path_counts.to_csv(f'{output_dir}/normalized_path_counts.csv', index=False)
