In [7]:
from sklearn.neighbors import NearestNeighbors
import pandas as pd
import numpy as np

This script searches the closest macrophage/monocyte from each smooth muscle cells, based on the mibi image.

Once searched nearest cell, subsequent distanced based analysis can be peformed

In [5]:
# read mibi dataframe
mibidf = pd.read_csv('../../MIBI_analysis/final_data/host_csv/mibi_cell_resultann_0411.csv')
mibidf.shape

(126426, 32)

In [23]:
mibidf["run_fov"] = mibidf["run"] + '_' + mibidf["fov"]
mibidf.run_fov.value_counts()

2023-03-08T18-48-43-DSST3-Run5_fov-1-scan-1    2204
2023-03-07T11-17-56-DSSC2-Run2_fov-2-scan-1    1238
2023-03-19T15-20-40-DSSC4-Run2_fov-3-scan-1    1184
2023-03-05T15-51-42-DSST2-Run1_fov-5-scan-1    1169
2023-03-05T18-22-42-DSST2-Run2_fov-5-scan-1    1150
                                               ... 
2023-03-06T13-40-46-DSST2-Run6_fov-4-scan-1     225
2023-03-28T11-45-15-DSSC1-Run4_fov-6-scan-1     198
2023-03-08T18-48-43-DSST3-Run5_fov-3-scan-1     197
2023-03-05T23-58-06-DSST2-Run4_fov-5-scan-1     181
2023-03-05T23-58-06-DSST2-Run4_fov-2-scan-1      94
Name: run_fov, Length: 202, dtype: int64

Now here is just testing:

In [24]:
temp = mibidf.loc[mibidf.run_fov == '2023-03-08T18-48-43-DSST3-Run5_fov-1-scan-1',:]
locations = temp.loc[:,['x', 'y']].to_numpy()
locations

array([[   5.32830189,  393.09433962],
       [   6.67024129,  331.72117962],
       [   5.91428571,  441.64642857],
       ...,
       [1018.22916667,  731.07638889],
       [1020.58241758,  844.8021978 ],
       [1019.34482759,  886.32758621]])

In [25]:
## get knn function:

def get_spatial_knn_indices(locations, n_neighbors=15, method='kd_tree'):
    """
    Compute k-nearest neighbors of locations.

    Parameters
    ----------
    locations: np.ndarray of shape (n_samples, 2)
        Data matrix
    n_neighbors: int
        Number of nearest neighbors
    method: str, default='kd_tree'
        Method to use when computing the nearest neighbors, one of ['ball_tree', 'kd_tree', 'brute']

    Returns
    -------
    knn_indices: np.ndarray of shape (n_samples, n_neighbors)
        Each row represents the knn of that sample
    """
    locations = np.array(locations)
    assert n_neighbors <= locations.shape[0]
    # k-NN indices, may be asymmetric
    _, knn_indices = NearestNeighbors(
        n_neighbors=n_neighbors, algorithm=method
    ).fit(locations).kneighbors(locations)
    return knn_indices

Metric 1: search nearest 100 cells, and see how long till you see a macrophage for an sma cell

goal is to get per run-fov a medium search value

In [61]:
dd = {'run_fov':[], 'ksearch_medium':[], 'ksearch_dist_medium':[]}

run_fov = np.unique(mibidf.run_fov).tolist()

for rf in run_fov:
    temp = mibidf.loc[mibidf.run_fov == rf,:]
    
    ## check if muscle cell amout is enough
    sma_num = np.sum(temp.annV1 == 'Smooth muscle')
    mac_num = np.sum(temp.annV1 == 'Macrophage') + np.sum(temp.annV1 == 'Monocyte')
    if sma_num < 10 or mac_num < 5: # disregard fovs with too few target cell types
        continue
    if temp.shape[0] < 100: # incase fov does not have much cells
        continue
    else: # enough muscle cell proceed
        locations = temp.loc[:,['x', 'y']].to_numpy()
        k100search = get_spatial_knn_indices(locations, n_neighbors=100) # seach nearest 100 cells
        
        ##### start searching for macrophages for muscle cells
        n = temp.shape[0]
        ann = temp.annV1.to_list()
        knnsearch = []
        knndist = []
        
        for i in range(n):
            if ann[i] == 'Smooth muscle':
                knnlist = k100search[i]
                count = -1
                for index in knnlist:
                    if ann[index] == 'Macrophage' or ann[index] == 'Monocyte':
                        break
                    else:
                        count = count + 1
                knnsearch.append(count)
                ## also calculate euclidean distance
                tarindx = knnlist[count]
                tar_xy = locations[tarindx,:]
                orig_xy = locations[i,:]
                dist = np.linalg.norm(orig_xy - tar_xy) * (400/1024) # convert to um
                knndist.append(dist)
                
        result = np.median(knnsearch)
        result_dist = np.median(knndist)
        ###### end, save result
        dd['run_fov'].append(rf)
        dd['ksearch_medium'].append(result)
        dd['ksearch_dist_medium'].append(result_dist)
               

In [62]:
ksearch = pd.DataFrame.from_dict(dd)
ksearch

Unnamed: 0,run_fov,ksearch_medium,ksearch_dist_medium
0,2023-03-04T16-22-15-DSSC1-Run1_fov-3-scan-1,11.5,20.944791
1,2023-03-04T16-22-15-DSSC1-Run1_fov-4-scan-1,20.5,31.201222
2,2023-03-04T16-22-15-DSSC1-Run1_fov-5-scan-1,8.0,15.914268
3,2023-03-04T19-46-22-DSSC1-Run2_fov-1-scan-1,26.0,31.472637
4,2023-03-04T19-46-22-DSSC1-Run2_fov-2-scan-1,32.0,33.831375
...,...,...,...
138,2023-03-28T11-45-15-DSSC1-Run4_fov-2-scan-1,32.5,35.317639
139,2023-03-28T11-45-15-DSSC1-Run4_fov-3-scan-1,47.0,53.464502
140,2023-03-28T11-45-15-DSSC1-Run4_fov-4-scan-1,28.0,31.207425
141,2023-03-28T11-45-15-DSSC1-Run4_fov-5-scan-1,21.0,32.609008


In [63]:
ksearch.to_csv('../data/KNN100_searchMhigh.csv', index = False)

ok next version

## Cnbhd approach and get percentage

In [64]:
def get_neighborhood_composition(knn_indices, labels):
    """
    Compute the composition of neighbors for each sample.
    Parameters
    ----------
    knn_indices: np.ndarray of shape (n_samples, n_neighbors)
        Each row represents the knn of that sample
    labels: np.ndarray of shape (n_samples, )
        Cluster labels

    Returns
    -------
    comp: np.ndarray of shape (n_samples, n_neighbors)
        The composition (in proportion) of neighbors for each sample.
    """
    labels = list(labels)
    n, k = knn_indices.shape
    unique_clusters = np.sort(np.unique(labels))
    n_clusters = len(unique_clusters)
    label_to_clust_idx = {label: i for i, label in enumerate(unique_clusters)}

    comp = np.zeros((n, n_clusters))
    for i, neighbors in enumerate(knn_indices):
        good_neighbors = [nb for nb in neighbors if nb != -1]
        for nb in good_neighbors:
            comp[i, label_to_clust_idx[labels[nb]]] += 1

    return (comp.T / comp.sum(axis=1)).T

In [129]:
dd = {'run_fov':[], 'perc_medium':[], 'perc_mean':[]}

run_fov = np.unique(mibidf.run_fov).tolist()

for rf in run_fov:
    temp = mibidf.loc[mibidf.run_fov == rf,:]
    
    ## check if muscle cell amout is enough
    sma_num = np.sum(temp.annV1 == 'Smooth muscle')
    mac_num = np.sum(temp.annV1 == 'Macrophage') + np.sum(temp.annV1 == 'Monocyte')
    if sma_num < 10 or mac_num < 5: # disregard fovs with too few target cell types
        continue
    if temp.shape[0] < 100: # incase fov does not have much cells
        continue
    else: # enough muscle cell proceed
        locations = temp.loc[:,['x', 'y']].to_numpy()
        k20search = get_spatial_knn_indices(locations, n_neighbors=20) # seach nearest 100 cells
        
        ##### start searching for macrophages for muscle cells
        n = temp.shape[0]
        ann = temp.annV1.to_list()
        
        # replace counting
        ann = list(map(lambda x: x.replace('Macrophage', 'M'), ann))
        ann = list(map(lambda x: x.replace('Monocyte', 'M'), ann))
        nbhd = get_neighborhood_composition(k20search, ann)
        
        ####
        sma_idx = []
        for i, x in enumerate(ann):
            if x == 'Smooth muscle':
                sma_idx.append(i)
        
        unique_clusters = np.sort(np.unique(ann))
        tid = np.where(unique_clusters == 'M')[0][0]
        nbhd_mean = np.mean(nbhd[sma_idx,tid])
        nbhd_mediam = np.median(nbhd[sma_idx,tid])
        ##
        
        dd['run_fov'].append(rf)
        dd['perc_medium'].append(nbhd_mediam)
        dd['perc_mean'].append(nbhd_mean)
    #break
               

In [130]:
knbhd = pd.DataFrame.from_dict(dd)
knbhd

Unnamed: 0,run_fov,perc_medium,perc_mean
0,2023-03-04T16-22-15-DSSC1-Run1_fov-3-scan-1,0.05,0.057895
1,2023-03-04T16-22-15-DSSC1-Run1_fov-4-scan-1,0.00,0.046053
2,2023-03-04T16-22-15-DSSC1-Run1_fov-5-scan-1,0.10,0.083333
3,2023-03-04T19-46-22-DSSC1-Run2_fov-1-scan-1,0.00,0.048551
4,2023-03-04T19-46-22-DSSC1-Run2_fov-2-scan-1,0.00,0.042241
...,...,...,...
138,2023-03-28T11-45-15-DSSC1-Run4_fov-2-scan-1,0.00,0.021053
139,2023-03-28T11-45-15-DSSC1-Run4_fov-3-scan-1,0.00,0.015217
140,2023-03-28T11-45-15-DSSC1-Run4_fov-4-scan-1,0.00,0.031818
141,2023-03-28T11-45-15-DSSC1-Run4_fov-5-scan-1,0.00,0.043548


In [131]:
knbhd.to_csv('../data/Knbhd_searchMhigh.csv', index = False)

In [19]:
n = temp.shape[0]
ann = temp.annV1.to_list()
knnsearch = []

for i in range(n):
    if ann[i] == 'Smooth muscle':
        knnlist = k100search[i]
        
        count = -1
        for index in knnlist:
            if ann[index] == 'Macrophage':
                break
            else:
                count = count + 1
                
        knnsearch.append(count)
                       

In [21]:
np.median(knnsearch)

23.0