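This script pairs spots from 2 different channels within each image. The pairing is a one-to-one assignment that minimises the total distance between paired spots (`scipy.optimize.linear_sum_assignment`), so each spot is assigned to at most 1 pair and is not necessarily paired with its own nearest neighbour.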

## Input:
`.csv` file of spots in each of the 2 channels, containing the following columns:
- *img* - image name
- *channel* - channel number the spot belongs to  
- *x, y* and *z* coordinates (in voxels; they are scaled by `voxel_size` to nm before distances are computed)

## Output
- `distances.csv` with information on the image name, the x, y, z positions of the matched spots and the distance between them in µm (column `distance_um`).

# Functions and imports
*also part of `pipelines/fish_utils`*

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from itertools import product
from scipy.optimize import linear_sum_assignment

In [2]:
def detect_spot_pairs(path, out, ch, voxel_size=(300, 130, 130)):
    """Pair the spots of two channels image by image and save their distances.

    For every image the pairwise distances between the spots of ``ch[0]`` and
    ``ch[1]`` are computed and a one-to-one assignment that minimises the
    summed distance is chosen with ``linear_sum_assignment``. Each spot
    therefore ends up in at most one pair; surplus spots of the more
    populated channel stay unpaired.

    Parameters
    ----------
    path : str
        CSV file with one row per spot. Required columns: ``img``,
        ``channel``, ``x``, ``y``, ``z``. Columns ``c`` and ``t`` are dropped
        if present; all other columns are carried over to the output.
    out : str
        Path the resulting table is written to (CSV, without index).
    ch : sequence
        The two ``channel`` values to match, ``(first, second)``.
    voxel_size : sequence of 3 numbers
        Size of one voxel along z, y, x. With sizes in nm the ``distance_um``
        column is in micrometres (distances are divided by 1000).

    Returns
    -------
    pandas.DataFrame
        One row per pair with ``img``, ``distance_um``, the coordinates of
        both spots (``z_1`` ... ``x_2``) and the remaining input columns of
        the two spots suffixed with ``.1`` / ``.2``. The same table is
        written to ``out``.
    """
    df = pd.read_csv(path)
    # Drop the last '_'-separated token of the image name so that rows whose
    # names differ only in that token are treated as the same image.
    df['img'] = df['img'].apply(lambda name: name.rsplit('_', 1)[0])

    voxel_size = np.asarray(voxel_size, dtype=float)
    core_columns = ['img', 'distance_um',
                    'z_1', 'z_2', 'y_1', 'y_2', 'x_1', 'x_2']
    result = defaultdict(list)

    # Group by a scalar key (not a one-element list) so that `img` is the
    # plain image name regardless of the pandas version.
    for img, group_df in df.groupby('img'):
        coords_ch1 = group_df.loc[group_df['channel'] == ch[0],
                                  ['z', 'y', 'x']].values
        coords_ch2 = group_df.loc[group_df['channel'] == ch[1],
                                  ['z', 'y', 'x']].values

        # Nothing to pair when one of the channels has no spots in this image.
        if len(coords_ch1) == 0 or len(coords_ch2) == 0:
            continue

        # Pairwise physical distances via broadcasting:
        # (n1, 1, 3) - (1, n2, 3) -> (n1, n2, 3) -> (n1, n2)
        deltas = (coords_ch1[:, None, :] - coords_ch2[None, :, :]) * voxel_size
        distances = np.linalg.norm(deltas, axis=-1)

        row_ind, col_ind = linear_sum_assignment(distances)

        result['img'].extend([img] * len(row_ind))
        result['distance_um'].extend(distances[row_ind, col_ind] / 1000)
        for dim_i, dim in enumerate('zyx'):
            result[f'{dim}_1'].extend(coords_ch1[row_ind, dim_i])
            result[f'{dim}_2'].extend(coords_ch2[col_ind, dim_i])

    if not result:
        # No pair in any image: write an empty table with the core columns
        # instead of failing on missing merge keys.
        result_df = pd.DataFrame(columns=core_columns)
        result_df.to_csv(out, index=False)
        return result_df

    result_df = pd.DataFrame(result)

    # Add acquisition info of both spots. 'c' and 't' are optional columns.
    acquisition = df.drop(columns=['c', 't'], errors='ignore')
    right = ['img', 'x', 'y', 'z']
    left1 = ['img', 'x_1', 'y_1', 'z_1']
    left2 = ['img', 'x_2', 'y_2', 'z_2']

    # Look each spot up only among the rows of its own channel; otherwise a
    # spot of the other channel with identical coordinates would also match
    # and duplicate the pair.
    info_ch1 = acquisition[acquisition['channel'] == ch[0]]
    info_ch2 = acquisition[acquisition['channel'] == ch[1]]

    result_df = result_df.merge(info_ch1, left_on=left1, right_on=right,
                                how='left')
    result_df = result_df.merge(info_ch2, left_on=left2, right_on=right,
                                how='left', suffixes=('.1', '.2'))

    # Remove merged-in columns whose values repeat an earlier column (e.g.
    # 'x.1' repeats 'x_1'). The core columns are always kept, so a constant
    # coordinate (such as z in 2D data) cannot make 'z_2' disappear.
    is_duplicate = result_df.T.duplicated().to_numpy()
    keep = [name in core_columns or not dup
            for name, dup in zip(result_df.columns, is_duplicate)]
    result_df = result_df.loc[:, keep]

    # save to csv
    result_df.to_csv(out, index=False)
    return result_df

# Match spots and calculate distances

In [3]:
# Upper-level experiment folder; all other paths are built relative to it.
in_path = "/home/stumberger/ep2024/example/"

# Location of the spot table, appended to `in_path`.
rel_spot_path = "/detections/merge_shift-corrected.csv"

# The two channel numbers whose spots are matched against each other.
channels = [1, 2]

# Voxel size along z, y, x in nm.
voxel_size = (300, 130, 130)

In [4]:
# Input spot table and output distance table, both located under `in_path`.
# Plain string joining is used on purpose: `rel_spot_path` starts with "/".
path_spots = "/".join((in_path, rel_spot_path))
out_distances = "/".join((in_path, "distances.csv"))

# Match the spots of the selected channels and write the distance table;
# whatever the call returns is kept in `data`.
data = detect_spot_pairs(
    path_spots,
    out_distances,
    channels,
    voxel_size,
)