In [1]:
# import packages
import itertools

import numpy as np
import pandas as pd
# import modin.pandas as pd # for faster parallelized processing
# from distributed import Client # this is required when using modin with dask
from scipy.spatial.distance import cdist, pdist

In [2]:
# client = Client() # this is required when using modin with dask

In [3]:
def pairwise_distance_vector(arr_coord):
    """Return the distance between every unordered pair of points as a 1D vector.

    Parameters
    ----------
    arr_coord : array-like, shape (n_points, n_dims)
        One row of coordinates per point.

    Returns
    -------
    numpy.ndarray, shape (n_points * (n_points - 1) // 2,)
        Euclidean distances ordered like the upper triangle (above the
        diagonal) of the full distance matrix read row by row:
        (0, 1), (0, 2), ..., (1, 2), ...
    """
    # pdist produces the condensed upper-triangle vector directly, so the full
    # square matrix (with its redundant lower triangle and zero diagonal) is
    # never built only to be sliced apart again.
    return pdist(arr_coord)

def pairwise_cdist(coord_lst_arr):
    """Return the square matrix of distances between all rows of ``coord_lst_arr``.

    The input is compared against itself with ``cdist``, so entry ``[i, j]``
    is the distance between row ``i`` and row ``j``.
    """
    dist_matrix = cdist(coord_lst_arr, coord_lst_arr)
    return dist_matrix

In [4]:
# load the node coordinates, using the CSV's "Unnamed: 0" column as the index
raw_data_dir = r"F:\Dropbox (UFL)\sleap_umap_tube_test_03312023\tube_test_analysis\all_matches_videos\\"
coord_filename = r"all_matches_videos_node_coordinates.csv"
coord_filepath = raw_data_dir + coord_filename
df_data = pd.read_csv(coord_filepath, index_col="Unnamed: 0")

# identifiers that make up the column headers
axis_names = ["x", "y"]
track_names = ["track_0", "track_1"]
node_names = ['ear', 'nose', 'tail_base', 'thorax', 'front_foot', 'rear_foot']
track_0_node_names = ["track_0__" + node for node in node_names]
track_1_node_names = ["track_1__" + node for node in node_names]
track_node_names = [*track_0_node_names, *track_1_node_names]

# one label per unordered pair of track nodes, e.g. "<a>__dist_to__<b>"
dist_pair_names = [
    '__dist_to__'.join(node_pair)
    for node_pair in itertools.combinations(track_node_names, 2)
]

# distinct values of the 'trial' column, in order of first appearance
trials_lst = df_data['trial'].unique().tolist()

# show the loaded frame
df_data

Unnamed: 0,track_0__ear__x,track_0__ear__y,track_0__nose__x,track_0__nose__y,track_0__tail_base__x,track_0__tail_base__y,track_0__thorax__x,track_0__thorax__y,track_0__front_foot__x,track_0__front_foot__y,...,track_1__nose__y,track_1__tail_base__x,track_1__tail_base__y,track_1__thorax__x,track_1__thorax__y,track_1__front_foot__x,track_1__front_foot__y,track_1__rear_foot__x,track_1__rear_foot__y,trial
0,556.438477,759.979004,640.454773,832.518127,263.967560,799.843384,419.760437,728.312378,473.044586,851.900085,...,840.003723,1619.995605,759.691345,1476.038696,739.677002,1432.320679,852.020569,1572.259033,856.523376,0
1,571.781555,759.694214,659.949585,823.887512,272.226288,796.264954,432.189423,732.321838,475.448364,855.442505,...,835.684082,1591.834961,760.021973,1416.202881,743.805481,1284.381104,863.635437,1580.007812,860.230286,0
2,588.577881,760.477417,680.339172,827.781311,288.113892,792.017273,463.890991,735.924683,627.482422,856.359253,...,843.688171,1552.063599,759.735107,1364.216797,740.569031,1287.441162,864.243408,1467.488281,860.258301,0
3,623.700134,760.324402,704.254944,836.014404,308.344482,787.635376,499.883392,739.505371,628.538025,856.430542,...,843.937378,1515.923584,760.057800,1319.845215,743.640137,1283.664429,864.071411,1471.894165,864.390869,0
4,656.213013,763.654358,736.066589,836.423523,331.990662,779.740234,527.751282,739.640259,636.138611,859.856567,...,840.173767,1480.045898,775.621399,1308.401978,743.831970,1216.207520,860.015198,1464.272461,864.410095,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,296.936609,753.601668,-8.247585,816.164760,12.305551,832.356140,138.069631,696.946136,316.208817,860.223969,...,840.019440,819.683594,796.112183,660.223938,724.355652,475.395740,865.695020,736.293945,848.132202,35
187,296.936609,753.601668,83.662815,808.349721,12.305551,832.356140,147.873566,704.102051,316.701782,860.450684,...,840.001587,800.011902,795.780579,620.024780,727.946106,452.330200,864.078308,735.579529,851.557556,35
188,296.936609,753.601668,175.573214,800.534681,12.305551,832.356140,152.005157,712.358521,320.211365,863.882751,...,823.739075,779.762573,804.034729,599.999084,727.864502,436.444977,864.276794,735.582703,851.917786,35
189,296.936609,753.601668,267.483613,792.719642,12.305551,832.356140,152.050751,712.370300,320.283112,863.906067,...,823.874329,779.766235,804.043457,599.859131,727.888672,436.436523,864.361389,735.552307,851.923462,35


In [5]:
# coordinate numpy array pairing x and y coordinates
# The coordinate columns are selected by name, in the same node order that was
# used to build dist_pair_names, so every distance column is labelled with the
# nodes it was actually computed from. A reordered or extended CSV then either
# still lines up or fails loudly with a KeyError instead of being mislabelled.
coord_col_names = [node + "__" + axis for node in track_node_names for axis in axis_names]
arr_data = np.array(df_data[coord_col_names])
# shape: (rows, nodes, axes)
arr_coord_data = arr_data.reshape(arr_data.shape[0], len(track_node_names), len(axis_names))

# calculate all pairwise distances
# a per-row Python loop is not the most efficient, but it gets the job done for this amount of data
pairwise_distance_lst = [pairwise_distance_vector(row_coords) for row_coords in arr_coord_data]
pairwise_distance_arr = np.array(pairwise_distance_lst)
pairwise_distance_df = pd.DataFrame(pairwise_distance_arr)
pairwise_distance_df.columns = dist_pair_names

pairwise_distance_df

Unnamed: 0,track_0__ear__dist_to__track_0__nose,track_0__ear__dist_to__track_0__tail_base,track_0__ear__dist_to__track_0__thorax,track_0__ear__dist_to__track_0__front_foot,track_0__ear__dist_to__track_0__rear_foot,track_0__ear__dist_to__track_1__ear,track_0__ear__dist_to__track_1__nose,track_0__ear__dist_to__track_1__tail_base,track_0__ear__dist_to__track_1__thorax,track_0__ear__dist_to__track_1__front_foot,...,track_1__nose__dist_to__track_1__tail_base,track_1__nose__dist_to__track_1__thorax,track_1__nose__dist_to__track_1__front_foot,track_1__nose__dist_to__track_1__rear_foot,track_1__tail_base__dist_to__track_1__thorax,track_1__tail_base__dist_to__track_1__front_foot,track_1__tail_base__dist_to__track_1__rear_foot,track_1__thorax__dist_to__track_1__front_foot,track_1__thorax__dist_to__track_1__rear_foot,track_1__front_foot__dist_to__track_1__rear_foot
0,110.998480,295.175212,140.298474,124.112957,139.089796,771.018961,672.008606,1063.557168,919.824296,880.704991,...,404.385710,271.583856,209.001145,348.984959,145.341548,209.156792,107.959356,120.550164,151.365216,140.010779
1,109.061364,301.779352,142.250520,135.822748,148.315976,700.015621,616.846292,1020.053458,844.570795,720.140191,...,414.863551,249.785275,104.268110,396.838364,176.379143,324.443561,100.903852,178.146567,200.964651,295.646319
2,113.797842,302.114831,127.081302,103.474102,188.856442,643.245851,561.937193,963.486004,775.894369,706.524781,...,416.296653,242.874717,144.589644,323.592743,188.822031,284.511896,131.369314,145.567337,158.083904,180.091216
3,110.535306,316.536059,125.554839,96.227830,205.339689,571.992740,499.716686,892.223490,696.344985,668.069092,...,408.261186,226.849856,168.499592,356.109962,196.764495,254.486044,113.243009,125.748722,194.163907,188.230007
4,108.036777,324.621146,130.687005,98.274344,228.052726,503.417645,434.314648,823.919798,652.490132,568.224656,...,401.534949,244.453570,133.951402,381.309715,174.562892,277.007226,90.178900,148.318443,197.065197,248.103870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13404,311.530950,295.325425,168.667027,108.350048,233.069459,148.441665,114.389700,524.472644,364.462636,210.742926,...,449.947947,310.674374,106.649462,364.501168,174.861035,351.249054,98.284871,232.676341,145.283449,261.488673
13405,220.188693,295.325425,157.066874,108.661742,249.760212,132.099985,109.499801,504.840381,324.105190,190.662676,...,438.043037,279.283617,91.353412,371.552613,192.345729,354.326326,85.220901,215.994094,169.211969,283.525927
13406,130.122179,295.325425,150.685510,112.710388,207.266635,124.211297,91.380735,485.452784,304.153359,178.077422,...,424.707004,262.612688,90.516884,381.112959,195.235282,348.562876,65.150964,212.974786,183.771964,299.392926
13407,48.966262,295.325425,150.638433,112.748036,207.239132,124.238794,91.462374,485.457333,304.011862,178.123390,...,424.751211,262.555290,90.517681,381.107692,195.361505,348.587929,65.171821,212.912544,183.840331,299.374271


In [6]:
# write the pairwise distances to a CSV file in the same directory as the input
dist_filename = r"all_matches_videos_node_distances.csv"
dist_filepath = raw_data_dir + dist_filename
pairwise_distance_df.to_csv(dist_filepath)