In [1]:
# import packages
import pandas as pd
import numpy as np
import h5py
import glob
import scipy
from scipy.interpolate import interp1d

In [2]:
def get_info(filename):
    """Load the tracking datasets stored in one HDF5 file.

    Args:
        filename: Path of the HDF5 file; it is opened read-only.

    Returns:
        A 4-tuple ``(dset_names, locations, node_names, track_names)``:
          * ``dset_names``  - list of the top-level keys found in the file.
          * ``locations``   - the "tracks" dataset read into memory and
            transposed (``.T``), i.e. with its axis order reversed.
          * ``node_names``  - entries of "node_names" decoded to ``str``.
          * ``track_names`` - entries of "track_names" decoded to ``str``.
    """

    def _decode_all(dataset):
        # Entries are stored as bytes; turn each one into a str.
        return [entry.decode() for entry in dataset[:]]

    with h5py.File(filename, "r") as h5_file:
        top_level_keys = list(h5_file.keys())
        # Read the full dataset before transposing so the result is an
        # in-memory array that stays valid after the file is closed.
        tracks = h5_file["tracks"][:].T
        nodes = _decode_all(h5_file["node_names"])
        tracks_labels = _decode_all(h5_file["track_names"])

    return top_level_keys, tracks, nodes, tracks_labels

def fill_missing(Y, kind="linear"):
    """Fill NaN gaps independently along the first axis of every slice.

    All dimensions after the first are flattened so that every column is a
    1-D series indexed by the first axis. Within each column:

      * gaps between valid samples are interpolated with
        ``scipy.interpolate.interp1d`` using the requested ``kind``;
      * leading and trailing gaps are set to the nearest valid value.

    Columns without any NaN are left untouched. Columns that contain no
    valid sample at all cannot be interpolated and are returned as NaN
    instead of raising. The input array is never modified.

    Args:
        Y: numpy array with at least one dimension; NaN marks missing values.
        kind: Interpolation kind forwarded to ``interp1d`` (default "linear").

    Returns:
        A new array with the same shape as ``Y`` in which the gaps are filled.

    Raises:
        ValueError: Propagated from ``interp1d`` when a column has more than
            one valid sample but still too few for the requested ``kind``.
    """

    # Store initial shape.
    initial_shape = Y.shape

    # Flatten after first dim.
    flat = Y.reshape((initial_shape[0], -1))
    # reshape() hands back a view for contiguous input and a copy otherwise;
    # copy explicitly in the view case so the caller's data is never touched.
    if np.may_share_memory(flat, Y):
        flat = flat.copy()

    # Interpolate along each slice.
    for i in range(flat.shape[-1]):
        # Basic slicing yields a view, so writes to y land directly in flat.
        y = flat[:, i]

        valid = ~np.isnan(y)
        if valid.all() or not valid.any():
            # Nothing to fill, or nothing to fill from.
            continue

        x = np.flatnonzero(valid)
        if x.size > 1:
            # Interior gaps: out-of-range queries come back as NaN and are
            # handled by the nearest-value step below.
            f = interp1d(x, y[x], kind=kind, fill_value=np.nan, bounds_error=False)
            xq = np.flatnonzero(~valid)
            y[xq] = f(xq)

        # Fill leading or trailing NaNs with the nearest non-NaN values
        # (np.interp clamps to the first/last sample outside its range).
        mask = np.isnan(y)
        y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])

    # Restore to initial shape.
    return flat.reshape(initial_shape)

In [3]:
# get list of files from path
# (the raw string ends in a doubled backslash because a raw string literal
# cannot end in a single one; the value is reused as a prefix for output paths)
raw_data_dir = r"F:\Dropbox (UFL)\sleap_umap_tube_test_03312023\tube_test_analysis\all_matches_videos\\"
# glob.glob returns matches in arbitrary order; sort them so that the position
# of each file in the list (used below as the trial number) is reproducible
filenames_lst = sorted(glob.glob(raw_data_dir+'*.h5'))

In [4]:
# build one dataframe per trial (h5 file) and stack them into one dataframe
axis_lst = ["x", "y"] # labels for the coordinate axis index
trial_df_lst = [] # collects the per-trial dataframes
for i, h5_path in enumerate(filenames_lst): # i doubles as the trial number
    # read the h5 file and fill gaps in the tracked coordinates
    dset_names, locations, node_names, track_names = get_info(h5_path)
    locations = fill_missing(locations)
    # gather one column per (mouse, node, axis) combination; locations is
    # indexed as [row, node, axis, mouse]
    trial_columns = {}
    for i_mouse in range(locations.shape[3]):
        for i_node in range(locations.shape[1]):
            for i_axis in range(locations.shape[2]):
                # column name is "<track>__<node>__<axis>"
                col_name_str = f"{track_names[i_mouse]}__{node_names[i_node]}__{axis_lst[i_axis]}"
                trial_columns[col_name_str] = locations[:, i_node, i_axis, i_mouse]
    # create the dataframe in one go from the collected columns
    df_data_trial = pd.DataFrame(trial_columns)
    # tag every row with the trial it came from
    df_data_trial['trial'] = i
    trial_df_lst.append(df_data_trial)

# stack the per-trial dataframes row-wise (each keeps its own row index)
df_all_data = pd.concat(trial_df_lst, axis=0)
df_all_data

Unnamed: 0,track_0__ear__x,track_0__ear__y,track_0__nose__x,track_0__nose__y,track_0__tail_base__x,track_0__tail_base__y,track_0__thorax__x,track_0__thorax__y,track_0__front_foot__x,track_0__front_foot__y,...,track_1__nose__y,track_1__tail_base__x,track_1__tail_base__y,track_1__thorax__x,track_1__thorax__y,track_1__front_foot__x,track_1__front_foot__y,track_1__rear_foot__x,track_1__rear_foot__y,trial
0,556.438477,759.979004,640.454773,832.518127,263.967560,799.843384,419.760437,728.312378,473.044586,851.900085,...,840.003723,1619.995605,759.691345,1476.038696,739.677002,1432.320679,852.020569,1572.259033,856.523376,0
1,571.781555,759.694214,659.949585,823.887512,272.226288,796.264954,432.189423,732.321838,475.448364,855.442505,...,835.684082,1591.834961,760.021973,1416.202881,743.805481,1284.381104,863.635437,1580.007812,860.230286,0
2,588.577881,760.477417,680.339172,827.781311,288.113892,792.017273,463.890991,735.924683,627.482422,856.359253,...,843.688171,1552.063599,759.735107,1364.216797,740.569031,1287.441162,864.243408,1467.488281,860.258301,0
3,623.700134,760.324402,704.254944,836.014404,308.344482,787.635376,499.883392,739.505371,628.538025,856.430542,...,843.937378,1515.923584,760.057800,1319.845215,743.640137,1283.664429,864.071411,1471.894165,864.390869,0
4,656.213013,763.654358,736.066589,836.423523,331.990662,779.740234,527.751282,739.640259,636.138611,859.856567,...,840.173767,1480.045898,775.621399,1308.401978,743.831970,1216.207520,860.015198,1464.272461,864.410095,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,296.936609,753.601668,-8.247585,816.164760,12.305551,832.356140,138.069631,696.946136,316.208817,860.223969,...,840.019440,819.683594,796.112183,660.223938,724.355652,475.395740,865.695020,736.293945,848.132202,35
187,296.936609,753.601668,83.662815,808.349721,12.305551,832.356140,147.873566,704.102051,316.701782,860.450684,...,840.001587,800.011902,795.780579,620.024780,727.946106,452.330200,864.078308,735.579529,851.557556,35
188,296.936609,753.601668,175.573214,800.534681,12.305551,832.356140,152.005157,712.358521,320.211365,863.882751,...,823.739075,779.762573,804.034729,599.999084,727.864502,436.444977,864.276794,735.582703,851.917786,35
189,296.936609,753.601668,267.483613,792.719642,12.305551,832.356140,152.050751,712.370300,320.283112,863.906067,...,823.874329,779.766235,804.043457,599.859131,727.888672,436.436523,864.361389,735.552307,851.923462,35


In [5]:
# save the combined dataframe as a csv file in the raw data directory
csv_path = raw_data_dir+"all_matches_videos_node_coordinates.csv"
df_all_data.to_csv(csv_path)