In [None]:
# Generate the per-track spreadsheets (collapsed distances, vector lengths, max velocities) needed for further analysis and behavior classification
# Change folder_path to process a different H5 dataset
import os
import re
import h5py
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from natsort import natsorted
import pandas as pd

def getfile(file_path):
    """Load one tracking HDF5 file and return gap-filled coordinates of node 0.

    Reads the ``"tracks"`` dataset, transposes it (which reverses its axis
    order), fills NaN gaps with ``fill_missing`` and keeps index 0 of the
    second axis.

    Parameters
    ----------
    file_path : str or path-like
        Path to an HDF5 file that contains a ``"tracks"`` dataset. After the
        transpose the array is indexed with four axes.

    Returns
    -------
    numpy.ndarray
        ``locations[:, 0, :, :]``. The callers in this notebook use it as
        (frames, 2, tracks) with x/y on axis 1.
        NOTE(review): this looks like the SLEAP analysis-export layout -- confirm.
    """
    BODY_INDEX = 0  # index of the node kept for all downstream analysis
    with h5py.File(file_path, "r") as f:
        # [:] copies the dataset into memory, so the file can be closed
        # before the (potentially slow) gap filling starts.
        locations = f["tracks"][:].T
    locations = fill_missing(locations)
    return locations[:, BODY_INDEX, :, :]

def fill_missing(Y, kind="linear"):
    """Fill NaN gaps along axis 0, independently for every other coordinate.

    The array is flattened to 2-D (axis 0 kept, all remaining axes merged),
    each column is interpolated over its NaN entries, and the result is
    reshaped back to the input shape. Interior gaps use ``interp1d`` with the
    requested ``kind``; leading and trailing NaNs are filled with the nearest
    valid value.

    Parameters
    ----------
    Y : numpy.ndarray
        Float array whose first axis is the one to interpolate along.
        It may be modified in place when the reshape can be done as a view.
    kind : str, optional
        Interpolation kind forwarded to ``scipy.interpolate.interp1d``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``Y``. Columns that contain no valid
        sample at all are left as NaN; columns with a single valid sample are
        filled with that constant.
    """
    initial_shape = Y.shape
    if Y.size == 0:
        # reshape(..., -1) is ambiguous for empty arrays and would raise
        return Y
    Y = Y.reshape((initial_shape[0], -1))
    for i in range(Y.shape[-1]):
        y = Y[:, i]
        valid = np.flatnonzero(~np.isnan(y))
        if valid.size == 0:
            # Nothing to interpolate from: interp1d / np.interp raise on an
            # empty sample set, so leave the column as NaN.
            continue
        if valid.size == 1:
            # A single sample can only be extended as a constant.
            y[:] = y[valid[0]]
        else:
            f = interp1d(valid, y[valid], kind=kind, fill_value=np.nan, bounds_error=False)
            gaps = np.flatnonzero(np.isnan(y))
            y[gaps] = f(gaps)
            # Fill leading or trailing NaNs with the nearest non-NaN values
            mask = np.isnan(y)
            y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])
        Y[:, i] = y
    return Y.reshape(initial_shape)

def individual_velocity(file_path, delay, frames_per_delay_unit=60, window_size=300):
    """Compute per-frame step lengths for every track and their per-window sums.

    For each track (last axis of the array returned by ``getfile``) the
    Euclidean distance between consecutive frames is computed and left-padded
    with ``int(delay * frames_per_delay_unit)`` zeros. The padded series is
    stored in the module-level ``all_individual_distances`` DataFrame. It is
    then summed over consecutive, non-overlapping windows of ``window_size``
    samples (a trailing partial window is discarded); windows 1..359 are
    stored in the module-level ``all_collapsed_individual_distances``
    DataFrame. Columns are named ``str(file_path) + str(track_index)``.

    Parameters
    ----------
    file_path : str or path-like
        HDF5 file to read through ``getfile``.
    delay : float
        Offset of this recording; the caller in this notebook parses it from
        the file name. Units are not shown here -- TODO confirm.
    frames_per_delay_unit : int, optional
        Number of frames per unit of ``delay`` (default 60, as before).
    window_size : int, optional
        Number of samples per summation window (default 300, as before).

    Returns
    -------
    None
        Results are written to the two module-level DataFrames.
    """
    # Window 0 is dropped and at most 359 windows are kept.
    # NOTE(review): presumably to align with vector_length's first window -- confirm.
    first_window, stop_window = 1, 360

    body_loc = getfile(file_path)
    delay_frame = int(delay * frames_per_delay_unit)
    # A non-positive delay adds no padding (same as [0] * delay_frame did).
    padding = np.zeros(max(delay_frame, 0))
    for track in range(body_loc.shape[2]):
        xy = body_loc[:, :, track]
        # Step length between frame k-1 and frame k, for every k at once.
        step = np.sqrt(np.diff(xy[:, 0]) ** 2 + np.diff(xy[:, 1]) ** 2)
        column_name = str(file_path) + str(track)
        print(column_name)
        # Padding on the array keeps the dtype float64 even when delay is 0.
        corrected = np.concatenate([padding, step])
        all_individual_distances[column_name] = pd.Series(corrected)
        n_windows = len(corrected) // window_size
        collapsed = np.array([
            np.sum(corrected[w * window_size:(w + 1) * window_size])
            for w in range(n_windows)
        ])
        all_collapsed_individual_distances[column_name] = collapsed[first_window:stop_window]

def vector_length(file_path, delay, frames_per_delay_unit=60, window_size=300):
    """Compute the net displacement of every track over consecutive windows.

    Positions are sampled every ``window_size`` frames, starting at frame
    ``window_size - int(delay * frames_per_delay_unit)``. The straight-line
    distance between successive samples is stored, one column per track, in
    the module-level ``all_vector_lengths`` DataFrame. Columns are named
    ``str(file_path) + str(track_index)``.

    Only windows whose end sample lies inside the recording are produced.
    The previous ``(len + delay_frame - 241) // 300`` count could be one too
    large and raise IndexError on the last sample. A window whose start
    sample falls before the first frame (delay longer than one window) gets
    NaN instead of silently wrapping around to the end of the recording.

    Parameters
    ----------
    file_path : str or path-like
        HDF5 file to read through ``getfile``.
    delay : float
        Offset of this recording; the caller in this notebook parses it from
        the file name. Units are not shown here -- TODO confirm.
    frames_per_delay_unit : int, optional
        Number of frames per unit of ``delay`` (default 60, as before).
    window_size : int, optional
        Frames between two sampled positions (default 300, as before).

    Returns
    -------
    None
        Results are written to ``all_vector_lengths``.
    """
    body_loc = getfile(file_path)
    delay_frame = int(delay * frames_per_delay_unit)
    start_frame = window_size - delay_frame
    n_frames = body_loc.shape[0]
    # Largest count such that start_frame + count * window_size <= n_frames - 1
    vector_windows = max((n_frames - 1 - start_frame) // window_size, 0)
    sample_frames = start_frame + window_size * np.arange(vector_windows + 1)
    in_recording = (sample_frames >= 0) & (sample_frames < n_frames)
    for track in range(body_loc.shape[2]):
        xy = body_loc[:, :, track]
        x = np.full(sample_frames.size, np.nan)
        y = np.full(sample_frames.size, np.nan)
        x[in_recording] = xy[sample_frames[in_recording], 0]
        y[in_recording] = xy[sample_frames[in_recording], 1]
        # Distance between each sampled position and the next one.
        lengths = np.sqrt(np.diff(x) ** 2 + np.diff(y) ** 2)
        column_name = str(file_path) + str(track)
        print(column_name)
        all_vector_lengths[column_name] = pd.Series(lengths)

def max_velocity(file_path, delay, frames_per_delay_unit=60, window_size=300):
    """Compute the largest per-frame step length of every track in each window.

    For each track (last axis of the array returned by ``getfile``) the
    Euclidean distance between consecutive frames is computed and left-padded
    with ``int(delay * frames_per_delay_unit)`` zeros. The padded series is
    written to the module-level ``all_individual_distances`` DataFrame (the
    same values ``individual_velocity`` stores there). The maximum of each
    consecutive, non-overlapping window of ``window_size`` samples is then
    taken (a trailing partial window is discarded); windows 1..359 are stored
    in the module-level ``all_max_velocity`` DataFrame. Columns are named
    ``str(file_path) + str(track_index)``.

    Parameters
    ----------
    file_path : str or path-like
        HDF5 file to read through ``getfile``.
    delay : float
        Offset of this recording; the caller in this notebook parses it from
        the file name. Units are not shown here -- TODO confirm.
    frames_per_delay_unit : int, optional
        Number of frames per unit of ``delay`` (default 60, as before).
    window_size : int, optional
        Number of samples per window (default 300, as before).

    Returns
    -------
    None
        Results are written to the two module-level DataFrames.
    """
    # Window 0 is dropped and at most 359 windows are kept.
    # NOTE(review): presumably to align with vector_length's first window -- confirm.
    first_window, stop_window = 1, 360

    body_loc = getfile(file_path)
    delay_frame = int(delay * frames_per_delay_unit)
    # A non-positive delay adds no padding (same as [0] * delay_frame did).
    padding = np.zeros(max(delay_frame, 0))
    for track in range(body_loc.shape[2]):
        xy = body_loc[:, :, track]
        # Step length between frame k-1 and frame k, for every k at once.
        step = np.sqrt(np.diff(xy[:, 0]) ** 2 + np.diff(xy[:, 1]) ** 2)
        column_name = str(file_path) + str(track)
        print(column_name)
        # Padding on the array keeps the dtype float64 even when delay is 0.
        corrected = np.concatenate([padding, step])
        all_individual_distances[column_name] = pd.Series(corrected)
        n_windows = len(corrected) // window_size
        # Named window_max so the local no longer shadows this function's name.
        window_max = np.array([
            np.max(corrected[w * window_size:(w + 1) * window_size])
            for w in range(n_windows)
        ])
        all_max_velocity[column_name] = window_max[first_window:stop_window]

# Folder holding the .h5 files of one dataset; every *.h5 file in it is processed.
folder_path = '/Users/donglinhan/Desktop/SLEAP/FinalH5/WT-DD2'
h5_files = natsorted(os.listdir(folder_path))

# Module-level accumulators filled by individual_velocity / vector_length /
# max_velocity: one column per (file, track).
all_individual_distances = pd.DataFrame()
all_collapsed_individual_distances = pd.DataFrame()
all_vector_lengths = pd.DataFrame(index = range(370))
all_max_velocity = pd.DataFrame()

for file_name in h5_files:
    if not file_name.endswith('.h5'):
        continue
    # The delay is encoded in the first six characters of the second
    # underscore-separated field of the file name.
    file_parts = file_name.split('_')
    first_six_chars = file_parts[1][:6]
    stripped = first_six_chars.lstrip("0")
    if first_six_chars and not stripped:
        # An all-zero field (e.g. "000000") strips to "", which float()
        # rejects; it means a delay of zero.
        delay = 0.0
    else:
        delay = float(stripped)
    print(delay)
    file_path = os.path.join(folder_path, file_name)
    print(file_name)
    individual_velocity(file_path, delay)
    vector_length(file_path, delay)
    max_velocity(file_path, delay)

# Written relative to the current working directory.
all_collapsed_individual_distances.to_excel('WT_DD2_collapsed_distances_300.xlsx', index=False)
all_vector_lengths.to_excel('WT_DD2_vector_lengths_300.xlsx', index=False)
all_max_velocity.to_excel('WT_DD2_max_velocity_300.xlsx', index=False)

In [None]:
# Generate the behavior classification
# Reads the three spreadsheets written by the previous cell; the input paths below must point to where that cell saved them
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

vector_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_vector_lengths_300.xlsx'
distance_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_collapsed_distances_300.xlsx'
max_velocity_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_max_velocity_300.xlsx'
distances = pd.read_excel(distance_input)
vector_lengths = pd.read_excel(vector_input)
max_velocities = pd.read_excel(max_velocity_input)
total = len(distances.columns)

# Classification parameters (values unchanged from the original hard-coded ones).
N_WINDOWS = 143               # number of leading windows (rows) classified per track
DISTANCE_THRESHOLD = 175      # summed distance at or above this -> code 2
VECTOR_THRESHOLD = 5          # vector length above this -> code 1 or 2
MAX_VELOCITY_THRESHOLD = 4    # max velocity above this upgrades code 1 to code 2

# Fail early with a clear message instead of a KeyError in the middle of the loop.
for table_name, table in (("distances", distances),
                          ("vector_lengths", vector_lengths),
                          ("max_velocities", max_velocities)):
    if len(table) < N_WINDOWS:
        raise ValueError(
            f"{table_name} has {len(table)} rows but {N_WINDOWS} windows are required"
        )

behavior_by_column = {}
for i, column_name in enumerate(distances.columns):
    d = distances[column_name].iloc[:N_WINDOWS].to_numpy()
    v = vector_lengths[column_name].iloc[:N_WINDOWS].to_numpy()
    m = max_velocities[column_name].iloc[:N_WINDOWS].to_numpy()

    long_distance = d >= DISTANCE_THRESHOLD
    # The explicit "d < threshold" keeps NaN distances in the default class,
    # because every comparison with NaN is False.
    displaced = (v > VECTOR_THRESHOLD) & (d < DISTANCE_THRESHOLD)
    fast = m > MAX_VELOCITY_THRESHOLD

    # The first matching condition wins, mirroring the if / elif / else chain.
    behavior = np.select(
        [long_distance, displaced & fast, displaced],
        [2, 2, 1],
        default=0,
    )
    print(i)
    behavior_by_column[column_name] = behavior

# Build the frame once instead of inserting columns one by one.
all_behavior = pd.DataFrame(behavior_by_column)
all_behavior.to_excel('WT_DD2_all_behavior_modified_again.xlsx', index=False)