### Load the data

In [None]:
import os
from pathlib import Path
import h5py
import aeon
from aeon.io import video
from aeon.schema.schemas import exp02, social02
import pandas as pd
import numpy as np
import re

# Location of the pose-tracking analysis file for the chunk under study
directory = '/ceph/aeon/aeon/code/scratchpad/sleap/multi_point_tracking/multi_animal_CameraTop/predictions_social02'
h5_directory = 'AEON4/analyses'
filename = 'CameraTop_2024-02-17T18-00-00_full_pose_id_all_frames.analysis.h5'
filepath = os.path.join(directory, h5_directory, filename)

# Pull every dataset needed later into memory, then let the file close
with h5py.File(filepath, 'r') as f:
    tracks = f['tracks'][:]
    track_occupancy = f['track_occupancy'][:]
    # Track names and the video path are stored as bytes; decode them to str
    track_names = [raw_name.decode('utf-8') for raw_name in f['track_names'][:]]
    node_names = f['node_names'][:].astype(str)
    edge_inds = f['edge_inds'][:]
    video_path = f['video_path'][()].decode('utf-8')
print(track_names)
print(tracks.shape)

# Blob-tracking position data for the same hour as the pose file above
root = Path("/ceph/aeon/aeon/data/raw/AEON4/social0.2")
start = pd.Timestamp("2024-02-17 18:00:00")
end = pd.Timestamp("2024-02-17 19:00:00")
centroid_blob_data = aeon.load(root, exp02.CameraTop.Position, start, end)
# Move the loaded index into a column so that rows get a plain 0..N-1 index,
# then drop rows containing NaNs (surviving rows keep their original row number)
centroid_blob_data = centroid_blob_data.reset_index()
centroid_blob_data.dropna(inplace=True)
print(centroid_blob_data.shape)

### Calculations and extraction of fighting frames

In [None]:
# Constants
# Index of the 'spine2' node; its coordinates are used below as each mouse's centroid
spine2_index = np.where(node_names == 'spine2')[0][0]
cm2px = 5.4  # 1 cm = 5.4 px
fps = 50

# Centroid distances
# NOTE(review): the indexing below assumes tracks is laid out as
# (track, xy, node, frame) -- confirm against the analysis file.
centroid_mouse0 = tracks[0, :, spine2_index, :]
centroid_mouse1 = tracks[1, :, spine2_index, :]
centroid_distances = np.linalg.norm(centroid_mouse0 - centroid_mouse1, axis=0)
# Carry the last valid distance forward over NaN frames.
# Series.ffill() is used instead of fillna(method='ffill'), which is deprecated in recent pandas.
centroid_distances_ffill = pd.Series(centroid_distances).ffill().to_numpy()

# Internode distances: one row per skeleton edge, one column per frame
internode_distances_mouse0 = np.zeros((len(edge_inds), tracks.shape[3]))
internode_distances_mouse1 = np.zeros((len(edge_inds), tracks.shape[3]))
for i, node_pair in enumerate(edge_inds):
    internode_distances_mouse0[i] = np.linalg.norm(tracks[0, :, node_pair[0], :] - tracks[0, :, node_pair[1], :], axis=0)
    internode_distances_mouse1[i] = np.linalg.norm(tracks[1, :, node_pair[0], :] - tracks[1, :, node_pair[1], :], axis=0)
# NOTE(review): assumes edge 0 is the nose-head edge and edges 3+ are the spine
# edges -- this depends on the order of edge_inds; confirm against the skeleton.
nose_head_distances_mouse0 = internode_distances_mouse0[0,:]
nose_head_distances_mouse1 = internode_distances_mouse1[0,:]
mean_interspinal_distances_mouse0 = np.mean(internode_distances_mouse0[3:,:], axis=0)
mean_interspinal_distances_mouse1 = np.mean(internode_distances_mouse1[3:,:], axis=0)

# Blob speed
# Displacement and elapsed time between consecutive (non-NaN) rows of the blob data
dxy = centroid_blob_data[["x", "y"]].diff().values[1:]
dt = (np.diff(centroid_blob_data["time"]) / 1e6).astype(int)  # ms
# The first row has no predecessor, so its speed is set to 0
centroid_blob_data["speed"] = np.concatenate(([0], np.linalg.norm(dxy, axis=1) / dt / cm2px * 1000))  # cm/s
k = np.ones(10) / 10  # running avg filter kernel (10 frames)
centroid_blob_data["speed"] = np.convolve(centroid_blob_data["speed"], k, mode="same")

In [None]:
# Thresholds
max_distance = 20 # px
max_nose_head_distance = 7 # px
max_interspinal_distance = 10 # px
min_blob_speed = 3  # cm/s 

# Condition 1: the (forward-filled) centroid distance between the mice is below max_distance
cond1_frames = np.flatnonzero(centroid_distances_ffill < max_distance)

# Condition 2: at least one mouse has a pose whose internode distances exceed their maximum
# Condition 2a: the nose-head distance of either mouse exceeds max_nose_head_distance
cond2a = (nose_head_distances_mouse0 > max_nose_head_distance) | (nose_head_distances_mouse1 > max_nose_head_distance)
# Condition 2b: the mean spine-node distance of either mouse exceeds max_interspinal_distance
cond2b = (mean_interspinal_distances_mouse0 > max_interspinal_distance) | (mean_interspinal_distances_mouse1 > max_interspinal_distance)
# A frame satisfies condition 2 when 2a or 2b holds
cond2 = cond2a | cond2b
cond2_frames = np.flatnonzero(cond2)

# Condition 3: the smoothed blob speed is above min_blob_speed
# (the row index of the blob data is used as the frame number)
cond3_frames = centroid_blob_data.loc[centroid_blob_data["speed"] > min_blob_speed].index.values

# Candidate fighting frames are those that satisfy all three conditions
close_and_distorted_frames = np.intersect1d(cond1_frames, cond2_frames)
possible_fights = np.intersect1d(close_and_distorted_frames, cond3_frames)
possible_fights

### Divide possible fighting frames into subarrays of consecutive frames (each subarray = one possible fight)

In [None]:
fps = 50
max_frame_gap = fps*4
min_num_frames = int(fps*0.1)

# Divide possible_fights into sub_arrays of consecutive frames (allowing for gaps up to a certain max)
diffs = np.diff(possible_fights)
# A new sub_array starts right after every gap larger than max_frame_gap
indices = np.where(diffs > max_frame_gap)[0] + 1
possible_fights = np.split(possible_fights, indices)
# Filter sub_arrays to keep only those with more than a certain number of frames
possible_fights = [sub_array for sub_array in possible_fights if len(sub_array) > min_num_frames]
print(len(possible_fights), possible_fights)


max_frame_gap = fps*2
# Include empty frames where the mice were close to each other in the previous frame they were detected
# If these occur close to or during the time of a possible fight, it's likely the mice are fighting and not detected due to weird poses
# These frames will have been dropped by condition 2 but can help connect/extend the possible fights detected above
empty_frames = np.where((track_occupancy[:, 0] == 0) & (track_occupancy[:, 1] == 0))[0]
empty_frames = np.intersect1d(cond1_frames, empty_frames) # Only select empty frames where the mice were previously close to each other
if possible_fights:
    possible_fights = np.concatenate(possible_fights)
    possible_fights_w_empty_frames = np.union1d(possible_fights, empty_frames)
    diffs = np.diff(possible_fights_w_empty_frames)
    indices = np.where(diffs > max_frame_gap)[0] + 1
    possible_fights_w_empty_frames = np.split(possible_fights_w_empty_frames, indices)
    # Only keep the subarrays that contain at least one frame from the original possible_fights array
    # i.e., don't include subarrays entirely composed of empty frames
    check = [bool(np.isin(sub_array, possible_fights).any()) for sub_array in possible_fights_w_empty_frames]
    possible_fights = [sub_array for sub_array, val in zip(possible_fights_w_empty_frames, check) if val]
else:
    # No candidate run survived the length filter: np.concatenate would raise on an
    # empty list, and runs made only of empty frames are never kept, so nothing remains.
    possible_fights_w_empty_frames = []
    check = []
print(len(possible_fights_w_empty_frames), len(possible_fights), possible_fights)

### Filtering of possible fights based on the mean individual speeds of the mice

In [None]:
min_centroid_speed = 20  # cm/s min speed for fighting
min_both_centroid_speed = 15

# Keep only the candidate runs in which the mice move fast enough to be fighting.
# Identity swaps between the two tracks are undone first, since a swap would show
# up as a large jump in the centroid positions and inflate the speeds.
fights = []
for sub_array in possible_fights:
    # Start one frame before the run; clamp at 0 so that a run beginning on
    # frame 0 does not produce a negative index that wraps around in the slice
    start = max(sub_array[0]-1, 0)
    end = sub_array[-1]
    # Clean up identity
    # Trim the centroid data to the frames we are currently considering
    centroid_mouse0_trimmed = centroid_mouse0[:, start:end]
    centroid_mouse1_trimmed = centroid_mouse1[:, start:end]
    num_frames = centroid_mouse0_trimmed.shape[1]
    if num_frames == 0:
        # Nothing to analyse for this run
        continue
    # Initialize variables to hold the last known positions of each mouse (used to deal with NaN values in the tracking data)
    last_known_pos0 = centroid_mouse0_trimmed[:, 0]
    last_known_pos1 = centroid_mouse1_trimmed[:, 0]
    # Initialize arrays to hold the cleaned centroid data
    centroid_mouse0_cleaned = centroid_mouse0_trimmed.copy()
    centroid_mouse1_cleaned = centroid_mouse1_trimmed.copy()
    # Loop over the frames from the second frame to the last
    for i in range(1, num_frames):
        pos0 = centroid_mouse0_trimmed[:, i]
        pos1 = centroid_mouse1_trimmed[:, i]
        if np.isnan(pos0).any() and np.isnan(pos1).any():
            continue
        # Calculate the Euclidean distance from each centroid in the current frame to each last known centroid
        dists = np.array([
            [np.linalg.norm(pos0 - last_known_pos0), np.linalg.norm(pos0 - last_known_pos1)],
            [np.linalg.norm(pos1 - last_known_pos0), np.linalg.norm(pos1 - last_known_pos1)],
        ])
        # Compare the total cost of keeping vs swapping the identities. nansum ignores
        # pairings with a missing position; a plain sum would be NaN, the comparison
        # would be False, and the identities would be swapped for no reason.
        keep_cost = np.nansum([dists[0, 0], dists[1, 1]])
        swap_cost = np.nansum([dists[0, 1], dists[1, 0]])
        if keep_cost <= swap_cost:
            new_pos0, new_pos1 = pos0, pos1
        else:
            new_pos0, new_pos1 = pos1, pos0
            centroid_mouse0_cleaned[:, i] = pos1
            centroid_mouse1_cleaned[:, i] = pos0
        # Only a detected position may become the new last known position
        if not np.isnan(new_pos0).any():
            last_known_pos0 = new_pos0
        if not np.isnan(new_pos1).any():
            last_known_pos1 = new_pos1
    # Calculate centroid speed for each mouse
    # Rows with NaNs are dropped, so dt is derived from the surviving frame indices
    mouse0_df = pd.DataFrame(centroid_mouse0_cleaned.T, columns=["x", "y"]).dropna()
    mouse1_df = pd.DataFrame(centroid_mouse1_cleaned.T, columns=["x", "y"]).dropna()
    dt_mouse0 = np.diff(mouse0_df.index.values*1000/fps).astype(int) # ms
    dt_mouse1 = np.diff(mouse1_df.index.values*1000/fps).astype(int) # ms
    dxy_mouse0 = mouse0_df[['x', 'y']].diff().values[1:]
    dxy_mouse1 = mouse1_df[['x', 'y']].diff().values[1:]
    # The first row has no predecessor; copy so the new column is not set on a slice
    mouse0_df = mouse0_df.iloc[1:].copy()
    mouse1_df = mouse1_df.iloc[1:].copy()
    mouse0_df["speed"] = np.linalg.norm(dxy_mouse0, axis=1) / dt_mouse0 / cm2px * 1000  # cm/s
    mouse1_df["speed"] = np.linalg.norm(dxy_mouse1, axis=1) / dt_mouse1 / cm2px * 1000  # cm/s
    mean_centroid0_speed = mouse0_df["speed"].mean()
    mean_centroid1_speed = mouse1_df["speed"].mean()
    mean_both_centroid_speed = np.mean([mean_centroid0_speed, mean_centroid1_speed])
    # Add to fights list if either mouse, or the average of both, has a mean speed above its threshold
    if (mean_centroid0_speed > min_centroid_speed or mean_centroid1_speed > min_centroid_speed or mean_both_centroid_speed > min_both_centroid_speed):
        print(mean_centroid0_speed, mean_centroid1_speed)
        fights.append(sub_array)
print(fights)

### Save fights

In [None]:
vid_export_dir = directory + '/fight_videos/'
# Make sure the export directory exists before any video or CSV is written to it
os.makedirs(vid_export_dir, exist_ok=True)

fight_data = {'start_frame' : [], 'end_frame' : [], 'start_timestamp' : [], 'end_timestamp' : [], 'duration (seconds)' : []}

# Parse the video path once, before the loop: it is the same for every fight, and
# arena_number is needed for the CSV name even when no fight is exported.
# group(1) is everything before the first timestamp in the path, group(3) the last timestamp in the path.
metadata_retrieval_matches = re.search(r'(.*?)(\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}).*(\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2})', video_path)
arena_number_match = re.search(r'AEON(\d)', video_path)
if metadata_retrieval_matches is None or arena_number_match is None:
    raise ValueError(f"Could not parse the data root, chunk time and arena number from video path: {video_path!r}")
root = metadata_retrieval_matches.group(1)
chunk_time = pd.to_datetime(metadata_retrieval_matches.group(3), format='%Y-%m-%dT%H-%M-%S')
arena_number = arena_number_match.group(1)
# Option to change the drive
new_drive = "/ceph/aeon"
root = re.sub(r'^.*?:', new_drive, root)

for subarray in fights:
    start_frame = subarray[0]
    end_frame = subarray[-1]
    # Frame numbers are converted to timestamps relative to the chunk time using fps
    start_timestamp = chunk_time + pd.Timedelta(seconds=start_frame/fps)
    end_timestamp = chunk_time + pd.Timedelta(seconds=end_frame/fps)
    duration = (end_timestamp - start_timestamp).total_seconds()
    # Very short fights are likely to be false positives (errors in tracking)
    if duration > 1:
        fight_data['start_frame'].append(start_frame)
        fight_data['end_frame'].append(end_frame)
        fight_data['start_timestamp'].append(start_timestamp)
        fight_data['end_timestamp'].append(end_timestamp)
        fight_data['duration (seconds)'].append(duration)

        # Export the video with one second of padding on either side of the fight
        vid_start = start_timestamp - pd.Timedelta(seconds=1)
        vid_end   = end_timestamp + pd.Timedelta(seconds=1)
        frames_info = aeon.load(root, social02.CameraTop.Video, start=vid_start, end=vid_end)
        vid = video.frames(frames_info)
        save_path = vid_export_dir + "AEON" + arena_number + "_CameraTop_" + start_timestamp.strftime('%Y-%m-%dT%H-%M-%S') + "_" + end_timestamp.strftime('%Y-%m-%dT%H-%M-%S') + ".avi"
        video.export(vid, save_path, fps=fps)
fights_df = pd.DataFrame(fight_data)
display(fights_df)
# Store the timestamps as strings in the same format as used in the file names
fights_df['start_timestamp'] = fights_df['start_timestamp'].apply(lambda x: x.strftime('%Y-%m-%dT%H-%M-%S'))
fights_df['end_timestamp'] = fights_df['end_timestamp'].apply(lambda x: x.strftime('%Y-%m-%dT%H-%M-%S'))
csv_path = vid_export_dir + "AEON" + arena_number + "_fights.csv"
if not os.path.exists(csv_path):
    fights_df.to_csv(csv_path, index=False)
else:
    # Append to the fights already recorded for this arena, dropping exact repeats
    existing_fights_df = pd.read_csv(csv_path)
    fights_df = pd.concat([existing_fights_df, fights_df]).drop_duplicates()
    fights_df.to_csv(csv_path, index=False)