In [2]:
import pandas as pd
import numpy as np

import plotly.express as px

from scipy.signal import savgol_filter

### Function definitions

In [3]:
def find_binary_sequence_borders(sequence: np.array) -> np.array:
    """Auxfun for finding indices of binary sequence starts and ends
    """
    starts = np.where(np.diff(sequence, prepend=0) == 1)[0]
    ends = np.where(np.diff(sequence, append=0) == -1)[0]
    lengths = (ends - starts) + 1

    return starts, ends, lengths

### Load data

In [6]:
filepath = r"C:\Code\Ambros_analysis\EBBS_workshop\workshop_vid.h5"

df = (
    pd.read_hdf(filepath)
    .astype(float)
    .droplevel("scorer", axis=1)
    .drop("likelihood", axis=1, level="coords")
    .sort_index(axis=1)
    .interpolate("linear")  # länge kann spezifiziert werden
)
individuals = df.columns.get_level_values("individuals").unique()
bodyparts = df.columns.get_level_values("bodyparts").unique()

raw_trace = df.iloc[300:400, 1]

# Smooth traces
for col in range(df.shape[1]):
    df.iloc[:, col] = savgol_filter(df.iloc[:, col], 5, 2)

df = df.astype(int)

smooth_trace = df.iloc[300:400, 1]

plot_comp = pd.DataFrame({"raw": raw_trace, "smooth": smooth_trace})

In [7]:
# Comparison of point trace
px.line(plot_comp, width=800)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

### ROI analysis

In [9]:
df_centroid = (
    df
    .loc[:, (individuals, bodyparts, ["x", "y"])]
    .T # because of pandas structure
    .groupby(level=["individuals", "coords"])
    .mean()
    .T   
)

In [10]:
# Building ROI conditions
in_center = pd.DataFrame(index=df_centroid.index, columns=individuals, dtype=bool)

for ind in individuals:
    condition = (
        (df_centroid.loc[:, (ind, "x")].between(400, 600)) & 
        (df_centroid.loc[:, (ind, "y")].between(250, 400))
    )
    in_center.loc[:, ind] = condition.values

In [11]:
print(find_binary_sequence_borders(in_center["black_tail"]))
print(find_binary_sequence_borders(in_center["non-marked"]))
print(find_binary_sequence_borders(in_center.all(axis=1)))

(array([1568, 2166, 3478]), array([1578, 2172, 3491]), array([11,  7, 14]))
(array([ 458, 2060, 2374, 2568, 3766, 4278, 4299, 4706, 5976]), array([ 469, 2076, 2386, 2580, 3793, 4284, 4331, 4723, 6001]), array([12, 17, 13, 13, 28,  7, 33, 18, 26]))
(array([], dtype=int64), array([], dtype=int64), array([], dtype=int64))


### Kinematics

In [13]:
# Calculate velocity and acceleration
df_velocity = df_centroid.diff().fillna(0)
df_acceleration = df_velocity.diff()
df_dist_traveled = pd.DataFrame(index=df_centroid.index, columns=individuals, dtype=float)

for ind in individuals:
    df_dist_traveled.loc[1:, ind] = np.linalg.norm(df_centroid.loc[:len(df_centroid)-2, (ind)].values - 
                                                   df_centroid.loc[1:, (ind)].values, axis=1) 
df_dist_traveled = df_dist_traveled.fillna(0)

In [14]:
real_size_proportion = 266/450 # mm per pixel
framerate = 50

distance_traveled = df_dist_traveled * real_size_proportion
distance_traveled.sum()

individuals
black_tail    18688.956409
non-marked    15436.785934
dtype: float64

In [17]:
average_speed = distance_traveled.groupby(distance_traveled.index//framerate).sum().mean()
print(average_speed)

individuals
black_tail    154.454185
non-marked    127.576743
dtype: float64


For rulebased heuristics, look into the videos, look at example behaviors and try to find metrics in this specific data, then build a rule based on that (eg rearing, head outside of cage edges, acceleration...)

In [18]:
# Calculate nose to nose distances
df_nose = df.loc[:, (individuals, "nose", ["x", "y"])].droplevel("bodyparts", axis=1)
nose_distance = np.linalg.norm(df_nose.iloc[:, :2].values - df_nose.iloc[:, 2:].values, axis=1)

In [19]:
find_binary_sequence_borders(nose_distance < 50)

(array([ 332,  662, 1461, 1882, 2236, 3110, 3524, 4663, 5114, 5481, 5702,
        5707]),
 array([ 334,  679, 1482, 1898, 2256, 3146, 3539, 4676, 5126, 5492, 5705,
        5710]),
 array([ 3, 18, 22, 17, 21, 37, 16, 14, 13, 12,  4,  4]))

In [28]:
plot_df = (
    df.loc[4663:4673]
    .stack(level=["individuals", "bodyparts"], future_stack=True)
    .reset_index()
)

In [29]:
fig = px.scatter(plot_df, x='x', y='y', color="individuals", animation_frame="level_0", width=700, height=500)
fig.update_traces(marker=dict(size=5))

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [26]:
# calculate angles of noses
ind1_vector = (df.loc[:, ("black_tail", "head_centre")] - df.loc[:, ("black_tail", "nose")]).values
ind2_vector = (df.loc[:, ("non-marked", "head_centre")] - df.loc[:, ("non-marked", "nose")]).values

dot_products = np.einsum('ij,ij->i', ind1_vector, ind2_vector)
mag1 = np.linalg.norm(ind1_vector, axis=1)
mag2 = np.linalg.norm(ind2_vector, axis=1)

cos_sim = dot_products / (mag1 * mag2 + 1e-8)
cos_ancos_simgles = np.clip(cos_sim, -1.0, 1.0)
nose_angles = np.degrees(np.arccos(cos_sim))


In [27]:
find_binary_sequence_borders((nose_angles < 180) & (nose_angles > 140) & (nose_distance < 50))

(array([ 662, 3111, 3144, 4663, 5481]),
 array([ 679, 3115, 3146, 4676, 5492]),
 array([18,  5,  3, 14, 12]))

In [30]:
# calculate angles of bodies
ind1_vector = (df.loc[:, ("black_tail", "dorsal_2")] - df.loc[:, ("black_tail", "nose")]).values
ind2_vector = (df.loc[:, ("non-marked", "dorsal_2")] - df.loc[:, ("non-marked", "nose")]).values

dot_products = np.einsum('ij,ij->i', ind1_vector, ind2_vector)
mag1 = np.linalg.norm(ind1_vector, axis=1)
mag2 = np.linalg.norm(ind2_vector, axis=1)

cos_angles = dot_products / (mag1 * mag2 + 1e-8)
cos_angles = np.clip(cos_angles, -1.0, 1.0)
angles = np.degrees(np.arccos(cos_angles))

In [31]:
v1 = df_velocity['black_tail'][['x', 'y']].values
v2 = df_velocity['non-marked'][['x', 'y']].values

dot_products = np.einsum('ij,ij->i', v1, v2)
norms_v1 = np.linalg.norm(v1, axis=1)
norms_v2 = np.linalg.norm(v2, axis=1)

cos_sim = dot_products / (norms_v1 * norms_v2 + 1e-8)
cos_sim = np.clip(cos_sim, -1.0, 1.0)

same_direction = cos_sim > 0.8

In [None]:
# Calculate nose to tail_base distances
df_nose_tb = df.loc[:, (individuals, ["nose", "tail_base"], ["x", "y"])]
nose_tb_distance = np.linalg.norm(df_nose_tb.loc[:, ("black_tail", "nose")].values - 
                               df_nose_tb.loc[:, ("non-marked", "tail_base")].values, axis=1)

In [None]:
find_binary_sequence_borders((nose_tb_distance < 50) & (df_dist_traveled['non-marked'] > 1))