In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pathlib
import re
from behavioral_analysis.pandas_tools.files import build_file_df
from behavioral_analysis.utility.builtin_classes.iterables import zip_unequal
from behavioral_analysis.utility.builtin_classes.objects import load_object
from behavioral_analysis.visualization.cv2_funcs import perspective_transform_arena, fig2cv2
from behavioral_analysis.visualization.matplotlib_funcs import create_canvas, get_function_added_artists
from behavioral_analysis.visualization.plots import get_arena_polygon_from_position, plot_arena, load_arena_from_json
from behavioral_analysis.visualization.video_funcs import write_video_to_file, get_video_shape
from behavioral_analysis.visualization.video_overlays import KeyPointOverlay
from joblib import Parallel, delayed
from shapely import Point
from shapely.ops import unary_union, nearest_points
from tqdm.cli import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
root_dir = pathlib.Path(r"D:\Folder With Backup\2019_paper\data")
plot_dir = root_dir / "plot_output"
data_out_dir = root_dir / "data_output"

verification_videos_dir = plot_dir / "verification" / "tracked_videos"
annotations_dir = root_dir / "annotations"
tracking_dir = root_dir / "tracking"

videos_dir = root_dir / "videos_remuxed"

In [5]:
def default_correction(raw_track_df, arena_dict, track_arena_transforms):
    arena_bounds = unary_union(list(arena_dict.values())).bounds
    x_bounds = arena_bounds[0], arena_bounds[2]
    y_bounds = arena_bounds[1], arena_bounds[3]

    video_selection_frame_indices = track_arena_transforms.index

    full_transformed_track_df_list = []
    for start_frame, end_frame in zip_unequal(video_selection_frame_indices, video_selection_frame_indices[1:], fill_value=None):
        flattened_arena_transform = track_arena_transforms.loc[start_frame]
        arena_transform = np.reshape(flattened_arena_transform, (3, 3))
        inverse_transform = np.linalg.inv(arena_transform)

        raw_track_df_slice = raw_track_df.loc[start_frame:end_frame, "spine_cervical"]
        transformed_track_array = cv2.perspectiveTransform(raw_track_df_slice[["x", "y"]].astype(float).values[np.newaxis], inverse_transform)[0]
        partial_transformed_track_df = pd.DataFrame(data=transformed_track_array, index=raw_track_df_slice.index, columns=["x", "y"])
        partial_transformed_track_df["clipped_x"] = np.clip(partial_transformed_track_df["x"].values, *x_bounds)
        partial_transformed_track_df["clipped_y"] = np.clip(partial_transformed_track_df["y"].values, *y_bounds)
        full_transformed_track_df_list.append(partial_transformed_track_df)

    transformed_track_df = pd.concat(full_transformed_track_df_list)
    transformed_track_df["containing_polygon"] = transformed_track_df.apply(lambda r: get_arena_polygon_from_position(arena_dict, (r[["x", "y"]].values), outside_behavior="closest"), axis=1)

    return transformed_track_df


def expanded_looming_correction(raw_track_df, arena_dict, track_arena_transforms, y_correction=20):
    video_selection_frame_indices = track_arena_transforms.index

    full_transformed_track_df_list = []
    for start_frame, end_frame in zip_unequal(video_selection_frame_indices, video_selection_frame_indices[1:], fill_value=None):
        flattened_arena_transform = track_arena_transforms.loc[start_frame]
        arena_transform = np.reshape(flattened_arena_transform, (3, 3))
        inverse_transform = np.linalg.inv(arena_transform)

        raw_track_df_slice = raw_track_df.loc[start_frame:end_frame]

        transformed_arena = perspective_transform_arena(arena_dict, arena_transform)
        transformed_arena_union = unary_union(list(transformed_arena.values()))

        corrected_track_df = pd.DataFrame([], index=raw_track_df_slice.index, columns=["x", "y"])
        corrected_track_df["x"] = raw_track_df_slice.loc[:, ("spine_cervical", "x")]
        corrected_track_df["y"] = raw_track_df_slice.loc[:, pd.IndexSlice[:, "y"]].max(axis=1) + y_correction

        clipped_track_df = pd.Series(nearest_points(transformed_arena_union, corrected_track_df.apply(lambda x: Point(x), axis=1).values)[0])  # get nearest points
        clipped_track_df = clipped_track_df.apply(lambda p: pd.Series(np.array(p.xy).squeeze(), index=["clipped_x", "clipped_y"]))  # unpack points to df

        transformed_track_array = cv2.perspectiveTransform(corrected_track_df[["x", "y"]].astype(float).values[np.newaxis], inverse_transform)[0]
        partial_transformed_track_df = pd.DataFrame(data=transformed_track_array, index=corrected_track_df.index, columns=["x", "y"])

        transformed_clipped_array = cv2.perspectiveTransform(clipped_track_df[["clipped_x", "clipped_y"]].astype(float).values[np.newaxis], inverse_transform)[0]
        partial_transformed_clipped_df = pd.DataFrame(data=transformed_clipped_array, index=corrected_track_df.index, columns=["clipped_x", "clipped_y"])

        partial_transformed_track_df = pd.concat([partial_transformed_track_df, partial_transformed_clipped_df], axis=1)
        full_transformed_track_df_list.append(partial_transformed_track_df)

    transformed_track_df = pd.concat(full_transformed_track_df_list)
    transformed_track_df["containing_polygon"] = transformed_track_df.apply(lambda r: get_arena_polygon_from_position(arena_dict, (r[["x", "y"]].values), outside_behavior="closest"), axis=1)

    return transformed_track_df


def transform_raw_tracks(expanded_arena_transforms_path, arena_dict_path, raw_track_dir, processed_track_out_dir, correction_func=None):
    correction_func = default_correction if correction_func is None else correction_func

    expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1])
    arena_dict = load_object(arena_dict_path)

    raw_csv_files = [f for f in os.listdir(raw_track_dir) if f.endswith(".csv")]
    os.makedirs(processed_track_out_dir, exist_ok=True)

    for f in tqdm(raw_csv_files):
        raw_csv_path = os.path.join(raw_track_dir, f)
        corresponding_video_name = f.replace(".csv", ".mp4")

        video_arena_transforms = expanded_arena_transforms.loc[corresponding_video_name]

        # we expect only one trace in every file, and drop the scores
        raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).loc["track_0"].drop("instance", level=0, axis=1).drop("score", level=1, axis=1)

        transformed_track_df = correction_func(raw_track_df, arena_dict, video_arena_transforms)

        out_path = os.path.join(processed_track_out_dir, f.replace(".csv", "_processed.csv"))
        transformed_track_df.to_csv(out_path)


def write_transform_verification_video(out_path, source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=None, show_progress=True, reader_kwargs=None, **correction_kwargs):
    correction_func = default_correction if correction_func is None else correction_func
    skeleton_df = pd.DataFrame([["nose", "spine_cervical"], ["spine_cervical", "tail_base"]], index=pd.RangeIndex(2, name="edge_index"), columns=pd.Index(["node_0", "node_1"], name="edge_feature"))
    video_shape = get_video_shape(source_video_path)
    keypoint_canvas = create_canvas(*video_shape[:2][::-1], dpi=50)

    try:
        keypoint_overlay = KeyPointOverlay(initial_fig=keypoint_canvas, track_df=raw_track_df, skeleton_df=skeleton_df, keypoint_kwargs=dict(s=150*4))

        raw_track_df = raw_track_df.loc["track_0"]  # we assume there is only one track
        corrected_track_df = correction_func(raw_track_df, arena_dict, track_arena_transforms, **correction_kwargs)
        corrected_track_df = corrected_track_df.reindex(index=pd.RangeIndex(corrected_track_df.index.min(), corrected_track_df.index.max()+1, name=corrected_track_df.index.name))

        video_selection_frame_indices = track_arena_transforms.index
        full_transformed_track_df_list = []
        transformed_arena_dict = {}
        for start_frame, end_frame in zip_unequal(video_selection_frame_indices, video_selection_frame_indices[1:], fill_value=None):
            flattened_arena_transform = track_arena_transforms.loc[start_frame]
            arena_transform = np.reshape(flattened_arena_transform, (3, 3))

            transformed_arena_dict[start_frame] = perspective_transform_arena(arena_dict, arena_transform)

            corrected_track_df_slice = corrected_track_df.loc[start_frame:end_frame]

            partial_transformed_track_df = pd.DataFrame([], index=corrected_track_df_slice.index, columns=["x", "y", "clipped_x", "clipped_y"])
            partial_transformed_track_df[["x", "y"]] = cv2.perspectiveTransform(corrected_track_df_slice[["x", "y"]].astype(float).values[np.newaxis], arena_transform)[0]
            partial_transformed_track_df[["clipped_x", "clipped_y"]] = cv2.perspectiveTransform(corrected_track_df_slice[["clipped_x", "clipped_y"]].astype(float).values[np.newaxis], arena_transform)[0]
            full_transformed_track_df_list.append(partial_transformed_track_df)

        transformed_track_df = pd.concat(full_transformed_track_df_list)

        def _plot_estimated_position(frame_index):
            if frame_index not in transformed_track_df.index:
                added_artists = []
            else:
                added_artists = keypoint_canvas.gca().plot(transformed_track_df.loc[frame_index, "clipped_x"], transformed_track_df.loc[frame_index, "clipped_y"], 
                                                           color="green", marker="o", ms=20)
            canvas_array = fig2cv2(keypoint_canvas)
            for added_artist in added_artists:
                added_artist.remove()
            return canvas_array
        

        def _plot_transformed_arena(frame_index):
            transformed_arena = {}
            for start_frame, range_transformed_arena in transformed_arena_dict.items():
                if frame_index >= start_frame:
                    transformed_arena = range_transformed_arena
                else:
                    break
            
            arena_artists = get_function_added_artists(plot_arena, transformed_arena, ax=keypoint_canvas.gca(), plot_labels=False)
            canvas_array = fig2cv2(keypoint_canvas)

            for new_artist in arena_artists:
                new_artist.remove()
            return canvas_array

        write_video_to_file(out_path, source_video_path, rgba_overlay_functions=[_plot_transformed_arena, keypoint_overlay.get_frame_overlay, _plot_estimated_position], show_progress=show_progress, reader_kwargs=reader_kwargs)

    finally:
        plt.close(keypoint_canvas)

# Verification

## Looming Stimulus

### Cohort 1

In [15]:
local_annotations_dir = annotations_dir / "looming_stimulus" / "cohort2"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_videos_dir = videos_dir / "looming_stimulus" / "cohort2"
local_tracking_dir = tracking_dir / "looming_stimulus" / "cohort2"

local_verification_videos_dir = verification_videos_dir / "looming_stimulus" / "cohort2"
local_verification_videos_dir.mkdir(parents=True, exist_ok=True)

In [39]:
rng = np.random.default_rng(12312415)
n_videos = 32
batch_size = 4

video_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_loom(?P<loom_id>\d+)_remuxed\.mp4"

arena_dict = load_arena_from_json(arena_dict_path)

expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

out_dir = os.path.join(root_dir, r"verification\tracking_videos")
os.makedirs(out_dir, exist_ok=True)

video_file_df = build_file_df(str(local_videos_dir), video_regex)
video_file_df = video_file_df.sample(n=n_videos, random_state=rng)
video_names = video_file_df["file_path"].apply(os.path.basename)

def _single_transform_verification_video(video_name, show_progress=False):
    out_path = local_verification_videos_dir / video_name.replace(".mp4", "_Tracked.mp4")
    regex_dict = re.search(video_regex, video_name).groupdict()

    if out_path.exists():
        return

    source_video_path = str(local_videos_dir / video_name)

    raw_csv_path = str(local_tracking_dir / video_name.replace("_remuxed.mp4", ".csv"))
    raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1)
    track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], int(regex_dict["loom_id"]))]

    write_transform_verification_video(str(out_path), source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=expanded_looming_correction, show_progress=show_progress, y_correction=20)

In [33]:
video_names.iloc[1]

'20241107_mouse3_4_loom9_remuxed.mp4'

In [40]:
_single_transform_verification_video(video_names.iloc[1], show_progress=True)

  track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], int(regex_dict["loom_id"]))]
100%|██████████| 898/898 [02:46<00:00,  5.38it/s]


In [7]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_transform_verification_video)(video_name) for video_name in tqdm(video_names))

100%|██████████| 32/32 [35:38<00:00, 66.84s/it]


### Cohort 2

In [41]:
local_annotations_dir = annotations_dir / "looming_stimulus" / "cohort1"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_videos_dir = videos_dir / "looming_stimulus" / "cohort1"
local_tracking_dir = tracking_dir / "looming_stimulus" / "cohort1"

local_verification_videos_dir = verification_videos_dir / "looming_stimulus" / "cohort1"
local_verification_videos_dir.mkdir(parents=True, exist_ok=True)

In [42]:
rng = np.random.default_rng(12312415)
n_videos = 32
batch_size = 4

video_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_loom(?P<loom_id>\d+)_remuxed\.mp4"

arena_dict = load_arena_from_json(arena_dict_path)

expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

out_dir = os.path.join(root_dir, r"verification\tracking_videos")
os.makedirs(out_dir, exist_ok=True)

video_file_df = build_file_df(str(local_videos_dir), video_regex)
video_file_df = video_file_df.sample(n=n_videos, random_state=rng)
video_names = video_file_df["file_path"].apply(os.path.basename)

def _single_transform_verification_video(video_name, show_progress=False):
    out_path = local_verification_videos_dir / video_name.replace(".mp4", "_Tracked.mp4")
    regex_dict = re.search(video_regex, video_name).groupdict()

    if out_path.exists():
        return

    source_video_path = str(local_videos_dir / video_name)

    raw_csv_path = str(local_tracking_dir / video_name.replace("_remuxed.mp4", ".csv"))
    raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1)
    track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], int(regex_dict["loom_id"]))]

    write_transform_verification_video(str(out_path), source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=expanded_looming_correction, show_progress=show_progress, y_correction=20)

In [43]:
video_names.iloc[0]

'20210516_mouse1_4_loom0_remuxed.mp4'

In [44]:
_single_transform_verification_video(video_names.iloc[0], show_progress=True)

100%|██████████| 906/906 [02:47<00:00,  5.42it/s]


In [31]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_transform_verification_video)(video_name) for video_name in tqdm(video_names))


[A
[A
[A
100%|██████████| 32/32 [00:03<00:00,  9.92it/s]


## Rat Odor

### Cohort 1

In [45]:
local_annotations_dir = annotations_dir / "rat_odor" / "cohort1"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_videos_dir = videos_dir / "rat_odor" / "cohort1"
local_tracking_dir = tracking_dir / "rat_odor" / "cohort1"

local_verification_videos_dir = verification_videos_dir / "rat_odor" / "cohort1"
local_verification_videos_dir.mkdir(parents=True, exist_ok=True)

In [53]:
rng = np.random.default_rng(12312415)
n_videos = 4
batch_size = 4
start_frame = 25 * 120
end_frame = start_frame + 25 * 5

video_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)_remuxed\.mp4"

arena_dict = load_arena_from_json(arena_dict_path)

expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

out_dir = os.path.join(root_dir, r"verification\tracking_videos")
os.makedirs(out_dir, exist_ok=True)

video_file_df = build_file_df(str(local_videos_dir), video_regex)
video_file_df = video_file_df.sample(n=n_videos, random_state=rng)
video_names = video_file_df["file_path"].apply(os.path.basename)

def _single_transform_verification_video(video_name, show_progress=False):
    out_path = local_verification_videos_dir / video_name.replace(".mp4", "_Tracked.mp4")
    regex_dict = re.search(video_regex, video_name).groupdict()

    if out_path.exists():
        return

    source_video_path = str(local_videos_dir / video_name)

    raw_csv_path = str(local_tracking_dir / video_name.replace("_remuxed.mp4", ".csv"))
    raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1)
    track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]

    write_transform_verification_video(str(out_path), source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=default_correction, reader_kwargs=dict(start_frame=start_frame, end_frame=end_frame), show_progress=show_progress)

In [49]:
video_names.iloc[2]

'20210528_mouse1_1_WithOdor_remuxed.mp4'

In [54]:
_single_transform_verification_video(video_names.iloc[2], show_progress=True)

  track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]
100%|██████████| 124/124 [00:25<00:00,  4.85it/s]


In [74]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_transform_verification_video)(video_name) for video_name in tqdm(video_names))









100%|██████████| 4/4 [00:00<00:00, 190.47it/s]


### Cohort 2

In [55]:
local_annotations_dir = annotations_dir / "rat_odor" / "cohort2"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_videos_dir = videos_dir / "rat_odor" / "cohort2"
local_tracking_dir = tracking_dir / "rat_odor" / "cohort2"

local_verification_videos_dir = verification_videos_dir / "rat_odor" / "cohort2"
local_verification_videos_dir.mkdir(parents=True, exist_ok=True)

In [56]:
rng = np.random.default_rng(12312415)
n_videos = 4
batch_size = 4
start_frame = 25 * 120
end_frame = start_frame + 25 * 5

video_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)_remuxed\.mp4"

arena_dict = load_arena_from_json(arena_dict_path)

expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

out_dir = os.path.join(root_dir, r"verification\tracking_videos")
os.makedirs(out_dir, exist_ok=True)

video_file_df = build_file_df(str(local_videos_dir), video_regex)
video_file_df = video_file_df.sample(n=n_videos, random_state=rng)
video_names = video_file_df["file_path"].apply(os.path.basename)

def _single_transform_verification_video(video_name, show_progress=False):
    out_path = local_verification_videos_dir / video_name.replace(".mp4", "_Tracked.mp4")
    regex_dict = re.search(video_regex, video_name).groupdict()

    if out_path.exists():
        return

    source_video_path = str(local_videos_dir / video_name)

    raw_csv_path = str(local_tracking_dir / video_name.replace("_remuxed.mp4", ".csv"))
    raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1)
    track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]

    write_transform_verification_video(str(out_path), source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=default_correction, reader_kwargs=dict(start_frame=start_frame, end_frame=end_frame), show_progress=show_progress)

In [57]:
video_names.iloc[0]

'20241120_mouse3_5_WithOdor_remuxed.mp4'

In [58]:
_single_transform_verification_video(video_names.iloc[0], show_progress=True)

  track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]
100%|██████████| 124/124 [00:27<00:00,  4.44it/s]


In [76]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_transform_verification_video)(video_name) for video_name in tqdm(video_names))









100%|██████████| 4/4 [00:00<?, ?it/s]


## Rat Presence

### Cohort 1

In [6]:
local_annotations_dir = annotations_dir / "rat_presence" / "cohort1"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_videos_dir = videos_dir / "rat_presence" / "cohort1"
local_tracking_dir = tracking_dir / "rat_presence" / "cohort1"

local_verification_videos_dir = verification_videos_dir / "rat_presence" / "cohort1"
local_verification_videos_dir.mkdir(parents=True, exist_ok=True)

In [7]:
rng = np.random.default_rng(12312415)
n_videos = 4
batch_size = 4
start_frame = 25 * 120
end_frame = start_frame + 25 * 5

video_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)_remuxed\.mp4"

arena_dict = load_arena_from_json(arena_dict_path)

expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

out_dir = os.path.join(root_dir, r"verification\tracking_videos")
os.makedirs(out_dir, exist_ok=True)

video_file_df = build_file_df(str(local_videos_dir), video_regex)
video_file_df = video_file_df.sample(n=n_videos, random_state=rng)
video_names = video_file_df["file_path"].apply(os.path.basename)

def _single_transform_verification_video(video_name, show_progress=False):
    out_path = local_verification_videos_dir / video_name.replace(".mp4", "_Tracked.mp4")
    regex_dict = re.search(video_regex, video_name).groupdict()

    if out_path.exists():
        return

    source_video_path = str(local_videos_dir / video_name)

    raw_csv_path = str(local_tracking_dir / video_name.replace("_remuxed.mp4", ".csv"))
    raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1)
    track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]

    write_transform_verification_video(str(out_path), source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=default_correction, reader_kwargs=dict(start_frame=start_frame, end_frame=end_frame), show_progress=show_progress)

In [9]:
video_names.iloc[0]

'20210601_mouse1_4_WithRat_remuxed.mp4'

In [10]:
_single_transform_verification_video(video_names.iloc[0], show_progress=True)

  track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]
100%|██████████| 124/124 [00:22<00:00,  5.56it/s]


In [78]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_transform_verification_video)(video_name) for video_name in tqdm(video_names))









100%|██████████| 4/4 [00:00<00:00, 3997.43it/s]


### Cohort 2

In [11]:
local_annotations_dir = annotations_dir / "rat_presence" / "cohort2"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_videos_dir = videos_dir / "rat_presence" / "cohort2"
local_tracking_dir = tracking_dir / "rat_presence" / "cohort2"

local_verification_videos_dir = verification_videos_dir / "rat_presence" / "cohort2"
local_verification_videos_dir.mkdir(parents=True, exist_ok=True)

In [12]:
rng = np.random.default_rng(12312415)
n_videos = 4
batch_size = 4
start_frame = 25 * 120
end_frame = start_frame + 25 * 5

video_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)_remuxed\.mp4"

arena_dict = load_arena_from_json(arena_dict_path)

expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

out_dir = os.path.join(root_dir, r"verification\tracking_videos")
os.makedirs(out_dir, exist_ok=True)

video_file_df = build_file_df(str(local_videos_dir), video_regex)
video_file_df = video_file_df.sample(n=n_videos, random_state=rng)
video_names = video_file_df["file_path"].apply(os.path.basename)

def _single_transform_verification_video(video_name, show_progress=False):
    out_path = local_verification_videos_dir / video_name.replace(".mp4", "_Tracked.mp4")
    regex_dict = re.search(video_regex, video_name).groupdict()

    if out_path.exists():
        return

    source_video_path = str(local_videos_dir / video_name)

    raw_csv_path = str(local_tracking_dir / video_name.replace("_remuxed.mp4", ".csv"))
    raw_track_df = pd.read_csv(raw_csv_path, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1)
    track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]

    write_transform_verification_video(str(out_path), source_video_path, raw_track_df, arena_dict, track_arena_transforms, correction_func=default_correction, reader_kwargs=dict(start_frame=start_frame, end_frame=end_frame), show_progress=show_progress)

In [15]:
video_names.iloc[2]

'20241115_mouse1_1_WithRat_remuxed.mp4'

In [16]:
_single_transform_verification_video(video_names.iloc[2], show_progress=True)

  track_arena_transforms = expanded_arena_transforms.loc[(regex_dict["mouse_id"], regex_dict["phase"])]
100%|██████████| 124/124 [00:22<00:00,  5.62it/s]


In [80]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_transform_verification_video)(video_name) for video_name in tqdm(video_names))









100%|██████████| 4/4 [00:00<00:00, 569.36it/s]


# Processing

In [None]:
_index_df = expanded_arena_transforms.index.to_frame()
_index_df["mouse_id"] = _index_df["mouse_id"].astype(str).str.replace(".", "_")
_index_df.iloc[14:14+14].loc[:, "mouse_id"] = "1_10"
_index_df.iloc[183:183+15].loc[:, "mouse_id"] = "3_10"
expanded_arena_transforms.index = pd.MultiIndex.from_frame(_index_df)
expanded_arena_transforms.to_csv(local_annotations_dir / "expanded_arena_transforms.csv")

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  _index_df.iloc[14:14+14].loc[:, "mouse_id"] = "1_10"
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original

## Looming Stimulus

### Cohort 1

In [150]:
local_annotations_dir = annotations_dir / "looming_stimulus" / "cohort2"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_tracking_dir = tracking_dir / "looming_stimulus" / "cohort2"

local_track_out_dir = data_out_dir / "processed_tracks" / "looming_stimulus" / "cohort2"
local_track_out_dir.mkdir(parents=True, exist_ok=True)

In [151]:
batch_size = 8
csv_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_loom(?P<loom_id>\d+)\.csv"

arena_dict = load_arena_from_json(arena_dict_path)
expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

raw_csv_files = [str(x) for x in local_tracking_dir.glob("*.csv")]

def _single_track_transform(csv_file):
    out_path = local_track_out_dir / pathlib.Path(csv_file).name.replace(".csv", "_processed.csv")
    if out_path.exists():
        return
    
    regex_dict = re.search(csv_regex, csv_file).groupdict()

    raw_track_df = pd.read_csv(csv_file, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1).loc["track_0"]

    track_arena_transforms = expanded_arena_transforms.loc[regex_dict["mouse_id"]].loc[int(regex_dict["loom_id"])]
    transformed_track_df = expanded_looming_correction(raw_track_df, arena_dict, track_arena_transforms, y_correction=20)
    transformed_track_df.to_csv(str(out_path))

In [142]:
_single_track_transform(raw_csv_files[0])

                              0         1      2         3         4      5  \
selection_frame_index                                                         
0                      1.617297 -0.772364  241.0  0.016154 -0.647631  723.0   

                              6         7    8  
selection_frame_index                           
0                      0.000008 -0.001278  1.0  


In [152]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_track_transform)(csv_file) for csv_file in tqdm(raw_csv_files))



[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

100%|██████████| 336/336 [00:32<00:00, 10.26it/s]


### Cohort 2

In [153]:
local_annotations_dir = annotations_dir / "looming_stimulus" / "cohort1"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_tracking_dir = tracking_dir / "looming_stimulus" / "cohort1"

local_track_out_dir = data_out_dir / "processed_tracks" / "looming_stimulus" / "cohort1"
local_track_out_dir.mkdir(parents=True, exist_ok=True)

In [154]:
batch_size = 8
csv_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_loom(?P<loom_id>\d+)\.csv"

arena_dict = load_arena_from_json(arena_dict_path)
expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

raw_csv_files = [str(x) for x in local_tracking_dir.glob("*.csv")]

def _single_track_transform(csv_file):
    out_path = local_track_out_dir / pathlib.Path(csv_file).name.replace(".csv", "_processed.csv")
    if out_path.exists():
        return
    
    regex_dict = re.search(csv_regex, csv_file).groupdict()

    raw_track_df = pd.read_csv(csv_file, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1).loc["track_0"]

    track_arena_transforms = expanded_arena_transforms.loc[regex_dict["mouse_id"]].loc[int(regex_dict["loom_id"])]
    transformed_track_df = expanded_looming_correction(raw_track_df, arena_dict, track_arena_transforms, y_correction=20)
    transformed_track_df.to_csv(str(out_path))

In [156]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_track_transform)(csv_file) for csv_file in tqdm(raw_csv_files))



[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

100%|██████████| 161/161 [00:12<00:00, 12.42it/s]


## Rat Odor

### Cohort 1

In [157]:
local_annotations_dir = annotations_dir / "rat_odor" / "cohort1"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_tracking_dir = tracking_dir / "rat_odor" / "cohort1"

local_track_out_dir = data_out_dir / "processed_tracks" / "rat_odor" / "cohort1"
local_track_out_dir.mkdir(parents=True, exist_ok=True)

In [160]:
batch_size = 8
csv_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)\.csv"

arena_dict = load_arena_from_json(arena_dict_path)
expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

raw_csv_files = [str(x) for x in local_tracking_dir.glob("*.csv")]

def _single_track_transform(csv_file):
    out_path = local_track_out_dir / pathlib.Path(csv_file).name.replace(".csv", "_processed.csv")
    if out_path.exists():
        return
    
    regex_dict = re.search(csv_regex, csv_file).groupdict()

    raw_track_df = pd.read_csv(csv_file, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1).loc["track_0"]

    track_arena_transforms = expanded_arena_transforms.loc[regex_dict["mouse_id"]].loc[regex_dict["phase"]]
    transformed_track_df = default_correction(raw_track_df, arena_dict, track_arena_transforms)
    transformed_track_df.to_csv(str(out_path))

In [161]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_track_transform)(csv_file) for csv_file in tqdm(raw_csv_files))




[A[A[A


[A[A[A


100%|██████████| 24/24 [00:44<00:00,  1.85s/it]


### Cohort 2

In [162]:
local_annotations_dir = annotations_dir / "rat_odor" / "cohort2"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_tracking_dir = tracking_dir / "rat_odor" / "cohort2"

local_track_out_dir = data_out_dir / "processed_tracks" / "rat_odor" / "cohort2"
local_track_out_dir.mkdir(parents=True, exist_ok=True)

In [163]:
batch_size = 8
csv_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)\.csv"

arena_dict = load_arena_from_json(arena_dict_path)
expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

raw_csv_files = [str(x) for x in local_tracking_dir.glob("*.csv")]

def _single_track_transform(csv_file):
    out_path = local_track_out_dir / pathlib.Path(csv_file).name.replace(".csv", "_processed.csv")
    if out_path.exists():
        return
    
    regex_dict = re.search(csv_regex, csv_file).groupdict()

    raw_track_df = pd.read_csv(csv_file, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1).loc["track_0"]

    track_arena_transforms = expanded_arena_transforms.loc[regex_dict["mouse_id"]].loc[regex_dict["phase"]]
    transformed_track_df = default_correction(raw_track_df, arena_dict, track_arena_transforms)
    transformed_track_df.to_csv(str(out_path))

In [164]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_track_transform)(csv_file) for csv_file in tqdm(raw_csv_files))




[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


100%|██████████| 48/48 [00:33<00:00,  1.44it/s]


## Rat Presence

### Cohort 1

In [165]:
local_annotations_dir = annotations_dir / "rat_presence" / "cohort1"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_tracking_dir = tracking_dir / "rat_presence" / "cohort1"

local_track_out_dir = data_out_dir / "processed_tracks" / "rat_presence" / "cohort1"
local_track_out_dir.mkdir(parents=True, exist_ok=True)

In [166]:
batch_size = 8
csv_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)\.csv"

arena_dict = load_arena_from_json(arena_dict_path)
expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

raw_csv_files = [str(x) for x in local_tracking_dir.glob("*.csv")]

def _single_track_transform(csv_file):
    out_path = local_track_out_dir / pathlib.Path(csv_file).name.replace(".csv", "_processed.csv")
    if out_path.exists():
        return
    
    regex_dict = re.search(csv_regex, csv_file).groupdict()

    raw_track_df = pd.read_csv(csv_file, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1).loc["track_0"]

    track_arena_transforms = expanded_arena_transforms.loc[regex_dict["mouse_id"]].loc[regex_dict["phase"]]
    transformed_track_df = default_correction(raw_track_df, arena_dict, track_arena_transforms)
    transformed_track_df.to_csv(str(out_path))

In [167]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_track_transform)(csv_file) for csv_file in tqdm(raw_csv_files))




[A[A[A


[A[A[A


100%|██████████| 24/24 [00:18<00:00,  1.29it/s]


### Cohort 2

In [168]:
local_annotations_dir = annotations_dir / "rat_presence" / "cohort2"
arena_dict_path = local_annotations_dir / "arena_dict.json"
expanded_arena_transforms_path = local_annotations_dir / "expanded_arena_transforms.csv"

local_tracking_dir = tracking_dir / "rat_presence" / "cohort2"

local_track_out_dir = data_out_dir / "processed_tracks" / "rat_presence" / "cohort2"
local_track_out_dir.mkdir(parents=True, exist_ok=True)

In [169]:
batch_size = 8
csv_regex = r"(?P<date_string>\d+)_mouse(?P<mouse_id>\d+.\d+)_(?P<phase>\w+)\.csv"

arena_dict = load_arena_from_json(arena_dict_path)
expanded_arena_transforms = pd.read_csv(expanded_arena_transforms_path, index_col=[0, 1, 2])

raw_csv_files = [str(x) for x in local_tracking_dir.glob("*.csv")]

def _single_track_transform(csv_file):
    out_path = local_track_out_dir / pathlib.Path(csv_file).name.replace(".csv", "_processed.csv")
    if out_path.exists():
        return
    
    regex_dict = re.search(csv_regex, csv_file).groupdict()

    raw_track_df = pd.read_csv(csv_file, index_col=[0, 1], header=[0, 1]).drop("instance", level=0, axis=1).drop("score", level=1, axis=1).loc["track_0"]

    track_arena_transforms = expanded_arena_transforms.loc[regex_dict["mouse_id"]].loc[regex_dict["phase"]]
    transformed_track_df = default_correction(raw_track_df, arena_dict, track_arena_transforms)
    transformed_track_df.to_csv(str(out_path))

In [None]:
with Parallel(n_jobs=batch_size) as parallel:
    parallel(delayed(_single_track_transform)(csv_file) for csv_file in tqdm(raw_csv_files))




[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


100%|██████████| 48/48 [00:16<00:00,  2.85it/s]


: 