# Lockbox Dataset Conversion

Dataset from Reiske et al. (2025)¹, containing video and pose files of individual mice solving mechanical puzzle "lockboxes", recorded from three camera perspectives (top, front, side).

- Full dataset: https://doi.org/10.14279/depositonce-23850
- Preview of dataset (used here): https://www.dropbox.com/scl/fo/h7nkai8574h23qfq9m1b2/AP4gNZOpDJJ7z0yGtbWQiOc?rlkey=w36jzxqjkghg0j0xva5zsxy2v&st=5r9msqjw&dl=0

---

¹ Reiske, P., Boon, M. N., Andresen, N., Traverso, S., Hohlbaum, K., Lewejohann, L., Thöne-Reineke, C., Hellwich, O., & Sprekeler, H. (2025). Mouse Lockbox Dataset: Behavior Recognition for Mice Solving Lockboxes (arXiv:2505.15408). arXiv. https://doi.org/10.48550/arXiv.2505.15408


<img src="assets/lpckbox1.png" width="900">

In [None]:
import os 
import xarray as xr
import numpy as np
import h5py
import requests
import zipfile
from pathlib import Path
from audioio import write_audio
from ethograph import TrialTree, set_media_attrs, minimal_basics, get_project_root
from movement.io import load_poses, save_poses
import pandas as pd

def download_and_extract(url: str, data_folder: Path, timeout: float = 60.0) -> None:
    """Download a zip archive from ``url`` and unpack it into ``data_folder``.

    The archive is streamed to ``data_folder / "dataset.zip"``, extracted in
    place and then deleted. The temporary zip file is removed even when the
    download or the extraction fails, so no partial archive is left behind.

    Args:
        url: Direct-download URL that serves a zip archive.
        data_folder: Destination directory; created (with parents) if missing.
        timeout: Seconds ``requests`` waits for the connection and for each
            read before giving up. This is not a limit on the total download
            time.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    data_folder.mkdir(parents=True, exist_ok=True)
    zip_path = data_folder / "dataset.zip"

    try:
        # The context manager releases the streamed connection back to the
        # pool; with stream=True it would otherwise stay open.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(data_folder)
    finally:
        # Clean up the archive on success and on failure alike.
        zip_path.unlink(missing_ok=True)


# Target directory for the downloaded preview dataset inside the project tree.
data_folder = get_project_root() / "data" / "lockbox"
data_folder.mkdir(parents=True, exist_ok=True)

# Share link of the preview dataset; download_and_extract treats the response as a zip archive.
url = "https://www.dropbox.com/scl/fo/h7nkai8574h23qfq9m1b2/AP4gNZOpDJJ7z0yGtbWQiOc?rlkey=w36jzxqjkghg0j0xva5zsxy2v&e=1&st=5r9msqjw&dl=1"
download_and_extract(url, data_folder)

# Everything below works on the "labeled" subfolder of the extracted data.
data_folder /= "labeled"

In [None]:
from movement.kinematics import compute_velocity, compute_speed, compute_pairwise_distances
import re
fps = 30  # frames per second of the pose tracks — NOTE(review): confirm against the dataset description


def get_files_with_prefix(folder: Path, prefix: str) -> list[Path]:
    """Return the entries directly inside ``folder`` whose names start with ``prefix``.

    The result is sorted by path. ``Path.glob`` yields entries in an
    arbitrary, filesystem-dependent order, so sorting makes the result
    reproducible across machines and runs.

    Args:
        folder: Directory to search (not recursive).
        prefix: Leading part of the file name. It is inserted into a glob
            pattern, so glob metacharacters in it are interpreted as such.

    Returns:
        Sorted list of matching paths; empty if nothing matches.
    """
    return sorted(folder.glob(f"{prefix}*"))


# Subset of the trials in the preview dataset (not all of them).
trials = [
    "2021-02-15_07-32-44_segment1",
    "2021-05-31_07-34-21_segment2",
    "2021-05-31_07-34-21_segment3",
]

# (body part, lockbox keypoint) pairs; each pair is stored as the data
# variable ``dist_<body part>_<lockbox keypoint>``.
distance_pairs = [
    # Nose to tools
    ("nose", "lever_tip"),
    ("nose", "other_lever_tip"),
    ("nose", "stick_head"),
    ("nose", "ball"),
    # Front paws to tools
    ("front_paw_left", "lever_tip"),
    ("front_paw_left", "stick_head"),
    ("front_paw_left", "ball"),
    ("front_paw_right", "lever_tip"),
    ("front_paw_right", "stick_head"),
    ("front_paw_right", "ball"),
]

ds_list = []
for trial in trials:
    # glob order is filesystem-dependent; sort so the camera and pose lists
    # are built in a reproducible order.
    files = sorted(data_folder.glob(f"{trial}*"))
    dlc_files = [f for f in files if f.suffix == ".h5"]
    # Recommended convert to mp4, see https://ethograph.readthedocs.io/en/latest/troubleshooting
    cam_names = [f.name for f in files if f.suffix == ".mp4"]  # original avi
    # Collected but not used below. Pose CSVs written by this loop share the
    # trial prefix, so on a re-run they are matched here as well.
    label_files = [f for f in files if f.suffix == ".csv"]
    dlc_names = []

    # Reset per trial: without this, a trial lacking a front-view file would
    # silently reuse the dataset of the previous trial.
    ds = None

    for h5_path in dlc_files:
        # Movement not able to load this hdf file, therefore convert to .csv format
        df = pd.read_hdf(h5_path)
        ds_temp = load_poses.from_dlc_style_df(df, fps=fps)

        # with_suffix only touches the extension, unlike str.replace which
        # would rewrite every ".h5" occurring anywhere in the path.
        csv_path = h5_path.with_suffix(".csv")
        save_poses.to_dlc_file(ds_temp, csv_path)
        dlc_names.append(f"{csv_path.stem}_individual_0.csv")  # Movement adds this

        # Will only extract kinematics/ distance (in 2D) from front view.
        # For 3D pose, see https://deeplabcut.github.io/DeepLabCut/docs/Overviewof3D.html
        if "front-view" in h5_path.name:
            ds = ds_temp.copy()
            ds["velocity"] = compute_velocity(ds.position)
            ds["speed"] = compute_speed(ds.position)

            for body_part, target in distance_pairs:
                ds[f"dist_{body_part}_{target}"] = compute_pairwise_distances(
                    ds.position, "keypoints", {body_part: target}
                )

    if ds is None:
        raise FileNotFoundError(
            f"No front-view pose file (.h5) found for trial {trial!r} in {data_folder}"
        )

    for var in ds.data_vars:
        ds[var].attrs["type"] = "features"
    ds.attrs["trial"] = trial
    ds = set_media_attrs(ds, cameras=cam_names, pose=dlc_names)
    ds_list.append(ds)

dt = TrialTree.from_datasets(ds_list)
dt.save(data_folder / "lockbox.nc")