In [26]:
import pandas as pd
import numpy as np
import os
from pathlib import Path


In [27]:
import importlib
import dataset as ds
# Re-execute the local `dataset` module so edits made to dataset.py are
# picked up without restarting the kernel.
importlib.reload(ds)

<module 'dataset' from '/Users/jacob/Documents/GitHub/EcodylicScience/behavior/dataset.py'>

Centralized notebook for calling functions defined in the project's other modules (e.g. `dataset`, `features`).

# Datasets and operations

# CALMS21

In [28]:
# Create a new manifest for the CALMS21 dataset.
name = "calms21"
# Choose a base directory (will be created if it doesn't exist)
base = "/Volumes/JD-SSD/mice_data/calms21/dataset/"

manifest_path = ds.new_dataset_manifest(
    name=name,
    base_dir=base,
    version="0.1.0",
    index_format="group/sequence",
)
# Peek at the first 30 lines of the manifest, one per output line.
# (Printing the list itself would show a single-line Python repr.)
print("\n".join(Path(manifest_path).read_text().splitlines()[:30]))

Wrote dataset manifest -> /Volumes/JD-SSD/mice_data/calms21/dataset/dataset.yaml


### Load existing

In [52]:
# Pick up any edits to dataset.py, then open the existing CALMS21 manifest.
importlib.reload(ds)
manifest_path = "/Volumes/JD-SSD/mice_data/calms21/dataset/dataset.yaml"
dataset = ds.Dataset(manifest_path)
# Kept as the cell's last expression so its return value is displayed.
dataset.load()

Dataset(manifest_path='/Volumes/JD-SSD/mice_data/calms21/dataset/dataset.yaml', name='calms21', version='0.1.0', format='yaml', roots={'media': '/Volumes/JD-SSD/mice_data/calms21/dataset/media', 'features': '/Volumes/JD-SSD/mice_data/calms21/dataset/features', 'labels': '/Volumes/JD-SSD/mice_data/calms21/dataset/labels', 'models': '/Volumes/JD-SSD/mice_data/calms21/dataset/models', 'tracks': '/Volumes/JD-SSD/mice_data/calms21/dataset/tracks', 'tracks_raw': '/Volumes/JD-SSD/mice_data/calms21/dataset/tracks_raw'}, meta={})

### Index and convert

In [34]:
# Index videos (no symlinks, just absolute paths in media/index.csv).
dataset.index_media(
    extensions=(".mp4", ".avi"),
    search_dirs=["/Volumes/JD-SSD/mice_data/calms21/task1_videos_mp4/"],
)

# Index the raw CALMS21 .npy track files.
# Kept as the cell's last expression so the returned index path is displayed.
dataset.index_tracks_raw(
    src_format="calms21_npy",
    patterns="calms*.npy",
    search_dirs=["/Volumes/JD-SSD/mice_data/calms21/data"],
    group_from="filename",
    multi_sequences_per_file=True,
)

[index_media] Wrote 90 entries -> /Volumes/JD-SSD/mice_data/calms21/dataset/media/index.csv
[index_tracks_raw] 6 -> /Volumes/JD-SSD/mice_data/calms21/dataset/tracks_raw/index.csv


PosixPath('/Volumes/JD-SSD/mice_data/calms21/dataset/tracks_raw/index.csv')

In [40]:
# Convert all indexed raw tracks (overwrite requested, grouping by filename).
dataset.convert_all_tracks(
    group_from="filename",
    overwrite=True,
    params={"neck_idx": None, "tail_idx": None},
)

In [None]:
# Display the groups currently available in the loaded dataset.
dataset.list_groups()

['calms21_task1_test',
 'calms21_task1_train',
 'calms21_task2_test',
 'calms21_task2_train',
 'calms21_task3_test',
 'calms21_task3_train']

## Features

### PoseDistance and Wavelet

In [None]:
# Import the local `features` module and reload it so recent edits apply.
import features
importlib.reload(features)

# Run the PairPoseDistancePCA feature on the task-1 training group only.
pca_params = {
    "n_components": 6,
    "pose_n": 7,
}
feat = features.PairPoseDistancePCA(params=pca_params)
run_id = dataset.run_feature(feat, groups=["calms21_task1_train"])

In [None]:
# Wavelet feature computed on top of the stored "pair-posedistance-pca" feature.
wavelet_params = {
    "fps_default": 30.0,
    "f_min": 0.2,
    "f_max": 5.0,
    "n_freq": 25,
    "wavelet": "cmor1.5-1.0",
    "log_floor": -3.0,
}
feat_wav = features.PairPoseDistanceWavelet(params=wavelet_params)
wav_run = dataset.run_feature(
    feat_wav,
    groups=["calms21_task1_train"],          # optional scope by group
    input_feature="pair-posedistance-pca",   # name of prior feature
    input_kind="feature",
)

### Pair-Egocentric and Wavelet

In [None]:
# Run the PairEgocentricFeatures feature on the task-1 training group,
# with overwrite requested.
ego_params = {
    "neck_idx": 3,
    "tail_base_idx": 6,
    # "fps_default": 30.0,   # optional override, currently disabled
}
feat = features.PairEgocentricFeatures(params=ego_params)
run_id = dataset.run_feature(
    feat,
    overwrite=True,
    groups=["calms21_task1_train"],
)

[feature:pair-egocentric] completed run_id=0.1-5f7ddf53ba -> /Volumes/JD-SSD/mice_data/calms21/dataset/features/pair-egocentric/0.1-5f7ddf53ba


In [None]:
# Wavelet feature computed on top of the stored "pair-egocentric" feature.
# TODO: refine so the wavelet parameters can be changed.
wavelet_params = {
    "fps_default": 30.0,
    "f_min": 0.2,
    "f_max": 5.0,
    "n_freq": 25,
    "wavelet": "cmor1.5-1.0",
    "log_floor": -3.0,
}
feat_wav = features.PairPoseDistanceWavelet(params=wavelet_params)
wav_run = dataset.run_feature(
    feat_wav,
    input_run_id=None,                 # or a specific run_id; None => latest
    input_feature="pair-egocentric",   # name of prior feature
    input_kind="feature",
    # optionally scope by groups or sequences:
    # groups=['calms21_task1_train']
)

# CRIM13
https://data.caltech.edu/records/4emt5-b0t10


# ESI 11-11

In [None]:
# Create a new manifest for the ESI 11-11 dataset.
name = "esi11-11"
# Choose a base directory (will be created if it doesn't exist)
base = "/Volumes/JD-SSD/ESI-mice/dataset1/"

manifest_path = ds.new_dataset_manifest(
    name=name,
    base_dir=base,
    version="0.1.0",
    index_format="group/sequence",
)
# Peek at the first 30 lines of the manifest, one per output line.
# (Printing the list itself would show a single-line Python repr.)
print("\n".join(Path(manifest_path).read_text().splitlines()[:30]))

Wrote dataset manifest -> /Volumes/JD-SSD/ESI-mice/dataset1/dataset.yaml


## Load existing

In [64]:
# Pick up any edits to dataset.py, then open the ESI dataset from its base directory.
importlib.reload(ds)
base = "/Volumes/JD-SSD/ESI-mice/dataset1/"
dataset = ds.Dataset(base)
dataset.load()

# Index native TRex NPZs (per id)
dataset.index_tracks_raw(
    src_format="trex_npz",
    patterns="*.npz",
    search_dirs=["/Volumes/JD-SSD/ESI-mice/data"],
    multi_sequences_per_file=False,
)

# Convert the indexed raw tracks with per-sequence merging requested.
dataset.convert_all_tracks(merge_per_sequence=True)


[index_tracks_raw] 4 -> /Volumes/JD-SSD/ESI-mice/dataset1/tracks_raw/index.csv


### PoseDistance and Wavelet

In [None]:
# Imported here so this section also runs when the CALMS21 feature cells
# (the only other place `features` is imported) have not been executed.
import features

# Step 1: PairPoseDistancePCA feature over the whole dataset.
feat = features.PairPoseDistancePCA(params={
    "n_components": 6,
    "pose_n": 7
})
run_id = dataset.run_feature(feat)

# Step 2: wavelet feature computed on top of the stored PCA feature.
feat_wav = features.PairPoseDistanceWavelet(params={
    "fps_default": 30.0,
    "f_min": 0.2,
    "f_max": 5.0,
    "n_freq": 25,
    "wavelet": "cmor1.5-1.0",
    "log_floor": -3.0,
})
wav_run = dataset.run_feature(
    feat_wav,
    input_kind="feature",
    input_feature="pair-posedistance-pca",   # name of prior feature
    input_run_id=None,                     # or a specific run_id; None => latest
    # optionally scope by groups or sequences via `groups=[...]`
)

[feature:pair-posedistance-pca] completed run_id=0.1-d2cdb45ffd -> /Volumes/JD-SSD/ESI-mice/dataset1/features/pair-posedistance-pca/0.1-d2cdb45ffd


### Pair-Egocentric and Wavelet

In [None]:
# Imported here so this section also runs when the CALMS21 feature cells
# (the only other place `features` is imported) have not been executed.
import features

# Step 1: PairEgocentricFeatures feature over the whole dataset.
feat = features.PairEgocentricFeatures(params={
    "neck_idx": 3,
    "tail_base_idx": 6,
    # optional overrides:
    # "center_mode": "mean",
    # "fps_default": 30.0,
    # "smooth_win": 0,
})
run_id = dataset.run_feature(feat)


# Step 2: wavelet feature computed on top of the stored egocentric feature.
# TODO: refine so the wavelet parameters can be changed.
feat_wav = features.PairPoseDistanceWavelet(params={
    "fps_default": 30.0,
    "f_min": 0.2,
    "f_max": 5.0,
    "n_freq": 25,
    "wavelet": "cmor1.5-1.0",
    "log_floor": -3.0,
})
wav_run = dataset.run_feature(
    feat_wav,
    input_kind="feature",
    input_feature="pair-egocentric",   # name of prior feature
    input_run_id=None,                     # or a specific run_id; None => latest
    # optionally scope by groups or sequences via `groups=[...]`
)

[feature:pair-egocentric] completed run_id=0.1-5f7ddf53ba -> /Volumes/JD-SSD/ESI-mice/dataset1/features/pair-egocentric/0.1-5f7ddf53ba
[feature:pair-posedistance-wavelet] completed run_id=0.1-2a34d68f02 -> /Volumes/JD-SSD/ESI-mice/dataset1/features/pair-posedistance-wavelet/0.1-2a34d68f02


  df.loc[sel, "finished_at"] =  finished_at
