# Trial segmentation script

This notebook is intended for the breakdown, classification and analysis of the behavioural states shown in the video, starting from a trial segmentation.

- `trial segmentation` takes the output of simplerCode.py (video + .csv) and segments the video based on mouse entry and exit times, excluding trials where the mouse entry or exit was not detected.
- we then will manually sort the trials into `exploitative`, `explorative` and `nest`
- extract speed and trajectories per trial
- identify behavioural syllables with keypoint moseq 2D



In [None]:
# install libraries
# !pip install -q pandas moviepy imageio-ffmpeg

In [7]:
import os
import pandas as pd
from pathlib import Path
import subprocess, shutil, math

import re

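Here we isolate `mouse 6357` and `mouse 6359`, sessions `1.1`, `3.5` and `3.6`. The folder filter in the next cell also accepts `habituation`, `3.7` and `3.8` sessions when such folders exist.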

In [None]:
main_dir = "C:/Users/aleja/Box/Awake Project/Maze data/simplermaze/"

# A mouse/session folder is kept when its name contains any of these tags.
mouse_tags = ("6357", "6359")
session_tags = ("habituation", "1.1", "3.5", "3.6", "3.7", "3.8")

#this is where the csv files of interest are stored
csvs = []

#this is where we store the videos
videos = []

# trial-info csv path -> video path. Pairs are formed inside each session
# folder, so a session with a missing or extra file cannot shift the pairing
# of the sessions listed after it (which a single global zip would do).
csv_video = {}

for mouse_name in sorted(os.listdir(main_dir)):
    mouse_dir = os.path.join(main_dir, mouse_name)
    if not any(tag in mouse_name for tag in mouse_tags) or not os.path.isdir(mouse_dir):
        continue

    for session in sorted(os.listdir(mouse_dir)):
        session_dir = os.path.join(mouse_dir, session)
        if not any(tag in session for tag in session_tags) or not os.path.isdir(session_dir):
            continue

        session_csvs = []
        session_videos = []
        for file in sorted(os.listdir(session_dir)):
            full_path_file = os.path.join(session_dir, file)
            if "trial_info.csv" in file and "clean" not in file:
                session_csvs.append(full_path_file)
            elif "mp4" in file:
                session_videos.append(full_path_file)

        csvs.extend(session_csvs)
        videos.extend(session_videos)

        if len(session_csvs) != len(session_videos):
            # Surface the mismatch instead of silently pairing the wrong files.
            print(
                f"[WARN] {session_dir}: found {len(session_csvs)} trial_info csv(s) "
                f"and {len(session_videos)} mp4(s); only matched pairs are kept"
            )
        csv_video.update(zip(session_csvs, session_videos))


In [4]:

# Show each trial-info CSV next to the video it was paired with (file names only).
for csv_path, video_path in csv_video.items():
    csv_name = os.path.basename(csv_path)
    video_name = os.path.basename(video_path)
    print(f"{csv_name}    :     {video_name}")

mouse6357_session3.5_trial_info.csv    :     6357_2024-08-27_13_05_19s3.5.mp4
mouse6357_session3.6_trial_info.csv    :     6357_2024-08-28_11_58_14s3.6.mp4
mouse6357_session1.1_trial_info.csv    :     6357_2024-08-15_11_23_10.mp4
mouse6359_session3.5_trial_info.csv    :     6359_2024-08-27_14_08_35s3.5.mp4
mouse6359_session3.6_trial_info.csv    :     6359_2024-08-28_13_28_27s3.6.mp4
mouse6359_session1.1_trial_info.csv    :     6359_2024-08-15_14_05_08.mp4


In [None]:
def ffmpeg_bin():
    """Return the ffmpeg executable to invoke.

    The ``IMAGEIO_FFMPEG_EXE`` environment variable is used when it is set to
    a non-empty value; otherwise the bare command name ``"ffmpeg"`` is returned.
    """
    override = os.environ.get("IMAGEIO_FFMPEG_EXE")
    if override:
        return override
    return "ffmpeg"

def ms_to_sec(x):
    """Convert a millisecond value to seconds.

    Returns ``None`` when ``pd.isna(x)`` is true; otherwise ``float(x) / 1000.0``.
    """
    if pd.isna(x):
        return None
    return float(x) / 1000.0

def sanitize(s: str) -> str:
    """Return ``s`` with each of the characters < > : " / \\ | ? * replaced by an underscore."""
    forbidden = '<>:"/\\|?*'
    # One translation pass instead of one str.replace call per character.
    return s.translate(str.maketrans(dict.fromkeys(forbidden, "_")))

def build_output_name(base_label: str, trial_idx: int, ext=".mp4"):
    """Build the file name for one trial segment.

    The result is ``<sanitized base_label>_trial_<NNN><ext>``, where ``NNN`` is
    ``int(trial_idx)`` zero-padded to at least three digits.
    """
    safe_label = sanitize(base_label)
    trial_number = int(trial_idx)
    return "{}_trial_{:03d}{}".format(safe_label, trial_number, ext)

def cut_with_ffmpeg(input_video: Path, start_s: float, end_s: float, out_path: Path, reencode: bool):
    """Write the ``start_s``..``end_s`` span of ``input_video`` to ``out_path`` with ffmpeg.

    Parameters
    ----------
    input_video : Path
        Source video file.
    start_s, end_s : float
        Segment boundaries in seconds; they are passed to ffmpeg with
        millisecond precision.
    out_path : Path
        Destination file. Its parent directory is created when missing and an
        existing file at this path is overwritten.
    reencode : bool
        ``True`` re-encodes the first video stream with libx264
        (``-preset veryfast -crf 18``); ``False`` stream-copies it.

    Raises
    ------
    subprocess.CalledProcessError
        When ffmpeg exits with a non-zero status (``check=True``).
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # -nostdin: never wait on keyboard input (a notebook kernel has no usable console).
    # -y: overwrite an existing segment. Without it ffmpeg asks for confirmation
    #     when out_path exists, so a re-run would block or fail.
    common_head = [ffmpeg_bin(), "-hide_banner", "-loglevel", "error", "-nostdin", "-y"]

    if reencode:
        dur = max(0.0, end_s - start_s)
        span_args = ["-ss", f"{start_s:.3f}", "-t", f"{dur:.3f}"]
        codec_args = ["-c:v", "libx264", "-preset", "veryfast", "-crf", "18"]
    else:
        span_args = ["-ss", f"{start_s:.3f}", "-to", f"{end_s:.3f}"]
        codec_args = ["-c", "copy"]

    cmd = (
        common_head
        + span_args                       # given before -i, exactly as in the original command
        + ["-i", str(input_video)]
        + ["-map", "0:v:0?"]              # first video stream only; '?' makes the mapping optional
        + codec_args
        + ["-movflags", "+faststart", "-reset_timestamps", "1", str(out_path)]
    )
    subprocess.run(cmd, check=True)

def segment_one_session(
    video, trials_csv, outdir=None, base_label=None,
    offset_ms=0.0, padding_ms=0.0, method="copy", inplace=False,
    column_name="video_segment_path"
):
    """Cut one session video into per-trial clips and record the clip paths in a CSV.

    Parameters
    ----------
    video : str or Path
        Session video to cut.
    trials_csv : str or Path
        CSV with one row per trial. Must contain ``mouse_enter_time`` and
        ``end_trial_time`` (milliseconds); ``trial_ID`` is used for file naming
        when present, otherwise the row index is used.
    outdir : str or Path, optional
        Folder for the clips; defaults to ``<video folder>/segments``.
    base_label : str, optional
        Prefix for clip file names; defaults to the video file stem.
    offset_ms : float
        Added to both timestamps before cutting.
    padding_ms : float
        Subtracted from the start and added to the end of every clip.
    method : str
        ``"reencode"`` re-encodes each clip; any other value stream-copies.
    inplace : bool
        ``True`` overwrites ``trials_csv``; ``False`` writes
        ``<csv stem>_with_segments.csv`` next to it.
    column_name : str
        Column that receives the clip path of every successfully cut trial.

    Returns
    -------
    dict
        ``{"segments_made": int, "trials_skipped": int, "updated_csv": str}``.

    Raises
    ------
    ValueError
        If a required time column is missing from the CSV.

    Notes
    -----
    A trial is skipped (not fatal) when either timestamp is missing, when the
    computed end is not after the start, or when naming or cutting the clip
    raises. In the last case a ``[WARN]`` line is printed.
    """
    video = Path(video).resolve()
    trials_csv = Path(trials_csv).resolve()
    outdir = Path(outdir).resolve() if outdir else (video.parent / "segments")
    outdir.mkdir(parents=True, exist_ok=True)

    df = pd.read_csv(trials_csv)
    for col in ["mouse_enter_time", "end_trial_time"]:
        if col not in df.columns:
            raise ValueError(f"Required column '{col}' missing in {trials_csv.name}")

    trial_ids = df["trial_ID"].tolist() if "trial_ID" in df.columns else list(df.index)
    if column_name not in df.columns:
        df[column_name] = pd.NA
    # A pre-existing but empty column is read back as float64; force object
    # dtype so path strings can be stored without relying on implicit upcasting.
    df[column_name] = df[column_name].astype("object")

    base_label = base_label if (base_label is not None and base_label != "") else video.stem
    reencode = (method == "reencode")
    offset = float(offset_ms)
    padding = float(padding_ms)

    made = 0
    skipped = 0
    # Read and write through the same index label so the row that is inspected
    # is always the row that gets updated.
    for row_label, trial_id in zip(df.index, trial_ids):
        enter_ms = df.at[row_label, "mouse_enter_time"]
        exit_ms = df.at[row_label, "end_trial_time"]
        if pd.isna(enter_ms) or pd.isna(exit_ms):
            skipped += 1
            continue

        # Everything that can fail for a single trial (number parsing, file
        # naming, ffmpeg) stays inside the try so one bad row cannot abort the
        # session before the CSV is written.
        try:
            start_ms = max(0.0, float(enter_ms) + offset - padding)
            end_ms = max(0.0, float(exit_ms) + offset + padding)
            if end_ms <= start_ms:
                skipped += 1
                continue

            out_path = outdir / build_output_name(base_label, trial_id)
            cut_with_ffmpeg(
                video, ms_to_sec(start_ms), ms_to_sec(end_ms), out_path,
                reencode=reencode,
            )
            df.at[row_label, column_name] = str(out_path)
            made += 1
        except Exception as e:
            print(f"[WARN] {video.name} | trial {trial_id}: {e}")
            skipped += 1

    updated = trials_csv if inplace else trials_csv.with_name(trials_csv.stem + "_with_segments.csv")
    df.to_csv(updated, index=False)
    return {"segments_made": made, "trials_skipped": skipped, "updated_csv": str(updated)}

In [None]:
# Cut every paired session and collect one summary record per session.
summaries = []
for trials_csv, video in csv_video.items():
    outdir = Path(video).parent / "segments"  # per-session folder next to the video
    res = segment_one_session(
        video=video,
        trials_csv=trials_csv,
        outdir=outdir,
        base_label=None,       # default = video filename stem
        offset_ms=0.0,
        padding_ms=0.0,
        method="reencode",     # or "copy" for faster keyframe cuts
        inplace=False,         # don't overwrite CSV unless you want to
        column_name="video_segment_path",
    )
    summaries.append({"video": video, "trials_csv": trials_csv, **res})

# pandas is already imported as `pd` in the imports cell at the top of the
# notebook, so no second import is needed here.
pd.DataFrame(summaries)