# Auditory analysis

In [None]:
import csv
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path

import pandas as pd

# === CONFIG ===
# Root folder that batch_process() walks (day folders -> per-mouse session folders).
BASE_DIR = (
    Path(r"C:/Users/aleja/Box/Awake Project/Maze data")
    / "Auditory experiments"
    / "8_arms_w_voc"
)

# Script launched once per session folder.  <-- change me
POSTHOC_SCRIPT = Path(r"posthoc_video_time_in_maze.py")

# Seconds; positive means video starts AFTER first trial start.
MANUAL_OFFSET = 0

# ROI file looked up inside each session folder; it is only passed to the
# script when present (per the original note, the script auto-draws if missing).
ROIS_FILENAME = "rois1.csv"

# True = just print what would run, don't execute.
DRY_RUN = False

# Skip sessions whose trials CSV header already contains the output column.
SKIP_IF_ALREADY_UPDATED = True

def find_session_files(session_dir: Path, rois_filename=None):
    """Locate the video, trials CSV and ROI file of one session folder.

    Only regular files directly inside ``session_dir`` are considered, and
    they are examined in sorted order so the pick is reproducible when a
    folder holds several candidates.

    Args:
        session_dir: Folder to search (not recursive).
        rois_filename: Name of the ROI file expected inside ``session_dir``.
            Defaults to the module-level ``ROIS_FILENAME``.

    Returns:
        A tuple ``(video, trials, rois)``. Each element is resolved
        independently and is ``None`` when no match exists:
        ``video`` is the first ``.mp4`` file (case-insensitive suffix),
        ``trials`` is the first ``.csv`` file whose name starts with
        ``trials_time``, and ``rois`` is ``session_dir / rois_filename``
        if that file exists.
    """
    if rois_filename is None:
        rois_filename = ROIS_FILENAME

    video = None
    trials = None
    # iterdir() yields entries in arbitrary order; sort to make "first" stable.
    for entry in sorted(session_dir.iterdir()):
        if not entry.is_file():
            # A directory that happens to be named "*.mp4" is not a video.
            continue
        suffix = entry.suffix.lower()
        if video is None and suffix == ".mp4":
            video = entry
        elif trials is None and suffix == ".csv" and entry.name.startswith("trials_time"):
            trials = entry
        if video is not None and trials is not None:
            break

    rois = session_dir / rois_filename
    return video, trials, (rois if rois.is_file() else None)

def trials_already_have_time_in_maze(trials_csv: Path) -> bool:
    """Report whether the trials CSV header has a ``time_in_maze_ms`` column.

    Only the first record of the file is read. The header is parsed with the
    ``csv`` module so quoted column names are recognised, and a leading UTF-8
    byte-order mark is dropped so it cannot hide the first column.

    Args:
        trials_csv: Path of the trials CSV file to inspect.

    Returns:
        ``True`` if a header cell equals ``time_in_maze_ms``; ``False`` if it
        is absent, the file is empty, or the file cannot be read or parsed.
    """
    try:
        # newline="" lets the csv module handle line endings itself;
        # "utf-8-sig" strips a BOM when present and is plain UTF-8 otherwise.
        with open(trials_csv, "r", encoding="utf-8-sig", errors="ignore", newline="") as f:
            header = next(csv.reader(f), [])
    except (OSError, ValueError, csv.Error):
        # Best effort: an unreadable file is treated as "not updated yet".
        return False
    return "time_in_maze_ms" in header

def run_posthoc_on_session(session_dir: Path, manual_offset=0):
    """Run the post-hoc script on one session folder and describe the outcome.

    Args:
        session_dir: Session folder holding the video and trials CSV.
        manual_offset: Offset in seconds forwarded to the script as
            ``--manual-offset-secs``; omitted from the command when zero.

    Returns:
        A dict with at least ``session`` and ``status``. ``status`` is one of
        ``skip_missing_files``, ``skip_already_updated``, ``dry_run``, ``ok``
        or ``error``. Rows for executed runs also carry ``video``, ``trials``,
        the last 500 characters of stdout/stderr and the return code
        (``None`` when the process could not be started).
    """
    video, trials, rois = find_session_files(session_dir)
    if not video or not trials:
        return {"session": str(session_dir), "status": "skip_missing_files"}

    if SKIP_IF_ALREADY_UPDATED and trials_already_have_time_in_maze(trials):
        return {"session": str(session_dir), "status": "skip_already_updated", "trials": str(trials)}

    cmd = [
        # Use the interpreter running this code rather than whatever "python"
        # resolves to on PATH; fall back to "python" if it is unknown.
        sys.executable or "python", str(POSTHOC_SCRIPT),
        "--session", str(session_dir),
        "--video", str(video),
        "--trials", str(trials),
    ]
    if rois:
        cmd += ["--rois", str(rois)]
    if manual_offset and float(manual_offset) != 0.0:
        cmd += ["--manual-offset-secs", str(manual_offset)]

    if DRY_RUN:
        print("[DRY RUN]", " ".join(cmd))
        return {"session": str(session_dir), "status": "dry_run", "video": str(video), "trials": str(trials)}

    print(">>", " ".join(cmd))
    try:
        # errors="replace" keeps undecodable child output from raising here.
        completed = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
    except OSError as exc:
        # Could not launch the process at all: record it as an error row so
        # the remaining sessions of the batch still get processed.
        return {
            "session": str(session_dir),
            "status": "error",
            "video": str(video),
            "trials": str(trials),
            "stdout_tail": "",
            "stderr_tail": f"{type(exc).__name__}: {exc}"[-500:],
            "returncode": None,
        }

    status = "ok" if completed.returncode == 0 else "error"
    return {
        "session": str(session_dir),
        "status": status,
        "video": str(video),
        "trials": str(trials),
        "stdout_tail": completed.stdout[-500:],
        "stderr_tail": completed.stderr[-500:],
        "returncode": completed.returncode,
    }

def batch_process(root: Path):
    """
    Run the post-hoc step on every session folder under ``root``.

    Expected layout (two directory levels, each visited in sorted order):
      root/
        YYYY-MM-DD/
          mouseX/
            <video>.mp4
            trials_time_*.csv
            rois1.csv (optional; tool will prompt to draw if missing)

    Returns a DataFrame with one row per session folder (the dict returned by
    run_posthoc_on_session) plus a ``run_at`` column holding the timestamp
    taken once all sessions have been processed.
    """
    def _subdirs(parent):
        # Sorted sub-directories of `parent`; plain files are ignored.
        return sorted(child for child in parent.iterdir() if child.is_dir())

    results = [
        run_posthoc_on_session(session, manual_offset=MANUAL_OFFSET)
        for day_dir in _subdirs(root)
        for session in _subdirs(day_dir)
    ]

    summary = pd.DataFrame(results)
    summary["run_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return summary

# ==== RUN IT ====
# Process every session under BASE_DIR and show the per-session summary.
summary_df = batch_process(BASE_DIR)
display(summary_df)

# Write the summary as a timestamped CSV inside BASE_DIR.
log_name = f"posthoc_batch_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
log_path = BASE_DIR.joinpath(log_name)
summary_df.to_csv(log_path, index=False)
print("Saved log to:", log_path)



