In [1]:
import pandas as pd
import sys
import argparse
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import os
import numpy as np
import glob
import traceback
import re
from pathlib import Path

In [47]:
#Merge events files 
all_events_path = r"D:\Box Sync\Box Sync\00_BEAR_Lab\Projects\Kyler(Kihun) Hong\1st_Haptic Sleeve\PilotExperiment\MotorLearning\20251209_SHF04_Session1"
all_events_files = glob.glob(os.path.join(all_events_path, "*_events.csv"))
all_events_df = pd.DataFrame()

# Use folder name to create file label: 
folder_main   = Path(all_events_path).name 
parts = folder_main.split('_')
if parts and re.fullmatch(r"\d{8}", parts[0]):
    run_label = "_".join(parts[1:])       # "SHF01_Session1"
else:
    run_label = folder_main              # use full folder name
n_files = len(all_events_files)
print(f"Found {n_files} event files to merge for run: {run_label}")

dfs = []
for file in all_events_files:
    try:
        # Read EVERYTHING as string so nothing gets rounded
        temp_df = pd.read_csv(file, sep="\t", dtype=str)

        # Convert only safe integer columns (edit names if yours differ)
        for c in ["trial", "button_index"]:
            if c in temp_df.columns:
                temp_df[c] = pd.to_numeric(temp_df[c], errors="coerce").astype("Int64")

        # IMPORTANT: keep ros_time as string to preserve decimals exactly
        # temp_df["ros_time"] stays dtype=str

        dfs.append(temp_df)

    except Exception as e:
        print(f"Error reading {file}: {e}")
        traceback.print_exc()

all_events_df = pd.concat(dfs, ignore_index=True)

out_name = f"{run_label}_combined_events.csv"
out_path = os.path.join(all_events_path, out_name)

all_events_df.to_csv(out_path, index=False, sep="\t")
print("Saved:", out_path)

# sanity check: show raw characters (should include '.')
if "ros_time" in all_events_df.columns:
    print("Example ros_time values:", all_events_df["ros_time"].head(5).tolist())


Found 2 event files to merge for run: SHF04_Session1
Saved: D:\Box Sync\Box Sync\00_BEAR_Lab\Projects\Kyler(Kihun) Hong\1st_Haptic Sleeve\PilotExperiment\MotorLearning\20251209_SHF04_Session1\SHF04_Session1_combined_events.csv


In [None]:
# Function to read trial comments related to success/failure/collision
def read_trial_comments(session_dir: Path, trial_is_one_based=True, collision_run_label=None):
    """
    Reads files like:
      SNHF06_Session1_000_commnet   (or .txt)
    Content like:
      good 3

    Returns DataFrame with: session, trial, comment_label, comment_value
    """
    session_dir = Path(session_dir)
    collision_run_label = str(collision_run_label)
    m_fallback = re.search(r"Session(?P<session>\d+)", collision_run_label, re.IGNORECASE)
    fallback_session = int(m_fallback.group("session")) if m_fallback else pd.NA

    # accept both "commnet" and "comment", with or without extension
    rx = re.compile(
    r".*(?:_Session(?P<session>\d+))?_(?P<idx>\d{3})_(?:commnet|comment)(?:\.\w+)?$",
        re.IGNORECASE
    )

    rows = []
    for p in sorted(session_dir.glob("*_commnet*")) + sorted(session_dir.glob("*_comment*")):
        m = rx.match(p.name)
        if not m:
            continue

        session_str = m.group("session")
        session = int(session_str) if session_str is not None else fallback_session
        idx = int(m.group("idx"))                 # 000, 001, ...
        trial = idx if trial_is_one_based else idx

        text = p.read_text(encoding="utf-8", errors="ignore").strip()
        parts = text.split()

        label = parts[0] if len(parts) >= 1 else pd.NA
        value = parts[1] if len(parts) >= 2 else pd.NA

        # numeric if possible
        try:
            value = float(value)
        except Exception:
            pass

        rows.append({
            "session": session,
            "trial": trial,
            "comment_label": label,
            "comment_value": value,
        })

    df = pd.DataFrame(rows).sort_values(["session", "trial"], na_position="last")
    return df


# --- usage ---
SESSION_DIR = Path(r"D:\Box Sync\Box Sync\00_BEAR_Lab\Projects\Kyler(Kihun) Hong\1st_Haptic Sleeve\PilotExperiment\MotorLearning\20251210_SHF04_Session2")

collision_run_label = SESSION_DIR.name  # or str(SESSION_DIR) also works


comments_df = read_trial_comments(SESSION_DIR, trial_is_one_based=True, collision_run_label = collision_run_label)
print(comments_df)
col_out_path = SESSION_DIR / "SHF04_Session1_collisions.csv"
comments_df.to_csv(col_out_path, index=False)


    session  trial comment_label  comment_value
0         1      0          good            1.0
1         1      1          good            0.0
2         1      2          good            0.0
3         1      3          good            0.0
4         1      4          good            1.0
5         1      5          good            2.0
6         1      6          good            0.0
7         1      7          good            0.0
8         1      8          good            1.0
9         1      9          good            0.0
10        1     10          good            0.0
11        1     11          good            0.0
12        1     12          good            0.0
13        1     13          good            0.0
14        1     14          good            0.0
15        1     15          good            0.0
16        1     16          good            0.0
17        1     17          good            0.0
18        1     18          good            0.0
19        1     19          good        

In [59]:
# Trial events time stamps: using collision file to segment 
all_events_path = r"D:\Box Sync\Box Sync\00_BEAR_Lab\Projects\Kyler(Kihun) Hong\1st_Haptic Sleeve\PilotExperiment\MotorLearning\20251227_SHF05_Session3"
folder = Path(all_events_path).name

m = re.search(r"_(?P<pid>[A-Za-z]+[0-9]+)_Session(?P<session>\d+)", folder, re.IGNORECASE)
if not m:
    raise ValueError(f"Could not parse pid/session from folder name: {folder}")

pid = m.group("pid")
session = int(m.group("session"))


events_file = glob.glob(os.path.join(all_events_path, "*_combined_events.csv"))
collision_files = glob.glob(os.path.join(all_events_path, f"{pid}_Session{session}_collisions.csv"))
print(collision_files)

if events_file and os.path.isfile(events_file[0]):
    print("Found:", events_file[0])
else:
    print("Not found:", events_file)
    events_file = glob.glob(os.path.join(all_events_path, "*_events.csv"))

collision_df = pd.read_csv(collision_files[0])
events_df = pd.read_csv(events_file[0])   

def trial_timestamps_from_events_with_fail_trials(
    events_df: pd.DataFrame,
    collision_df: pd.DataFrame,
    trial_col: str = "trial",
    event_col: str = "event",
    time_col: str = "ros_time",
    comment_col: str = "comment_label",
    fail_value: str = "fail",
):
    if comment_col in collision_df.columns:
        fail_mask = (
            collision_df[comment_col]
            .astype(str)
            .str.strip()
            .str.lower()
            .eq(str(fail_value).lower())
        )
        fail_trials = set(collision_df.loc[fail_mask, trial_col].dropna().tolist())
    else:
        fail_trials = set()
    out = []

    # iterate trials based on events_df order
    for trial, g in events_df.groupby(trial_col, sort=False):
        # If fail -> only save trial + fail
        if trial in fail_trials:
            out.append({trial_col: trial, "outcome": "fail"})
            continue

        g = g.sort_values(time_col)

        share_times = g.loc[g[event_col] == "share_button", time_col]
        rec_times   = g.loc[g[event_col] == "start_recording", time_col]
        sq_times    = g.loc[g[event_col] == "square_button", time_col]

        # start: prefer share_button, else start_recording
        if len(share_times) > 0:
            start_time = share_times.iloc[0]
        elif len(rec_times) > 0:
            start_time = rec_times.iloc[0]
        else:
            start_time = pd.NA

        # end: take LAST square_button after start_time (fallback: last square_button)
        if len(sq_times) == 0:
            end_time = pd.NA
        else:
            if pd.isna(start_time):
                end_time = sq_times.iloc[-1]
            else:
                after = sq_times[sq_times >= start_time]
                end_time = after.iloc[-1] if len(after) > 0 else sq_times.iloc[-1]

        out.append({trial_col: trial, "outcome": "ok", "start_time": start_time, "end_time": end_time})

    return pd.DataFrame(out)

trial_ts = trial_timestamps_from_events_with_fail_trials(
    events_df=events_df,
    collision_df=collision_df,
    trial_col="trial",
    event_col="event",
    time_col="ros_time",
    comment_col="comment_label",
    fail_value="fail"
)
out_dir = Path(all_events_path) 
out_dir.mkdir(parents=True, exist_ok=True)

# --- build a nice filename ---
out_csv = out_dir / f"{pid}_Session{session}_trial_timestamps.csv"

# --- save ---
trial_ts.to_csv(out_csv, index=False)



['D:\\Box Sync\\Box Sync\\00_BEAR_Lab\\Projects\\Kyler(Kihun) Hong\\1st_Haptic Sleeve\\PilotExperiment\\MotorLearning\\20251227_SHF05_Session3\\SHF05_Session3_collisions.csv']
Found: D:\Box Sync\Box Sync\00_BEAR_Lab\Projects\Kyler(Kihun) Hong\1st_Haptic Sleeve\PilotExperiment\MotorLearning\20251227_SHF05_Session3\SHF05_Session3_combined_events.csv


In [61]:
# Data segmentation
ROOT1 = Path(r"D:\Box Sync\Box Sync\00_BEAR_Lab\Projects\Kyler(Kihun) Hong\1st_Haptic Sleeve\PilotExperiment\MotorLearning\20251227_SHF05_Session3\Rosbag")
Rosfolder = ROOT1.parent.name
print(Rosfolder)
m = re.search(r"_(?P<pid>[A-Za-z]+[0-9]+)_Session(?P<session>\d+)", Rosfolder, re.IGNORECASE)
if not m:
    raise ValueError(f"Could not parse pid/session from folder name: {Rosfolder}")
pid = m.group("pid") 
print(pid[-1])
session = int(m.group("session"))
collision_files = glob.glob(os.path.join(all_events_path, f"{pid}_Session{session}_collisions.csv"))
TF_GLOB = f"SHF0{pid[-1]}_Session{session}_*_tf"             # matches 000_tf, 001_tf, ...
TOPIC_SUBSTRINGS = [
    "vx300s-commands-joy_processed",
    "vx300s-gripper_pos",
    "vx300s-gripper_vel"   # common spelling (in case)
]

TRIAL_IS_ONE_BASED = True

def pick_time_col(df: pd.DataFrame):
    """Try common time columns from bag conversions."""
    candidates = [
        "Time"
    ]
    for c in candidates:
        if c in df.columns:
            return c
    # last resort: any column containing 'time' or 'stamp'
    for c in df.columns:
        lc = c.lower()
        if "time" in lc or "stamp" in lc:
            return c
    raise ValueError(f"No time column found. Columns={list(df.columns)[:30]}...")

def find_topic_csvs(trial_folder: Path):
    """Return {substring: [csv_paths...]} for the substrings we care about."""
    csvs = list(trial_folder.glob("*.csv"))
    hits = {}
    for sub in TOPIC_SUBSTRINGS:
        matches = [p for p in csvs if sub in p.name]
        if matches:
            hits[sub] = matches
    return hits


def folder_index_from_name(folder: Path):
    # expects ..._000_tf
    m = re.search(r"_(\d{3})_tf$", folder.name)
    if not m:
        return None
    return int(m.group(1))

def segment_csv(in_path: Path, out_path: Path, start_time, end_time):
    df = pd.read_csv(in_path)
    tcol = pick_time_col(df)

    # Ensure numeric
    df[tcol] = pd.to_numeric(df[tcol], errors="coerce")

    seg = df[(df[tcol] >= start_time) & (df[tcol] <= end_time)].copy()
    out_path.parent.mkdir(parents=True, exist_ok=True)
    seg.to_csv(out_path, index=False)
    return len(seg), tcol

def segment_all_trials (ts: pd.DataFrame, root: Path): 
    trial_folders = sorted(root.glob(TF_GLOB))
    print(f"Found {len(trial_folders)} trial folders to segment."   )
    for folder in trial_folders:
        idx = folder_index_from_name(folder)
        if idx is None:
            print(f"Skipping folder with unexpected name: {folder}")
            continue
        trial_num = idx  if TRIAL_IS_ONE_BASED else idx
        row = ts.loc[ts['trial'] == trial_num]
        if row.empty: 
            print(f"No timestamp data for trial {trial_num}, skipping folder {folder}")
            continue
        start_time = row['start_time'].values[0]
        end_time = row['end_time'].values[0]    
        
        if pd.isna(start_time) or pd.isna(end_time):
            print(f"[WARN] Missing start/end for trial={trial_num} -> start={start_time}, end={end_time}")
            continue
        hits = find_topic_csvs(folder)
        if not hits:
            print(f"[WARN] No matching topic CSVs found in {folder.name}")
            continue

        out_dir = folder / "segmented"
        print(f"\nTrial {trial_num} ({folder.name}) start={start_time} end={end_time}")

        for sub, paths in hits.items():
            for p in paths:
                out_path = out_dir / p.name.replace(".csv", "_seg.csv")
                n, tcol = segment_csv(p, out_path, start_time, end_time)
                print(f"  - {p.name}  -> {out_path.name}  rows={n}  (time_col={tcol})")


segment_all_trials(trial_ts, ROOT1)

20251227_SHF05_Session3
5
Found 35 trial folders to segment.

Trial 0 (SHF05_Session3_000_tf) start=1766893232.2613852 end=1766893287.1180315
  - vx300s-commands-joy_processed.csv  -> vx300s-commands-joy_processed_seg.csv  rows=4474  (time_col=Time)
  - vx300s-gripper_pos.csv  -> vx300s-gripper_pos_seg.csv  rows=549  (time_col=Time)
  - vx300s-gripper_vel.csv  -> vx300s-gripper_vel_seg.csv  rows=549  (time_col=Time)

Trial 1 (SHF05_Session3_001_tf) start=1766893301.4101086 end=1766893351.8069413
  - vx300s-commands-joy_processed.csv  -> vx300s-commands-joy_processed_seg.csv  rows=4047  (time_col=Time)
  - vx300s-gripper_pos.csv  -> vx300s-gripper_pos_seg.csv  rows=504  (time_col=Time)
  - vx300s-gripper_vel.csv  -> vx300s-gripper_vel_seg.csv  rows=504  (time_col=Time)

Trial 2 (SHF05_Session3_002_tf) start=1766893365.203075 end=1766893442.207939
  - vx300s-commands-joy_processed.csv  -> vx300s-commands-joy_processed_seg.csv  rows=5921  (time_col=Time)
  - vx300s-gripper_pos.csv  -> vx3