In [1]:
#!/usr/bin/env python3
import os
import re
import sys
import numpy as np
import pyxdf
from scipy.io import savemat

In [2]:
# ------------------ USER SETTINGS ------------------
# Path of the XDF recording that is loaded and segmented by this notebook.
XDF_PATH = "./Data/17_Mireia/Subject17_Mireia_Javad_Shimmer_included.xdf"          # <-- change this
# Directory that receives the per-segment output files (created if it does not exist).
OUT_DIR = "segmented"
# Length in seconds of the window cut after each RollerCoasterStarted marker.
WINDOW_SEC = 58.0
# Participant identifier embedded in the output file names ("P<id>_round_XX_HMD...").
participant_id = "17"
# If you want to force exact stream names, set these (otherwise leave None).
# NOTE(review): these are meant to be passed as `force_name` to pick_stream();
# confirm that the stream-selection cell actually uses them.
FORCE_EEG_STREAM_NAME = None            # e.g. "EEG"
FORCE_FNIRS_STREAM_NAME = None          # e.g. "NIRS"
FORCE_MARKER_STREAM_NAME = None         # e.g. "Game_Markers"

In [3]:
# ---------------------------------------------------

def stream_name(stream):
    """Return the stream's name from its XDF info header, or "" if it has none.

    Parameters
    ----------
    stream : dict
        A stream dictionary as returned by ``pyxdf.load_xdf``; the name is
        read from ``stream["info"]["name"][0]``.

    Returns
    -------
    str
        The stream name. An empty string is returned when the ``info`` or
        ``name`` entry is missing, when ``name`` is an empty list, or when the
        stored value is ``None`` (which is how an empty XML element is stored),
        so callers can safely apply string methods to the result.
    """
    values = stream.get("info", {}).get("name") or [""]
    name = values[0]
    return "" if name is None else name

def stream_type(stream):
    """Return the stream's content type from its XDF info header, or "".

    Parameters
    ----------
    stream : dict
        A stream dictionary as returned by ``pyxdf.load_xdf``; the type is
        read from ``stream["info"]["type"][0]``.

    Returns
    -------
    str
        The stream type (e.g. "EEG", "Markers"). An empty string is returned
        when the ``info`` or ``type`` entry is missing, when ``type`` is an
        empty list, or when the stored value is ``None`` (an empty XML
        element), so callers can safely apply string methods to the result.
    """
    values = stream.get("info", {}).get("type") or [""]
    stype = values[0]
    return "" if stype is None else stype

def get_channel_labels(stream):
    """Return the channel labels listed in the stream's info description.

    The labels are read from
    ``stream["info"]["desc"][0]["channels"][0]["channel"]``; a channel entry
    without a ``label`` key contributes an empty string.

    Returns
    -------
    list or None
        One label per channel entry, or ``None`` when the description cannot
        be read for any reason (best-effort lookup: every exception is
        swallowed).
    """
    labels = []
    try:
        channel_entries = stream["info"]["desc"][0]["channels"][0]["channel"]
        for entry in channel_entries:
            labels.append(entry.get("label", [""])[0])
    except Exception:
        return None
    return labels

def slice_stream_by_time(stream, t0, t1):
    """Cut out the samples of ``stream`` whose timestamps fall in ``[t0, t1]``.

    Both bounds are inclusive.

    Parameters
    ----------
    stream : dict
        Must provide ``"time_stamps"`` and ``"time_series"``; both are
        converted to NumPy arrays.
    t0, t1 : float
        Start and end of the window, on the same clock as the timestamps.

    Returns
    -------
    tuple
        ``(data, times)``: the selected rows of the time series and their
        timestamps.
    """
    stamps = np.asarray(stream["time_stamps"])
    samples = np.asarray(stream["time_series"])
    in_window = np.logical_and(stamps >= t0, stamps <= t1)
    return samples[in_window], stamps[in_window]

def pick_stream(streams, force_name, kind):
    """
    Select one stream from ``streams``.

    kind in {'markers','eeg','fnirs'}

    If ``force_name`` is truthy, the first stream whose name equals it exactly
    is returned, and a RuntimeError is raised when no stream has that name.
    Otherwise the streams are scanned in order and the first one matching the
    heuristic for ``kind`` is returned; ``None`` is returned when nothing
    matches (or when ``kind`` is not one of the three known values).
    """
    if force_name:
        exact = next((s for s in streams if stream_name(s) == force_name), None)
        if exact is None:
            raise RuntimeError(f"Could not find {kind} stream with name='{force_name}'")
        return exact

    # Heuristics, one predicate per kind; each receives the lower-cased stream
    # name and the upper-cased stream type.
    def _looks_like_markers(lowered_name, upper_type):
        # common for LabRecorder marker streams: type "Markers"
        return upper_type == "MARKERS" or "marker" in lowered_name

    def _looks_like_eeg(lowered_name, upper_type):
        # common: type "EEG" or name contains eeg as a whole word
        return upper_type == "EEG" or re.search(r"\beeg\b", lowered_name)

    def _looks_like_fnirs(lowered_name, upper_type):
        # common: type NIRS / FNIRS or name contains nirs/fnirs
        return upper_type in ["NIRS", "FNIRS"] or re.search(r"(fnirs|nirs|nirstar)", lowered_name)

    matcher = {
        "markers": _looks_like_markers,
        "eeg": _looks_like_eeg,
        "fnirs": _looks_like_fnirs,
    }.get(kind)

    for candidate in streams:
        name = stream_name(candidate)
        stype = stream_type(candidate)
        lowered_name = name.lower()
        upper_type = stype.upper()
        if matcher is not None and matcher(lowered_name, upper_type):
            return candidate

    return None

def flatten_marker_labels(markers_time_series):
    """
    Convert a marker stream's time series into a flat list of label strings.

    LabRecorder markers are often stored as [['label'], ['label2'], ...], but
    rows may also be tuples, NumPy arrays (numeric marker streams) or already
    bare values. Every row is inspected on its own, so mixed inputs are
    handled consistently.

    Parameters
    ----------
    markers_time_series : sequence or None
        The ``time_series`` entry of a marker stream.

    Returns
    -------
    list of str
        One label per sample. Empty rows and ``None`` labels become "" so the
        result can always be matched against string regular expressions.
    """
    if markers_time_series is None or len(markers_time_series) == 0:
        return []

    labels = []
    for sample in markers_time_series:
        if isinstance(sample, np.ndarray):
            # Flatten so 0-d and multi-dimensional rows can be indexed uniformly.
            sample = sample.ravel()
        if isinstance(sample, (list, tuple, np.ndarray)):
            # A row holds one value per channel; the label is the first one.
            sample = sample[0] if len(sample) > 0 else ""
        if sample is None:
            sample = ""
        labels.append(str(sample))
    return labels

In [5]:
# Fail with a real exception instead of sys.exit(): inside a notebook kernel
# sys.exit() only raises SystemExit, which hides the actual cause from the reader.
if not os.path.exists(XDF_PATH):
    raise FileNotFoundError(f"XDF file not found: {XDF_PATH}")

# Output directory for the segmented files.
os.makedirs(OUT_DIR, exist_ok=True)

print(f"Loading XDF: {XDF_PATH}")
streams, header = pyxdf.load_xdf(XDF_PATH)

# List every stream with its index so the index-based selections made in the
# following cells can be checked against this recording.
print("\nStreams found:")
for i, s in enumerate(streams):
    print(f"  [{i}] name='{stream_name(s)}' type='{stream_type(s)}' "
          f"samples={len(s.get('time_stamps', []))}")

Loading XDF: ./Data/17_Mireia/Subject17_Mireia_Javad_Shimmer_included.xdf

Streams found:
  [0] name='FMS_Score' type='Survey' samples=8
  [1] name='Coaster' type='Object' samples=54411
  [2] name='HMD_MotionData' type='VR' samples=54411
  [3] name='Game_Markers' type='Markers' samples=29
  [4] name='Photon_Cap_C2022044_STATS' type='NIRS' samples=7211
  [5] name='actiCHampMarkers-24020270' type='Markers' samples=0
  [6] name='Photon_Cap_C2022044_RAW' type='NIRS' samples=7211
  [7] name='cortivision_markers_mirror' type='Markers' samples=0
  [8] name='actiCHamp-24020270' type='EEG' samples=465498
  [9] name='Shimmer_8462' type='Sensor_Data' samples=102612


In [7]:
# Marker stream: honour FORCE_MARKER_STREAM_NAME when it is set; otherwise use
# the fixed position of this recording (index 3 is 'Game_Markers' in the
# stream listing printed above). Auto-detection is deliberately not used here.
# NOTE(review): stream order may differ between recordings - check the listing.
if FORCE_MARKER_STREAM_NAME:
    markers_stream = pick_stream(streams, FORCE_MARKER_STREAM_NAME, "markers")
else:
    markers_stream = streams[3] if len(streams) > 3 else None

# eeg_stream used to be checked below without ever being assigned, which
# raised NameError on a fresh kernel; resolve it explicitly.
eeg_stream = pick_stream(streams, FORCE_EEG_STREAM_NAME, "eeg")

if markers_stream is None:
    raise RuntimeError("Could not auto-detect marker stream. Set FORCE_MARKER_STREAM_NAME.")
if eeg_stream is None:
    raise RuntimeError("Could not auto-detect EEG stream. Set FORCE_EEG_STREAM_NAME.")

In [8]:
# ---- Extract RollerCoasterStarted markers ----
markers_times = np.asarray(markers_stream["time_stamps"])
markers_labels = flatten_marker_labels(markers_stream["time_series"])

start_pat = re.compile(r"^RollerCoasterStarted\b", re.IGNORECASE)
attempt_pat = re.compile(r"attempt=(\d+)", re.IGNORECASE)

In [9]:
# Display the flattened marker labels to check their format
# ("<event>|session=...|user=...|attempt=<n>").
markers_labels

['SurveySubmitted|session=RollerCoasterVR|user=1|attempt=0',
 'RollerCoasterStarted|session=RollerCoasterVR|user=1|attempt=1',
 'SurveyStarted|session=RollerCoasterVR|user=1|attempt=1',
 'RollerCoasterFinished|session=RollerCoasterVR|user=1|attempt=1',
 'SurveySubmitted|session=RollerCoasterVR|user=1|attempt=1',
 'RollerCoasterStarted|session=RollerCoasterVR|user=1|attempt=2',
 'SurveyStarted|session=RollerCoasterVR|user=1|attempt=2',
 'RollerCoasterFinished|session=RollerCoasterVR|user=1|attempt=2',
 'SurveySubmitted|session=RollerCoasterVR|user=1|attempt=2',
 'RollerCoasterStarted|session=RollerCoasterVR|user=1|attempt=3',
 'SurveyStarted|session=RollerCoasterVR|user=1|attempt=3',
 'RollerCoasterFinished|session=RollerCoasterVR|user=1|attempt=3',
 'SurveySubmitted|session=RollerCoasterVR|user=1|attempt=3',
 'RollerCoasterStarted|session=RollerCoasterVR|user=1|attempt=4',
 'SurveyStarted|session=RollerCoasterVR|user=1|attempt=4',
 'RollerCoasterFinished|session=RollerCoasterVR|user=1|

In [10]:
# Collect one (timestamp, label, attempt) tuple for every marker whose label
# matches start_pat; attempt is None when the label carries no "attempt=<n>".
starts = []
for t, lab in zip(markers_times, markers_labels):
    if not start_pat.search(lab):
        continue
    m = attempt_pat.search(lab)
    if m:
        attempt = int(m.group(1))
    else:
        attempt = None
    starts.append((float(t), lab, attempt))

In [12]:
if not starts:
    # Helpful debug: show which labels the marker stream actually contains.
    uniq = sorted(set(markers_labels))
    print("\nNo RollerCoasterStarted markers found.")
    print("Unique markers (first 60):")
    for u in uniq[:60]:
        print(" ", u)
    # Raise instead of sys.exit(): in a notebook kernel sys.exit() only raises
    # SystemExit, and the other error paths of this notebook use RuntimeError.
    raise RuntimeError("No RollerCoasterStarted markers found in the marker stream.")

# Process the segments in chronological order.
starts.sort(key=lambda x: x[0])
print(f"\nFound {len(starts)} RollerCoasterStarted markers. Segmenting {WINDOW_SEC} s after each one...")



Found 6 RollerCoasterStarted markers. Segmenting 58.0 s after each one...


In [13]:
# Select the HMD stream by position: index 2 is 'HMD_MotionData' (type 'VR')
# in the stream listing printed after loading.
# NOTE(review): the index is specific to this recording - confirm it against
# the listing before reusing the notebook on another file.
Hmd_stream = streams[2]

In [16]:
# Sanity check: shape of the HMD timestamp array.
Hmd_stream['time_stamps'].shape

(54411,)

In [17]:
# Sanity check: shape of the HMD sample array (rows should match the timestamps).
Hmd_stream['time_series'].shape

(54411, 11)

In [21]:
# ---- Segment and save (HMD) ----
# Channel labels are read from the stream header here. They used to be taken
# from a global `hmd_labels` that no cell defines, so on a clean run the saved
# labels were always empty. None entries are mapped to "" so savemat can
# serialise the array.
hmd_labels = [
    "" if label is None else str(label)
    for label in (get_channel_labels(Hmd_stream) or [])
]

for idx, (t0, lab, attempt) in enumerate(starts, start=1):
    # Window of WINDOW_SEC seconds starting at the marker timestamp.
    t1 = t0 + WINDOW_SEC

    # Slice HMD stream
    hmd_data, hmd_t = slice_stream_by_time(Hmd_stream, t0, t1)

    mdict = {
        "segment_index": np.array([[idx]]),
        # -1 stands for "no attempt number found in the marker label".
        "attempt": np.array([[attempt if attempt is not None else -1]]),
        "t_start": np.array([[t0]]),
        "t_end": np.array([[t1]]),
        "marker": np.array([lab], dtype=object),

        "HMD": {
            "data": hmd_data,   # samples x channels
            "t": hmd_t,         # samples
            "chan_labels": np.array(hmd_labels, dtype=object),
            "stream_name": np.array([stream_name(Hmd_stream)], dtype=object),
            "stream_type": np.array([stream_type(Hmd_stream)], dtype=object),
        }
    }

    # Filename
    if attempt is not None:
        fname = f"P{participant_id}_round_{attempt:02d}_HMD.mat"
    else:
        # Marker labels contain characters such as '|' that are not allowed in
        # file names on Windows, so reduce the label to a safe character set.
        safe_lab = re.sub(r"[^A-Za-z0-9._-]+", "_", lab)
        fname = f"segment_{idx:02d}_{int(WINDOW_SEC)}s_after_{safe_lab}_HMD.mat"

    out_path = os.path.join(OUT_DIR, fname)
    savemat(out_path, mdict, do_compression=True)

    msg = f"Saved {out_path} | HMD samples={len(hmd_t)}"

    print(msg)

print("\nDone.")

Saved segmented\P17_round_01_HMD.mat | HMD samples=3333
Saved segmented\P17_round_02_HMD.mat | HMD samples=3470
Saved segmented\P17_round_03_HMD.mat | HMD samples=3273
Saved segmented\P17_round_04_HMD.mat | HMD samples=3443
Saved segmented\P17_round_05_HMD.mat | HMD samples=3258
Saved segmented\P17_round_06_HMD.mat | HMD samples=3469

Done.


In [31]:
import os
import numpy as np
import pandas as pd

# Maps each output column name to the index of the HMD time_series column it
# is read from (the HMD time series has 11 columns, indices 0-10).
# NOTE(review): the meaning of each channel is taken from these names only -
# confirm the order against the stream's producer. "timewrap" may be a typo
# for "timewarp"; it is kept as is because it is the column name written to
# the output files.
column_map = {
    "vx": 0, "vy": 1, "vz": 2,
    "ax": 3, "ay": 4, "az": 5,
    "framerate": 6, "displayfrequency": 7,
    "render": 8, "timewrap": 9, "postpresent": 10
}

def hmd_segment_to_df(hmd_data, hmd_t, column_map, time_col="time_stamps"):
    """Build a DataFrame for one HMD segment.

    Parameters
    ----------
    hmd_data : numpy.ndarray
        2-D sample array (rows are samples); columns are picked by index.
    hmd_t : array-like
        One timestamp per row of ``hmd_data``.
    column_map : dict
        Maps output column name -> column index in ``hmd_data``. The output
        columns follow the dict's insertion order.
    time_col : str
        Name of the timestamp column, placed first in the frame.

    Returns
    -------
    pandas.DataFrame
        ``time_col`` followed by one column per ``column_map`` entry.
    """
    names = list(column_map)
    positions = list(column_map.values())

    selected = hmd_data[:, positions]
    frame = pd.DataFrame(selected, columns=names)
    # The timestamp goes in front of the data columns.
    frame.insert(0, time_col, hmd_t)
    return frame

# ---- Segment and save (HMD -> DataFrame files) ----
os.makedirs(OUT_DIR, exist_ok=True)

for idx, (t0, lab, attempt) in enumerate(starts, start=1):
    # Window of WINDOW_SEC seconds starting at the marker timestamp.
    t1 = t0 + WINDOW_SEC

    # Slice HMD stream
    hmd_data, hmd_t = slice_stream_by_time(Hmd_stream, t0, t1)

    # Build dataframe for this segment (timestamp column + the mapped channels).
    # No per-row metadata columns are added; segment identity is carried by
    # the file name only.
    seg_df = hmd_segment_to_df(hmd_data, hmd_t, column_map, time_col="time_stamps")

    # Filename
    if attempt is not None:
        fname = f"P{participant_id}_round_{attempt:02d}_HMD.parquet"  # or .csv
    else:
        # Marker labels contain characters such as '|' that are not allowed in
        # file names on Windows, so reduce the label to a safe character set.
        safe_lab = re.sub(r"[^A-Za-z0-9._-]+", "_", lab)
        fname = f"segment_{idx:02d}_{int(WINDOW_SEC)}s_after_{safe_lab}_HMD.parquet"

    out_path = os.path.join(OUT_DIR, fname)

    # Save (pick one)
    seg_df.to_parquet(out_path, index=False)     # best for speed/size
    # seg_df.to_csv(out_path, index=False)       # simpler, larger

    print(f"Saved {out_path} | HMD samples={len(seg_df)}")

print("\nDone.")

Saved segmented\P17_round_01_HMD.parquet | HMD samples=3333
Saved segmented\P17_round_02_HMD.parquet | HMD samples=3470
Saved segmented\P17_round_03_HMD.parquet | HMD samples=3273
Saved segmented\P17_round_04_HMD.parquet | HMD samples=3443
Saved segmented\P17_round_05_HMD.parquet | HMD samples=3258
Saved segmented\P17_round_06_HMD.parquet | HMD samples=3469

Done.
