# XDF2BIDS


In [10]:
import os
import os.path as op
import shutil
import mne
import pyxdf
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np

from mne_bids import BIDSPath, print_dir_tree, write_raw_bids
from mne_bids.stats import count_events

In [11]:
#set directories
notebook_dir = os.path.dirname(os.path.abspath('__file__')) if '__file__' in globals() else os.getcwd()
project_root = os.path.dirname(notebook_dir)  # Go up one level from notebook location
data_dir = op.join(project_root, 'sourcedata')
config_xdf_path = op.join(data_dir, 'pilot', 'sub-P01', 'brain', 'sub-P01_task-natural-walk_eeg.xdf')

# Check if directory exists, create if needed
if not op.exists(data_dir):
    print(f"Warning: {data_dir} does not exist. You may need to create it or adjust the path.")
    # Fallback to current directory + sourcedata
    data_dir = op.join(os.getcwd(), 'sourcedata')
    print(f"Using fallback path: {data_dir}")

print(f"Data directory set to: {data_dir}")
print(f"Project root set to: {project_root}")
print(f"Config XDF file path set to: {config_xdf_path}")

Data directory set to: c:\MoBI\sourcedata
Project root set to: c:\MoBI
Config XDF file path set to: c:\MoBI\sourcedata\pilot\sub-P01\brain\sub-P01_task-natural-walk_eeg.xdf


In [12]:
# Load XDF file
streams, header = pyxdf.load_xdf(config_xdf_path)

# Identify EEG stream, Impedance stream, and Markers stream
eeg_stream = None
impedance_stream = None
markers_stream = None

# Examine each stream to identify its type so we dont have to hardcode stream indices
print("Available streams in XDF file:")
for idx, s in enumerate(streams):
    s_name = s['info']['name'][0] if 'name' in s['info'] else 'Unknown'
    s_type = s['info']['type'][0] if 'type' in s['info'] else 'Unknown'
    print(f"  Stream {idx}: Name = '{s_name}', Type = '{s_type}'")

Available streams in XDF file:
  Stream 0: Name = 'Android_Impedances - PROX_054', Type = 'Impedance values'
  Stream 1: Name = 'Android_EEG - PROX_054', Type = 'EEG'
  Stream 2: Name = 'Presentation', Type = 'Markers'


In [13]:
for stream in streams:
    stream_type = stream['info']['type'][0].lower()
    stream_name = stream['info']['name'][0].lower()

    if 'eeg' in stream_type or 'eeg' in stream_name:
        eeg_stream = stream
        print(f"Found EEG stream: {stream['info']['name'][0]}")
    elif 'impedance' in stream_type or 'impedance' in stream_name:
        impedance_stream = stream
        print(f"Found Impedance stream: {stream['info']['name'][0]}")
    elif 'marker' in stream_type or 'marker' in stream_name:
        markers_stream = stream
        print(f"Found Markers stream: {stream['info']['name'][0]}")

# Scrape information from the EEG stream to configure it
if eeg_stream:
    sfreq = float(eeg_stream['info']['nominal_srate'][0])
    
    # Extract channel labels 
    ch_labels_info = eeg_stream['info']['desc'][0]['channels'][0]['channel']
    ch_names = [ch['label'][0] for ch in ch_labels_info]
    n_chans = len(ch_names)

    print(f"\n--- EEG Stream Configuration ---")
    print(f"Sampling Frequency: {sfreq} Hz")
    print(f"Number of channels: {n_chans}")
    print(f"Channel names: {ch_names}")
else:
    print("\nEEG stream not found.")

if not impedance_stream:
    print("Impedance stream not found.")
if not markers_stream:
    print("Markers stream not found.")

Found Impedance stream: Android_Impedances - PROX_054
Found EEG stream: Android_EEG - PROX_054
Found Markers stream: Presentation

--- EEG Stream Configuration ---
Sampling Frequency: 250.0 Hz
Number of channels: 64
Channel names: ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T7', 'T8', 'P7', 'P8', 'AFz', 'Fz', 'Cz', 'Pz', 'FC1', 'FC2', 'CP1', 'CP2', 'FC5', 'FC6', 'CP5', 'CP6', 'FT9', 'FT10', 'TP7', 'TP8', 'F1', 'F2', 'C1', 'C2', 'P1', 'P2', 'AF3', 'AF4', 'FC3', 'FC4', 'CP3', 'CP4', 'PO3', 'PO4', 'F5', 'F6', 'C5', 'C6', 'P5', 'P6', 'AF7', 'AF8', 'FT7', 'FT8', 'TP9', 'TP10', 'PO7', 'PO8', 'PO9', 'PO10', 'CPz', 'POz']


In [14]:
# Create MNE Raw object from the EEG stream
if eeg_stream:
    eeg_data = eeg_stream['time_series'].T  # Transpose to shape (n_channels, n_times)
    eeg_timestamps = eeg_stream['time_stamps']
    
    # Create MNE Info object
    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types='eeg')
    
    # Create Raw object
    raw = mne.io.RawArray(eeg_data, info)
    
    # Set EEG montage to PROX-64-montage in the root directory
    montage_path = op.join(project_root, 'utility', 'PROX-64-montage-dig.fif')
    if op.exists(montage_path):
        montage = mne.channels.read_dig_fif(montage_path)
        raw.set_montage(montage)
        print("\nMontage set to PROX-64-montage.")
    else:
        print(f"\nMontage file not found at {montage_path}. Please check the path.")
    
    # Parse markers stream and add as annotations
    if markers_stream:
        print("\n--- Processing Markers Stream ---")
        marker_data = markers_stream['time_series']
        marker_timestamps = markers_stream['time_stamps']
        
        # Convert markers to annotations
        annotations_onset = []
        annotations_duration = []
        annotations_description = []
        
        # Get the first EEG timestamp to calculate relative timing
        eeg_start_time = eeg_timestamps[0]
        
        for i, (marker_time, marker_value) in enumerate(zip(marker_timestamps, marker_data)):
            # Calculate onset relative to EEG start time
            onset = marker_time - eeg_start_time
            
            # Extract marker description (handle both string and list formats)
            if isinstance(marker_value, (list, np.ndarray)) and len(marker_value) > 0:
                description = str(marker_value[0])
            else:
                description = str(marker_value)
            
            annotations_onset.append(onset)
            annotations_duration.append(0.0)  # Point events have 0 duration
            annotations_description.append(description)
        
        # Create MNE Annotations object
        annotations = mne.Annotations(onset=annotations_onset,
                                     duration=annotations_duration,
                                     description=annotations_description)
        
        # Add annotations to raw object
        raw.set_annotations(annotations)
        
        print(f"Added {len(annotations)} annotations to raw object:")
        print(f"Unique markers: {set(annotations_description)}")
        print(f"First few annotations:")
        for i in range(min(5, len(annotations))):
            print(f"  {annotations.onset[i]:.3f}s: {annotations.description[i]}")
    else:
        print("\nNo markers stream found - no annotations added.")
        
    
    print("\n--- MNE Raw Object Info ---")
    print(raw.info)
    
    # Plot raw data for visual inspection
    raw.plot(n_channels=min(20, n_chans), duration=30, scalings='auto', title='Raw EEG Data', show=True, block=True)
    plt.show()

Creating RawArray with float64 data, n_channels=64, n_times=88799
    Range : 0 ... 88798 =      0.000 ...   355.192 secs
    Range : 0 ... 88798 =      0.000 ...   355.192 secs
Ready.
Ready.

Montage set to PROX-64-montage.

--- Processing Markers Stream ---
Added 318 annotations to raw object:
Unique markers: {'<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.001336</unc></pevent>', '<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000251</unc></pevent>', '<pevent><etype>Picture</etype><ecode>2</ecode><unc>0.000197</unc></pevent>', '<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000147</unc></pevent>', '<pevent><etype>Picture</etype><ecode>2</ecode><unc>0.000193</unc></pevent>', '<pevent><etype>Picture</etype><ecode>2</ecode><unc>0.00022</unc></pevent>', '<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000208</unc></pevent>', '<pevent><etype>Picture</etype><ecode>2</ecode><unc>0.000254</unc></pevent>', '<pevent><etype>Picture</etype><ecode>2</ecode><unc>0.000186</u

  z = zscore(data, axis=1)


Channels marked as bad:
none


In [6]:
# Now we use MNE BIDS to convert the Raw object to BIDS format
bids_root = op.join(project_root, 'data', 'BIDS')
bids_path = BIDSPath(subject='P01', session='01', task='NaturalWalk', datatype='eeg', root=bids_root, )
write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format='BrainVision')

Writing 'c:\MoBI\data\BIDS\participants.tsv'...
Writing 'c:\MoBI\data\BIDS\participants.json'...
Writing 'c:/MoBI/data/BIDS/sub-P01/ses-01/eeg/sub-P01_ses-01_space-CapTrak_electrodes.tsv'...
Writing 'c:/MoBI/data/BIDS/sub-P01/ses-01/eeg/sub-P01_ses-01_space-CapTrak_coordsystem.json'...
The provided raw data contains annotations, but you did not pass an "event_id" mapping from annotation descriptions to event codes. We will generate arbitrary event codes. To specify custom event codes, please pass "event_id".
Used Annotations descriptions: [np.str_('<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000128</unc></pevent>'), np.str_('<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000139</unc></pevent>'), np.str_('<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000141</unc></pevent>'), np.str_('<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000142</unc></pevent>'), np.str_('<pevent><etype>Picture</etype><ecode>1</ecode><unc>0.000143</unc></pevent>'), np.str_('<pevent><e

  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format='BrainVision')
  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format='BrainVision')


Writing 'c:\MoBI\data\BIDS\sub-P01\ses-01\sub-P01_ses-01_scans.tsv'...
Wrote c:\MoBI\data\BIDS\sub-P01\ses-01\sub-P01_ses-01_scans.tsv entry with eeg\sub-P01_ses-01_task-NaturalWalk_eeg.vhdr.


BIDSPath(
root: c:/MoBI/data/BIDS
datatype: eeg
basename: sub-P01_ses-01_task-NaturalWalk_eeg.vhdr)