# Convert raw EEG data to BIDS

This script will take raw EEG data files stored in the `sourcedata` folder (as saved by OpenVibe, PyCorder, Net Station, or ASAlab (after conversion to EEGLAB format)) and convert them to BIDS-compatible files and folder structure (saved to `rawdata`), with all metadata.

You must save the raw EEG files in the `sourcedata/sub-00x/eeg` subfolder of your study's BIDS root folder.

All study-specific configuration details should be defined in the `config.yml` file and not in this script. 

The only thing to change in this script is to list the subject IDs of the subjects whose data you wish to convert, in the cell below. Then just run all additional cells.


---
Copyright 2023 [Aaron J Newman](https://github.com/aaronjnewman), [NeuroCognitive Imaging Lab](http://ncil.science), [Dalhousie University](https://dal.ca)

Released under [The 3-Clause BSD License](https://opensource.org/licenses/BSD-3-Clause)

---

In [None]:
from os import path as op
import os
import json
# import configparser
import yaml
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

import shutil
from glob import glob
from pathlib import Path

import numpy as np 
# from matplotlib import pyplot as plt
import mne
# from mne.datasets import eegbci
mne.set_log_level('error')

from mne_bids import write_raw_bids, BIDSPath, update_sidecar_json
from mne_bids.stats import count_events

## Study Parameters

Will import study-level parameters from `config.yml` in `bids_root`

In [None]:
# Study-level parameters: everything below is read from `config.yml`, which is
# expected in the BIDS root folder, and unpacked into module-level names.

# this shouldn't change if you run this script from its default location in code/import
bids_root = '../..'

cfg_file = op.join(bids_root, 'config.yml')
# NOTE(review): `Loader` is PyYAML's full loader, not the safe one; only point
# this at a trusted, locally-authored config.yml.
with open(cfg_file, 'r') as f:
    config = yaml.load(f, Loader=Loader)

study_name = config['study_name']
task = config['task']
reb_info = config['reb_info']
authors = config['authors']
line_freq = int(config['line_freq'])
instn = config['instn']
instn_addr = config['instn_addr']
instn_dept = config['instn_dept']
funding = config['funding']
# NOTE: this name shadows the interactive `license` helper; kept so that any
# other cell referring to it keeps working.
license = config['license']
data_type = config['data_type']
amp_mfr = config['amp_mfr']
amp_model = config['amp_model']
sw_filt = config['sw_filt']
hw_filt = config['hw_filt']
eeg_ref = config['eeg_ref']
eeg_ground = config['eeg_ground']
cap_mfr = config['cap_mfr']
cap_model = config['cap_model']
eog = config['eog']
# Flatten the EOG specification into a plain list of channel names. `eog` is
# iterated as a sequence of mappings whose values are sequences of
# comma-separated channel-name strings.
eog_ch = [
    ch_name
    for eog_entry in eog
    for name_strings in eog_entry.values()
    for name_string in name_strings
    for ch_name in name_string.split(', ')
]
montage_fname = config['montage_fname']
events = config['events']
# events_of_interest = {}
# for e in events:
#     events_of_interest.update(e)
events_extra = {'BAD boundary': 99, 'EDGE boundary': 999}
prefix = config['source_prefix']
eeg_extn = 'raw'
# Always bind drop_ch (empty list when the config value is empty) so that later
# `if drop_ch:` checks cannot fail with a NameError.
drop_ch = config['drop_ch'].split(', ') if config['drop_ch'] else []

## Paths

In [None]:
# source_path is where the input source (raw) files live
source_path = op.join(bids_root, 'sourcedata')

# raw_path is where the results of running this script will be saved
raw_path = op.join(bids_root, 'rawdata')

# out_path is created up front if it does not exist yet
out_path = op.join(bids_root, 'derivatives', 'preprocessing')
Path(out_path).mkdir(parents=True, exist_ok=True)

# out_path already starts with bids_root, so only the 'logs' subfolder is
# appended here; joining bids_root again would resolve to a folder outside
# the study directory.
log_path = op.join(out_path, 'logs')
Path(log_path).mkdir(parents=True, exist_ok=True)
                  

In [None]:
# convert all participants in sourcedata: every folder directly inside
# source_path whose name starts with `prefix` is treated as one subject.
# The folder name itself is used (rather than a fixed-width slice of the path)
# so subject folders of any name length work, and plain files that happen to
# match the prefix are ignored.
in_subjs = sorted(
    op.basename(subj_dir)
    for subj_dir in glob(op.join(source_path, prefix + '*'))
    if op.isdir(subj_dir)
)

# alternatively, list the names of subject IDs to convert
# in_subjs = []

In [None]:
# Output data files use generic sub-001, 002, ... names instead of the source names.
# ASSUMES the last two characters of each entry in in_subjs are the ID number;
# a leading 0 is prepended to make a three-character label.
# BIDS will prepend 'sub-' to the labels in out_subjs
out_subjs = [f"0{source_name[-2:]}" for source_name in in_subjs]

## Import data, and convert to BIDS

In [None]:
def _read_source_file(fname, apply_montage=True):
    """Read one source EEG file with the reader selected by `eeg_extn`.

    Parameters
    ----------
    fname : str
        Path of the source file to read.
    apply_montage : bool
        When True, `montage_fname` is applied to the data via `set_montage`.

    Returns
    -------
    The raw object returned by the MNE reader (data are not preloaded).
    """
    if eeg_extn == 'set':
        # EEGLAB format
        raw_part = mne.io.read_raw_eeglab(fname,
                                          preload=False,
                                          eog=eog_ch)
    elif eeg_extn == 'raw':
        # EGI format
        raw_part = mne.io.read_raw_egi(fname,
                                       preload=False,
                                       eog=eog_ch,
                                       misc=['E129'],
                                       exclude=[])
    else:
        # otherwise default to BrainVision format
        raw_part = mne.io.read_raw_brainvision(fname,
                                               preload=False,
                                               eog=eog_ch,
                                               misc=['Digi'])
    if apply_montage:
        raw_part.set_montage(montage_fname)
    return raw_part


# Channels to remove from OpenVibe-acquired data. Derived from config here so
# the loop works even when no module-level `drop_ch` has been defined.
channels_to_drop = config['drop_ch'].split(', ') if config.get('drop_ch') else []

# Additional sidecar metadata; identical for every subject, so built once.
entries = {'Manufacturer': config['amp_mfr'],
           'ManufacturersModelName': config['amp_model'],
           'PowerLineFrequency': config['line_freq'],
           'SoftwareFilters': config['sw_filt'],
           'HardwareFilters': config['hw_filt'],
           'EEGReference': config['eeg_ref'],
           'EEGGround': config['eeg_ground'],
           'CapManufacturer': config['cap_mfr'],
           'CapManufacturersModelName': config['cap_model'],
           'EthicsApprovals': config['reb_info'],
           'InstitutionName': config['instn'],
           'InstitutionAddress': config['instn_addr'],
           'InstitutionalDepartmentName': config['instn_dept'],
           }

for subject in in_subjs:
    print(subject)
    # subject_id is for naming output files
    subject_id = '0' + subject[-2:]

    # --- Import raw files ---
    source_subj = op.join(source_path, subject, config['data_type'])
    # sorted so that multi-file recordings are concatenated in a stable order
    raw_fnames = sorted(glob(op.join(source_subj, '*.' + eeg_extn)))
    if not raw_fnames:
        raise FileNotFoundError(
            'No *.' + eeg_extn + ' files found in ' + source_subj)

    multiple_files = len(raw_fnames) > 1
    if multiple_files:
        # if multiple input files, we need to concatenate them
        raw = mne.concatenate_raws(
            [_read_source_file(fname) for fname in raw_fnames])

        # mne-bids doesn't handle concatenated raw files, unless we save them as .fif then reload:
        raw_tmp_fname = op.join(source_path, subject + '-raw.fif')
        raw.save(raw_tmp_fname, overwrite=True)
        raw = mne.io.read_raw(raw_tmp_fname)
    else:
        # A single EEGLAB file is read without applying the montage (as in the
        # original single-file branch); all other cases apply it.
        raw = _read_source_file(raw_fnames[0],
                                apply_montage=(eeg_extn != 'set'))

    # --- Event Processing ---
    # We don't actually need to extract events for BIDS conversion,
    #  however we do this to check what system data were acquired on
    #  so we can remove extraneous channels from OpenVibe-acquired data
    # NOTE: this rebinds the module-level name `events`.
    events, event_dict = mne.events_from_annotations(raw)

    # For 32 channel Booth room TMSi OpenVibe acquired data
    # comment out if using 64 channel data!
    # The first event name is inspected for an 'OVTK' prefix after the first
    # '/'; names without a '/' (or no events at all) are treated as not OpenVibe.
    first_event_name = next(iter(event_dict), '')
    name_parts = first_event_name.split('/')
    from_openvibe = (len(name_parts) > 1
                     and name_parts[1].split('_')[0] == 'OVTK')
    if from_openvibe and channels_to_drop:
        raw = raw.drop_channels(channels_to_drop)

    # --- Make BIDS ---
    bids_path = BIDSPath(subject=subject_id,
                         task=config['task'],
                         datatype=config['data_type'],
                         root=raw_path)

    # NOTE(review): newer mne-bids releases may name the `events_data`
    # parameter `events` -- confirm against the installed version.
    written_path = write_raw_bids(raw, bids_path,
                                  events_data=events, event_id=event_dict,
                                  overwrite=True)

    # Update metadata with additional info. The sidecar path is derived from
    # the path returned by write_raw_bids rather than from the requested
    # bids_path, which was created without a suffix.
    sidecar_path = written_path.copy().update(extension='.json')
    update_sidecar_json(bids_path=sidecar_path, entries=entries)

    # Clean up the temporary concatenated file
    if multiple_files:
        os.remove(raw_tmp_fname)

# Merge the README file generated by write_raw_bids into the existing README.md,
# then delete the generated file. Skipped when no README was generated
# (e.g. no subjects were converted), instead of failing on the missing file.
generated_readme = op.join(raw_path, 'README')
if op.exists(generated_readme):
    # utf-8-sig reads plain UTF-8 too and strips a leading byte-order mark if present
    with open(generated_readme, encoding='utf-8-sig') as fi:
        lines = fi.read().splitlines()

    # Each appended line is preceded by a newline, so the merged text starts
    # on a fresh line after whatever README.md already contains.
    with open(op.join(raw_path, 'README.md'), 'a', encoding='utf-8') as fo:
        fo.write(''.join('\n' + li for li in lines))

    os.remove(generated_readme)