# Move raw EEG data to BIDS sourcedata folder

This script copies raw EEG data from wherever it lives on the server to this BIDS folder's `sourcedata` subfolder.

It will require customization for each data set being imported, due to the varying ways in which old studies were organized. The main thing that would need to be changed relates to the folder structure of the old data. This template assumes that data are organized by subject, with each subject having a folder named by their subject ID, and within that, subfolders for different sessions. If your data are organized differently, you will need to modify the code accordingly (e.g., remove `ses` from `in_files`).

Study-specific configuration details (e.g., study name, data type) are defined in the `config.yml` file and not in this script. 


---
(c) 2023 Aaron J. Newman, NeuroCognitive Imaging Lab, Dalhousie University

Released under a [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license.

---

In [None]:
from os import path as op
import os
import json
# import configparser
import yaml
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper
import random

import shutil
from glob import glob
from pathlib import Path

import numpy as np 
# from matplotlib import pyplot as plt
import mne
# from mne.datasets import eegbci
mne.set_log_level('error')

from mne_bids import write_raw_bids, BIDSPath, update_sidecar_json
from mne_bids.stats import count_events

## Study Parameters

Study-level parameters are imported from `config.yml` in `bids_root`.

In [None]:
# this shouldn't change if you run this script from its default location in code/import
bids_root = '../..'

# Study-level settings live in config.yml at the top of the BIDS tree.
cfg_file = op.join(bids_root, 'config.yml')
# YAML files are UTF-8; state that explicitly so the platform's locale
# encoding is never used to decode the config by accident.
with open(cfg_file, 'r', encoding='utf-8') as f:
    # NOTE(review): `Loader` is PyYAML's full (non-safe) loader, which can
    # construct arbitrary Python objects. Only load config files you trust.
    config = yaml.load(f, Loader=Loader)

# Pull out the values used by this script. A missing key raises KeyError here,
# rather than failing later in the middle of the copy.
study_name = config['study_name']
task = config['TaskName']
# Name of the innermost destination folder (sourcedata/sub-*/ses-*/<data_type>)
data_type = 'eeg'
# Raw file extension without the leading dot; selects how source files are found
raw_extn = config['raw_extn']
# Iterable of session labels
sessions = config['sessions']
# The study name doubles as the prefix of the raw-data folder and subject names
prefix = config['study_name']

## Paths

In [None]:
# source_path: destination for everything this script copies
# (the `sourcedata` folder inside the BIDS root)
source_path = op.join(bids_root, 'sourcedata')

# orig_path: location of the original (raw) input files, taken here to be a
# folder named after the study prefix, two levels above the BIDS root.
# Edit this to match how the study's data are actually laid out.
orig_path = op.join(bids_root, op.pardir, op.pardir, prefix)

In [None]:
# Collect every participant found in the original data folder: the names of
# all entries in orig_path that start with "<prefix>_".
# This needs to be edited according to the study data naming convention.
#
# op.basename (rather than splitting on '/') copes with whichever path
# separator glob returns on the current OS; sorting makes the processing
# order reproducible, because glob returns matches in arbitrary order.
in_subjs = sorted(op.basename(s) for s in glob(orig_path + '/' + prefix + '_*'))
in_subjs

## Copy EEG data from original location to sourcedata

Don't change the file names, in case that breaks the linking between the `.set` and `.fdt` files.

In [None]:
# For every subject/session pair that has data, copy the raw files (keeping
# their original names) into sourcedata/<participant_id>/ses-<ses>/<data_type>.
for subject in in_subjs:
    for ses in sessions:
        print(subject, ses)

        # Session labels read from YAML may be ints; build glob patterns and
        # folder names from the same string form.
        ses_label = str(ses)

        # check if we have data for this subject/session (maybe missing sessions)
        in_files = glob(f'{orig_path}/{subject}*{ses_label}*')
        if not in_files:
            continue

        if raw_extn == 'set':
            # EEGLAB data come as a .set/.fdt pair that must stay together.
            # NOTE(review): this picks up every .set/.fdt in the subject's
            # folder regardless of session - confirm that is intended.
            fdt_files = glob(op.join(orig_path, subject) + '/*.fdt')
            set_files = glob(op.join(orig_path, subject) + '/*.set')
            eeg_files = fdt_files + set_files
        elif raw_extn == 'vhdr':
            # Same pattern as the existence check above, so reuse its result.
            eeg_files = in_files
        else:
            # Without this, eeg_files would be undefined or, worse, still hold
            # the previous iteration's files and copy them to the wrong place.
            raise ValueError(
                f"Unsupported raw_extn {raw_extn!r}; expected 'set' or 'vhdr'"
            )

        # participant_id is for naming output files
        # NOTE(review): built from the last two characters of the subject
        # name, so it assumes two-digit subject numbers - adjust if needed.
        participant_id = 'sub-0' + subject[-2:]

        dest_path = op.join(source_path, participant_id, 'ses-' + ses_label, data_type)
        Path(dest_path).mkdir(parents=True, exist_ok=True)

        print('Source files:', eeg_files)
        print('Destination path:', dest_path)

        for f in eeg_files:
            shutil.copy(f, dest_path)