In [15]:
!pip install dask distributed
!pip install "dask[dataframe]"
!pip install dask_ml
!pip install h5py
!pip install tensorflow-addons
!pip install mne
!pip install scot
!pip install pyprep



In [16]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
import os
import datetime
import dask
from dask.distributed import Scheduler
from dask.distributed import Client
import dask.dataframe as dd
from dask_ml.model_selection import train_test_split
from random import shuffle, seed
import multiprocessing
import pywt
from scipy import fftpack
import random
from itertools import repeat
import scipy
import tensorflow as tf
import mne
import pyprep
import gc
from mne.preprocessing import ICA
import csv
import pandas as pd
import matplotlib.pyplot as plt
from mne.time_frequency.psd import psd_array_welch
import scot
from pyprep.prep_pipeline import PrepPipeline
from scipy.integrate import simps

import h5py



from sys import platform

In [17]:
# `config` is the class that configures the processor; use it to specify and control every preprocessing step.

# Which disorders to process
disorders = ['HBN', 'MDD']

class config(object):
    """Container for every tunable setting of the preprocessing run.

    All values are plain instance attributes assigned in ``__init__``; edit
    them there (or on an instance) to control the individual steps.
    """

    def __init__(self):
        # folder name the data should be read from
        self.foldername = r''
        # leave empty
        self.datapath = ''
        # folder name where the results should be stored
        self.resultpath = r''
        # required for processing (do not remove)
        self.filename = ""
        # lower and upper frequency for the band-pass filter
        self.band_pass_filter = [1, 70]

        # CSV channel names, required for some preprocessing steps
        # (specific to HBN, do not change): E1..E124 without the indices below.
        excluded_electrodes = {
            17, 43, 48, 49, 56, 63, 68, 73,
            81, 88, 94, 99, 107, 113, 119, 120,
        }
        self.csv_channel_names = [
            'E' + str(number)
            for number in range(1, 125)
            if number not in excluded_electrodes
        ]
        # channels that ECG and EOG detection should be applied to
        # (recommended to apply to all channels); both attributes reference
        # the very same list object as csv_channel_names
        self.ECG_channel = self.csv_channel_names
        self.EOG_channel = self.csv_channel_names

        # wanted channels; any other channel in the data is meant to be
        # excluded (again the same list object as csv_channel_names)
        self.wanted_channels = self.csv_channel_names
        # original sampling frequency
        self.sfreq = 500
        # resampling frequency
        self.resample_freq = 256
        # notch filter frequency
        self.notch_frequncy = 60
        # PSD resolution, i.e. the number of samples used in the PSD
        # computation. A large value gives a lot of noise, a low value starts
        # to lose information; this was tuned manually by testing the output.
        self.psd_resolution = 2500
        # percentage of overlap for PSD segments (to reduce noise)
        self.overlap_percentage = 0.5

        # The montage is the standard positioning of the channels.
        # Available options:
        #   standard_1005       international 10-05 system (343+3 locations)
        #   standard_1020       international 10-20 system (94+3 locations)
        #   standard_alphabetic LETTER-NUMBER names (A1, B2, F4, ...) (65+3 locations)
        #   standard_postfixed  10-20 system, postfixes for intermediate positions (100+3 locations)
        #   standard_prefixed   10-20 system, prefixes for intermediate positions (74+3 locations)
        #   standard_primed     10-20 system, prime marks for intermediate positions (100+3 locations)
        #   biosemi16           BioSemi cap with 16 electrodes (16+3 locations)
        #   biosemi32           BioSemi cap with 32 electrodes (32+3 locations)
        #   biosemi64           BioSemi cap with 64 electrodes (64+3 locations)
        #   biosemi128          BioSemi cap with 128 electrodes (128+3 locations)
        #   biosemi160          BioSemi cap with 160 electrodes (160+3 locations)
        #   biosemi256          BioSemi cap with 256 electrodes (256+3 locations)
        #   easycap-M1          EasyCap with 10-05 electrode names (74 locations)
        #   easycap-M10         EasyCap with numbered electrodes (61 locations)
        #   EGI_256             Geodesic Sensor Net (256 locations)
        #   GSN-HydroCel-32     HydroCel Geodesic Sensor Net and Cz (33+3 locations)
        #   GSN-HydroCel-64_1.0 HydroCel Geodesic Sensor Net (64+3 locations)
        #   GSN-HydroCel-65_1.0 HydroCel Geodesic Sensor Net and Cz (65+3 locations)
        #   GSN-HydroCel-128    HydroCel Geodesic Sensor Net (128+3 locations)
        #   GSN-HydroCel-129    HydroCel Geodesic Sensor Net and Cz (129+3 locations)
        #   GSN-HydroCel-256    HydroCel Geodesic Sensor Net (256+3 locations)
        #   GSN-HydroCel-257    HydroCel Geodesic Sensor Net and Cz (257+3 locations)
        #   mgh60               the (older) 60-channel cap used at MGH (60+3 locations)
        #   mgh70               the (newer) 70-channel BrainVision cap used at MGH (70+3 locations)
        #   artinis-octamon     Artinis OctaMon fNIRS (8 sources, 2 detectors)
        #   artinis-brite23     Artinis Brite23 fNIRS (11 sources, 7 detectors)
        self.montage = "GSN-HydroCel-128"




In [18]:
"""Module for PREP pipeline."""
# This module is copied from the official PREP pipeline (pyprep) code and slightly adjusted to our needs in this pipeline.
import mne
from mne.utils import check_random_state

from pyprep.find_noisy_channels import NoisyChannels
from pyprep.reference import Reference
from pyprep.removeTrend import removeTrend
from pyprep.utils import _set_diff, _union  # noqa: F401



class PrepPipeline:
    """Early stage preprocessing (PREP) of EEG data.

    This class implements the functionality of the PREP (preprocessing
    pipeline) for EEG data described in [1]_.

    Note that this definition replaces any ``PrepPipeline`` name imported
    earlier in the same namespace.

    Parameters
    ----------
    raw : mne.io.Raw
        The data. Channel types must be correctly assigned (e.g.,
        ocular channels are assigned the type 'eog').
    prep_params : dict
        Parameters of PREP which include at least the following keys:

        - ref_chs : {list, 'eeg'}
            - A list of channel names to be used for rereferencing.
              Can be a str 'eeg' to use all EEG channels.
        - reref_chs : {list, 'eeg'}
            - A list of channel names to be used for line-noise removed,
              and referenced. Can be a str 'eeg' to use all EEG channels.
        - line_freqs : {np.ndarray, list}
            - list of floats indicating frequencies to be removed.
              For example, for 60Hz you may specify
              ``np.arange(60, sfreq / 2, 60)``. Specify an empty list to
              skip the line noise removal step.
        - max_iterations : int, optional
            - The maximum number of iterations of noisy channel removal to
              perform during robust referencing. Defaults to ``4``.

        The dict is stored by reference and updated in place: the ``'eeg'``
        placeholders are replaced by the EEG channel names and a missing
        ``max_iterations`` key is filled in.
    montage : mne.channels.DigMontage
        Digital montage of EEG data. It is forwarded to
        ``raw_eeg.set_montage(montage, on_missing='ignore')``.
    ransac : bool, optional
        Whether or not to use RANSAC for noisy channel detection in addition to
        the other methods in :class:`~pyprep.NoisyChannels`. Defaults to True.
    channel_wise : bool, optional
        Whether RANSAC should predict signals for chunks of channels over the
        entire signal length ("channel-wise RANSAC", see `max_chunk_size`
        parameter). If ``False``, RANSAC will instead predict signals for all
        channels at once but over a number of smaller time windows instead of
        over the entire signal length ("window-wise RANSAC"). Channel-wise
        RANSAC generally has higher RAM demands than window-wise RANSAC
        (especially if `max_chunk_size` is ``None``), but can be faster on
        systems with lots of RAM to spare. Has no effect if not using RANSAC.
        Defaults to ``False``.
    max_chunk_size : {int, None}, optional
        The maximum number of channels to predict at once during channel-wise
        RANSAC. If ``None``, RANSAC will use the largest chunk size that will
        fit into the available RAM, which may slow down other programs on the
        host system. If using window-wise RANSAC (the default) or not using
        RANSAC at all, this parameter has no effect. Defaults to ``None``.
    random_state : {int, None, np.random.RandomState}, optional
        The random seed at which to initialize the class. If random_state is
        an int, it will be used as a seed for RandomState.
        If None, the seed will be obtained from the operating system
        (see RandomState for details). Default is None.
    filter_kwargs : {dict, None}, optional
        Optional keywords arguments to be passed on to mne.filter.notch_filter.
        Do not set the "x", Fs", and "freqs" arguments via the filter_kwargs
        parameter, but use the "raw" and "prep_params" parameters instead.
        If None is passed, the pyprep default settings for filtering are used
        instead.
    matlab_strict : bool, optional
        Whether or not PyPREP should strictly follow MATLAB PREP's internal
        math, ignoring any improvements made in PyPREP over the original code
        (see :ref:`matlab-diffs` for more details). Defaults to False.

    Attributes
    ----------
    raw : mne.io.Raw
        The data including eeg and non eeg channels. It is unprocessed if
        accessed before the fit method, processed if accessed after a
        successful fit method.
    raw_eeg : mne.io.Raw
        The only-eeg part of the data. It is unprocessed if accessed before
        the fit method, processed if accessed after a successful fit method.
    raw_non_eeg : {mne.io.Raw, None}
        The non-eeg part of the data. It is not processed when calling
        the fit method. If the input was only EEG it will be None.
    noisy_channels_original : dict
       Detailed bad channels in each criteria before robust reference.
    noisy_channels_before_interpolation : dict
        Detailed bad channels in each criteria just before interpolation.
    noisy_channels_after_interpolation : dict
        Detailed bad channels in each criteria just after interpolation.
    bad_before_interpolation : list
        bad channels after robust reference but before interpolation
    EEG_before_interpolation : np.ndarray
        EEG data in uV before the interpolation
    reference_before_interpolation : np.ndarray
        Reference signal in uV before interpolation.
    reference_after_interpolation : np.ndarray
        Reference signal in uV after interpolation.
    interpolated_channels : list
        Names of the interpolated channels.
    still_noisy_channels : list
        Names of the noisy channels after interpolation.

    References
    ----------
    .. [1] Bigdely-Shamlo, N., Mullen, T., Kothe, C., Su, K. M., Robbins, K. A.
       (2015). The PREP pipeline: standardized preprocessing for large-scale
       EEG analysis. Frontiers in Neuroinformatics, 9, 16.

    """

    def __init__(
        self,
        raw,
        prep_params,
        montage,
        ransac=True,
        channel_wise=False,
        max_chunk_size=None,
        random_state=None,
        filter_kwargs=None,
        matlab_strict=False,
    ):
        """Initialize PREP class.

        Loads the data of ``raw``, splits it into an EEG-only copy and an
        optional non-EEG copy, applies the montage to the EEG copy and stores
        the remaining settings for :meth:`fit`.
        """
        raw.load_data()
        self.raw_eeg = raw.copy()

        # split eeg and non eeg channels
        self.ch_names_all = raw.ch_names.copy()
        self.ch_types_all = raw.get_channel_types()
        self.ch_names_eeg = [
            self.ch_names_all[i]
            for i in range(len(self.ch_names_all))
            if self.ch_types_all[i] == "eeg"
        ]
        # set difference: the order of the non-EEG names is not preserved
        self.ch_names_non_eeg = list(set(self.ch_names_all) - set(self.ch_names_eeg))
        self.raw_eeg.pick_channels(self.ch_names_eeg)
        if self.ch_names_non_eeg == []:
            self.raw_non_eeg = None
        else:
            self.raw_non_eeg = raw.copy()
            self.raw_non_eeg.pick_channels(self.ch_names_non_eeg)

        # NOTE(review): on_missing='ignore' is presumably one of the local
        # adjustments to the upstream code -- confirm against pyprep.
        self.raw_eeg.set_montage(montage,on_missing='ignore')
        # raw_non_eeg may not be compatible with the montage
        # so it is not set for that object

        self.EEG_raw = self.raw_eeg.get_data()
        # NOTE: prep_params is kept by reference, so the replacements below
        # also change the dict object the caller passed in.
        self.prep_params = prep_params
        if self.prep_params["ref_chs"] == "eeg":
            self.prep_params["ref_chs"] = self.ch_names_eeg
        if self.prep_params["reref_chs"] == "eeg":
            self.prep_params["reref_chs"] = self.ch_names_eeg
        if "max_iterations" not in prep_params.keys():
            self.prep_params["max_iterations"] = 4
        self.sfreq = self.raw_eeg.info["sfreq"]
        # forwarded as keyword arguments to Reference in fit()
        self.ransac_settings = {
            "ransac": ransac,
            "channel_wise": channel_wise,
            "max_chunk_size": max_chunk_size,
        }
        self.random_state = check_random_state(random_state)
        self.filter_kwargs = filter_kwargs
        self.matlab_strict = matlab_strict

    @property
    def raw(self):
        """Return a version of self.raw_eeg that includes the non-eeg channels."""
        full_raw = self.raw_eeg.copy()
        if self.raw_non_eeg is None:
            return full_raw
        else:
            return full_raw.add_channels([self.raw_non_eeg], force_update_info=True)


    def fit(self):
        """Run the whole PREP pipeline.

        Line-noise removal (steps 1-2) runs only when
        ``prep_params["line_freqs"]`` is non-empty; referencing (step 3)
        always runs. The results of the referencing step are copied onto this
        object as attributes.

        Returns
        -------
        self : PrepPipeline
            The fitted instance.
        """
        # NOTE(review): the detector result is not used below -- the code that
        # consumed it (unusable/reference channel selection) is commented out.
        noisy_detector = NoisyChannels(self.raw_eeg, random_state=self.random_state)
        noisy_detector.find_bad_by_nan_flat()
        # unusable_channels = _union(
        #     noisy_detector.bad_by_nan, noisy_detector.bad_by_flat
        # )
        # reference_channels = _set_diff(self.prep_params["ref_chs"], unusable_channels)
        # Step 1: 1Hz high pass filtering
        if len(self.prep_params["line_freqs"]) != 0:
            self.EEG_new = removeTrend(
                self.EEG_raw, self.sfreq, matlab_strict=self.matlab_strict
            )

            # Step 2: Removing line noise
            linenoise = self.prep_params["line_freqs"]
            if self.filter_kwargs is None:
                # default notch settings used when the caller gave none
                self.EEG_clean = mne.filter.notch_filter(
                    self.EEG_new,
                    Fs=self.sfreq,
                    freqs=linenoise,
                    method="spectrum_fit",
                    mt_bandwidth=2,
                    p_value=0.01,
                    filter_length="10s",
                )
            else:
                self.EEG_clean = mne.filter.notch_filter(
                    self.EEG_new,
                    Fs=self.sfreq,
                    freqs=linenoise,
                    **self.filter_kwargs,
                )

            # Add Trend back
            self.EEG = self.EEG_raw - self.EEG_new + self.EEG_clean
            # write the line-noise-cleaned samples back into the Raw object
            self.raw_eeg._data = self.EEG

        # Step 3: Referencing
        reference = Reference(
            self.raw_eeg,
            self.prep_params,
            random_state=self.random_state,
            matlab_strict=self.matlab_strict,
            **self.ransac_settings,
        )
        reference.perform_reference(self.prep_params["max_iterations"])
        # expose the outcome of the referencing step on this object
        self.raw_eeg = reference.raw
        self.noisy_channels_original = reference.noisy_channels_original
        self.noisy_channels_before_interpolation = (
            reference.noisy_channels_before_interpolation
        )
        self.noisy_channels_after_interpolation = (
            reference.noisy_channels_after_interpolation
        )
        self.bad_before_interpolation = reference.bad_before_interpolation
        self.EEG_before_interpolation = reference.EEG_before_interpolation
        self.reference_before_interpolation = reference.reference_signal
        self.reference_after_interpolation = reference.reference_signal_new
        self.interpolated_channels = reference.interpolated_channels
        self.still_noisy_channels = reference.still_noisy_channels

        return self


In [19]:
import numpy as np
np.random.seed(7)

In [20]:
import threading
import dask.dataframe as dd
import shutil



def get_diractory_seperator_type():
    """Return the separator placed between folders in a path on this OS.

    Returns
    -------
    str
        ``os.sep``: ``"/"`` on Linux (and other POSIX systems such as macOS)
        and ``"\\"`` on Windows.

    Notes
    -----
    The previous implementation only recognised ``"linux"`` and ``"win32"``
    and called ``exit(0)`` on every other platform, which silently ended the
    process with a success status. ``os.sep`` returns the same values for
    those two platforms and also works everywhere else.
    """
    return os.sep

# Platform path separator, computed once when this cell runs.
seperator = get_diractory_seperator_type()

# deprecated: module-level locks; nothing in the cells shown here acquires them
lock = threading.Lock()
lock_conn = threading.Lock()

class EEG_file_processor():
    """Load one EEG recording from a CSV file and preprocess it step by step.

    Constructing the object already does the heavy lifting: it reads
    ``config.filename`` from ``config.datapath``, wraps the samples in an MNE
    raw object, drops unwanted channels, runs the PREP bad-channel detection,
    resamples to ``config.resample_freq`` and creates the result folders.
    Filtering, interpolation, re-referencing, ICA and saving are separate
    methods that the caller invokes afterwards.

    Parameters
    ----------
    config : config
        Settings object; the attributes read in ``__init__`` are copied onto
        the instance.
    disorder : str
        Name of the sub-folder of ``config.resultpath`` the results go to.

    Raises
    ------
    Exception
        If ``config.datapath`` or ``config.resultpath`` is not an existing
        directory.
    """

    def __init__(self, config, disorder):
        if(os.path.isdir(config.datapath)):
            self.datapath = config.datapath
        else:
            raise Exception("File path is not vaild: "+config.datapath)

        if(os.path.isdir(config.resultpath)):
            self.resultpath = config.resultpath
        else:
            raise Exception("Result path is not vaild: "+config.resultpath)

        self.filename = config.filename
        self.disorder = disorder

        # Kept under a different name than the ``band_pass_filter`` method: an
        # instance attribute of the same name shadows the method and makes
        # ``processor.band_pass_filter()`` fail with
        # "'list' object is not callable".
        self.band_pass_freqs = config.band_pass_filter
        self.csv_channel_names = config.csv_channel_names
        self.sfreq = config.sfreq
        self.resample_freq = config.resample_freq
        self.notch_frequncy = config.notch_frequncy
        self.psd_resolution = config.psd_resolution
        self.overlap_percentage = config.overlap_percentage
        self.montage = config.montage

        self.ECG_channel = config.ECG_channel
        self.EOG_channel = config.EOG_channel

        # object to hold data
        self.reader = None

        self.fill_readers()
        self.Build_dirs()
        gc.collect()

    def band_pass_filter(self,l_freq=None,h_freq=None,verbose=False):
        """Band-pass filter the recording.

        With no (or only one) frequency given, the configured band is applied
        to ``self.reader`` in place and ``None`` is returned. With both
        ``l_freq`` and ``h_freq`` given, ``self.reader`` is left untouched and
        the result of indexing a filtered copy with ``[:]`` is returned.
        """
        shared_kwargs = dict(picks=None, filter_length='auto',
                             l_trans_bandwidth='auto', h_trans_bandwidth='auto',
                             n_jobs=1, method='fft', iir_params=None,
                             phase='zero', fir_window='hamming',
                             fir_design='firwin', pad='reflect_limited',
                             verbose=verbose)
        if h_freq is None or l_freq is None:
            self.reader = self.reader.filter(l_freq=self.band_pass_freqs[0],
                                             h_freq=self.band_pass_freqs[1],
                                             **shared_kwargs)
            gc.collect()
            return None

        filtered = self.reader.copy()
        filtered.filter(l_freq=l_freq, h_freq=h_freq, **shared_kwargs)
        gc.collect()
        return filtered[:]

    def notch_filter(self,verbose=False):
        """Apply a FIR notch filter at the configured notch frequency."""
        self.reader = self.reader.notch_filter(freqs=self.notch_frequncy, picks=None, filter_length='auto',
                         notch_widths=None, trans_bandwidth=1.0, n_jobs=1,
                         method='fir', iir_params=None, mt_bandwidth=None,
                         p_value=0.05, phase='zero', fir_window='hamming',
                         fir_design='firwin', pad='reflect_limited', verbose=verbose)
        gc.collect()

    # method can be infomax or fastica
    def ICA(self,method='fastica',remove_movement= True):
        """Annotate artifacts, fit an ICA and apply it to the recording.

        Parameters
        ----------
        method : str
            ICA algorithm passed to ``mne.preprocessing.ICA``.
        remove_movement : bool
            When true (default) muscle-movement segments are annotated before
            fitting. The flag was previously accepted but ignored.
        """
        self.remove_nan()
        if remove_movement:
            self.remove_muscle_movment()
        n_components = len(self.reader.ch_names)-len(self.reader.info['bads'])-1
        # ``ICA`` here is the module-level MNE class, not this method.
        ica = ICA(n_components=n_components, method=method, random_state=0)
        ica.fit(self.reader)
        # NOTE(review): the component indices returned by find_bads_ecg /
        # find_bads_eog are not written to ``ica.exclude``, so ``ica.apply``
        # below removes no component. Wiring them in would change the output
        # data (every listed channel is used as a reference), so this is left
        # as it was -- confirm the intended behaviour.
        for channel_name in self.ECG_channel:
            if channel_name in self.reader.ch_names:
                ica.find_bads_ecg(self.reader, ch_name=channel_name, threshold='auto', start=None,
                                  stop=None, l_freq=self.band_pass_freqs[0], h_freq=self.band_pass_freqs[1], method='correlation',
                                  reject_by_annotation=True, measure='zscore',
                                  verbose=None)
        for channel_name in self.EOG_channel:
            if channel_name in self.reader.ch_names:
                ica.find_bads_eog(self.reader, ch_name=channel_name, threshold=3.0, start=None,
                                  stop=None, l_freq=self.band_pass_freqs[0], h_freq=self.band_pass_freqs[1],
                                  reject_by_annotation=True, measure='zscore',
                                  verbose=None)

        self.reader = ica.apply(self.reader, n_pca_components=n_components)
        gc.collect()

    def Interpolate(self):
        """Interpolate the channels marked bad, keeping them listed as bad."""
        self.reader = self.reader.copy().interpolate_bads(reset_bads=False)

    def _add_annotations(self, annot):
        """Append ``annot`` to the reader's annotations instead of replacing them."""
        self.reader.set_annotations(self.reader.annotations + annot)

    def remove_muscle_movment(self):
        """Annotate segments that look like muscle activity (z-score based)."""
        low_edge, high_edge = 110, 140
        # MNE rejects filter edges above the Nyquist frequency; after
        # resampling to 256 Hz the original 140 Hz edge is above it, so the
        # upper edge is pulled just below Nyquist.
        nyquist = self.reader.info['sfreq'] / 2.0
        if high_edge >= nyquist:
            high_edge = nyquist - 1
        if high_edge <= low_edge:
            print("Skipping muscle annotation: sampling rate too low for the muscle band")
            return
        annot, _ = mne.preprocessing.artifact_detection.annotate_muscle_zscore(self.reader, threshold=5, ch_type="eeg", min_length_good=0.1,
                           filter_freq=(low_edge, high_edge), n_jobs=1, verbose=None)
        # merged, so annotations set earlier (e.g. by remove_nan) are kept
        self._add_annotations(annot)

    def remove_nan(self):
        """Annotate segments that contain NaN samples."""
        annot = mne.preprocessing.annotate_nan(self.reader)
        self._add_annotations(annot)

    def fill_readers(self, verbose=True):
        """Read the CSV file into ``self.reader`` and prepare it.

        Steps: parse the file, build an MNE raw object with the configured
        montage, drop ``Cz``, all-zero channels and channels not listed in
        ``csv_channel_names``, run :meth:`prep_pipeline` and resample.
        """
        source_file = os.path.join(self.datapath, self.filename)
        # NOTE(review): read_table infers a header from the first line by
        # default; if the CSV has no header row, the first channel would be
        # consumed as column name -- confirm against the data files.
        temp = dd.read_table(source_file,sample=10000000, engine='c')
        arr2 = temp.to_dask_array()
        arr2 = arr2.persist()
        arr2 = arr2.compute_chunk_sizes()
        df = np.array([np.float32((temp2[0].compute()).split(',')) for temp2 in arr2])
        print("df.shape", df.shape)
        montage = mne.channels.make_standard_montage(self.montage)
        tmp_ch_pos = montage.get_positions()

        info = mne.create_info(ch_names=list(tmp_ch_pos["ch_pos"].keys()),ch_types="eeg",sfreq=self.sfreq,verbose=verbose)

        self.reader = mne.io.BaseRaw(info, preload=np.array(df,dtype=np.float64), verbose=verbose)
        self.reader.set_montage(montage)

        if 'Cz' in self.reader.ch_names:
            self.reader = self.reader.drop_channels(['Cz'])
        print(source_file)

        # Drop channels whose samples are all zero. The previous test,
        # ``all(row) == 0``, was true as soon as a single sample was zero.
        data, _ = self.reader[:]
        all_zero = [name for name, samples in zip(self.reader.ch_names, data)
                    if not np.any(samples)]
        if all_zero:
            self.reader = self.reader.drop_channels(all_zero)

        # Keep only the configured channels; names are collected first so the
        # channel list is not modified while it is being walked.
        unwanted = [name for name in self.reader.ch_names
                    if name not in self.csv_channel_names]
        if unwanted:
            self.reader = self.reader.drop_channels(unwanted)

        data, _ = self.reader[:]
        print(len(self.reader.ch_names), len(data))

        self.prep_pipeline()
        self.reader.resample(sfreq=self.resample_freq)

        # exist_ok avoids a crash when several worker processes create the
        # same folder at the same time
        os.makedirs(os.path.join(self.resultpath, self.disorder), exist_ok=True)
        gc.collect()

    def rereference(self, verbose = False):
        """Re-reference the recording in place to the average of its channels."""
        self.reader.set_eeg_reference(ref_channels='average', projection=False,
                                      ch_type='auto', verbose=verbose)

    def savestep1(self):
        """Write the current recording to ``<resultpath>/<disorder>/<filename>`` as CSV.

        NOTE(review): the file layout of the original implementation is kept
        unchanged: the first row holds the channel names immediately followed
        by the samples of the first channel, every further row holds the
        samples of one remaining channel. This looks unintended (header and
        first channel share a row) but readers of these files may rely on it.
        """
        data, _ = self.reader[:]
        data = np.asarray(data)
        rows = [list(self.reader.ch_names)]
        if len(data) > 0 and data.shape[1] > 0:
            rows[0].extend(data[0].tolist())
            rows.extend(channel.tolist() for channel in data[1:])

        target = os.path.join(self.resultpath, self.disorder, self.filename)
        with open(target, 'w',newline='') as csvfile:
            csv.writer(csvfile).writerows(rows)

    def Build_dirs(self):
        """Create the result folder of this disorder with its CNN and LSTM sub-folders."""
        disorder_dir = os.path.join(self.resultpath, self.disorder)
        # makedirs(exist_ok=True) is safe when several processes race here
        for folder in (disorder_dir,
                       os.path.join(disorder_dir, 'CNN'),
                       os.path.join(disorder_dir, 'LSTM')):
            os.makedirs(folder, exist_ok=True)

    def prep_pipeline(self):
        """Detect bad channels with PREP and record them in ``info['bads']``.

        PREP is first run with RANSAC; if that raises, it is run once more
        without RANSAC (an error in the second run propagates). Only the
        detected channel names are used; the recording itself is not replaced.
        """
        prep_params = {
            "ref_chs": "eeg",
            "reref_chs": "eeg",
            "line_freqs": np.array([self.notch_frequncy]),
            "max_iterations": 3
        }
        prep = None
        for use_ransac in (True, False):
            try:
                # a fresh dict per attempt: PrepPipeline edits its params in place
                prep = PrepPipeline(self.reader.copy(),
                                    dict(prep_params),
                                    self.montage,
                                    ransac=use_ransac,
                                    channel_wise=False,
                                    max_chunk_size=self.sfreq*2,
                                    random_state=0,
                                    filter_kwargs=None,
                                    matlab_strict=True)
                prep.fit()
                break
            except Exception as e:
                if not use_ransac:
                    raise
                print("Failed in prep")
                print("Retrying without RANSAC after: " + repr(e))

        # Merge without duplicates: a name listed twice would make
        # ``len(info['bads'])`` too large and skew the ICA component count.
        merged_bads = list(self.reader.info['bads'])
        detected = list(prep.noisy_channels_original["bad_all"]) + list(prep.still_noisy_channels)
        for name in detected:
            if name not in merged_bads:
                merged_bads.append(name)
        self.reader.info['bads'] = merged_bads

In [None]:

def process_files(Config,disorder):
    """Run the complete preprocessing chain for a single recording.

    Builds an ``EEG_file_processor`` for ``Config``/``disorder``, runs the
    band-pass filter, notch filter, interpolation, re-referencing and ICA in
    that order, and saves the result.

    Returns
    -------
    str
        An empty string on success; otherwise a one-line description of the
        failure (file name, disorder and error text), which is also printed
        together with the traceback.
    """
    banner = "########################################################################################################################"

    def announce(message):
        # a message framed by two banner lines
        print(banner)
        print(message)
        print(banner)

    step_names = ("band_pass_filter", "notch_filter", "Interpolate", "rereference", "ICA")
    try:
        processor = EEG_file_processor(Config,disorder)
        for step_name in step_names:
            getattr(processor, step_name)()
        announce("saving")
        processor.savestep1()

        announce("done")
    except Exception as e:
        import traceback
        traceback.print_tb(e.__traceback__)
        failure = Config.filename + " # " + disorder + " , Error: " + str(e) + "\n"
        print(failure)
        return failure
    return ""



def main():
    """Preprocess every file of every configured disorder in parallel.

    For each name in ``disorders`` the files in ``<foldername>/<disorder>``
    are listed, one ``config`` per file is prepared, and all files are handed
    to ``process_files`` through a multiprocessing pool. Afterwards the error
    messages of the files that failed are printed.
    """
    import multiprocessing as mp

    Config = config()

    # NOTE(review): hard-coded location. The file is only created when it is
    # missing; its contents are not used anywhere in this function.
    processed_log = "/content/drive/MyDrive/MDD/data/pre_processed/processed_files.txt"
    if not os.path.isfile(processed_log):
        with open(processed_log, 'w'):
            print('Create a new text file!')

    inputs = []
    for disorder in disorders:
        print(disorder)
        source_dir = os.path.join(Config.foldername, disorder)
        files = os.listdir(source_dir)

        # Best effort, as before, but the reason for a failure is now shown
        # and an already existing folder is not treated as an error.
        result_dir = os.path.join(Config.resultpath, disorder)
        try:
            os.makedirs(result_dir, exist_ok=True)
        except OSError as e:
            print("Could not create result folder " + result_dir + ": " + str(e))

        for filesname in files:
            file_config = config()
            file_config.filename = filesname
            file_config.datapath = source_dir
            inputs.append((file_config,disorder))
            # running number of queued files
            print(len(inputs))

    # starmap blocks until every file is done; leaving the ``with`` block
    # shuts the worker processes down
    with mp.Pool() as pool:
        retVal = list(pool.starmap(process_files,inputs))

    print("Failed files:")
    for ret in retVal:
        # process_files returns "" on success, so only failures are listed
        if ret:
            print(ret)



# Entry point: run the whole preprocessing when this cell/script is executed directly.
if __name__=="__main__":
    print("Starting program")
    main()
    print('Done excuting')


Starting program
HBN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
df.shape (128, 305856)
/content/drive/MyDrive/MDD/data/pre_processed/original_data/HBN/NDARGD507TDZ_RestingState_data.csv
107 107
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
Setting up high-pass filter at 1 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phas

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 107 out of 107 | elapsed:    1.4s finished


Removed notch frequencies (Hz):
     60.00 : 12947 windows
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).
