In [None]:
# Import necessary libraries
import os, mne, re  
from mne.channels import make_standard_montage  
from glob import glob  
from mne_bids import BIDSPath, write_raw_bids, read_raw_bids  
from pyprep.find_noisy_channels import NoisyChannels  # Library to detect noisy EEG channels

# Root folder of the (downsampled) dataset in BIDS format
root_bids = 'C:/Users/mfbpe/Desktop/DATA/2023_Eva_Freedom/bids/'

# Derivatives folder (sibling of the BIDS folder) where the processed data is stored
root_derivatives = root_bids.split('/bids')[0] + '/derivatives'

# Folder where the ICA component images are saved
root_image = 'C:/Users/mfbpe/Desktop/DATA/2023_Eva_Freedom/images_prep_eeg/'

# Create the output folders if they do not exist yet; otherwise os.chdir below
# and the later figure export would fail with FileNotFoundError on a first run.
os.makedirs(root_derivatives, exist_ok=True)
os.makedirs(root_image, exist_ok=True)

# Work from the derivatives folder so files saved with a relative path end up there
os.chdir(root_derivatives)

# Collect every EEG header file (.vhdr) found two folder levels below the BIDS root,
# ordered by path length
all_files_path = sorted(glob(f'{root_bids}/*/*/*_eeg.vhdr*'), key=len)

# Per-participant bookkeeping, keyed by participant label.
# Entries present before the loop runs are used as-is instead of being re-detected.
prtp_chs_out = {}  # participant label -> list of bad channel names
prtp_ica_out = {}  # participant label -> list of ICA components to exclude

# Preprocess every recording: filter, repair bad channels, fit ICA, export the
# component plot and save the cleaned data.
for part in all_files_path:
    # The first folder below the BIDS root is named "sub-<label>"; keep the label.
    # os.path.relpath makes this independent of the path separator returned by glob
    # (splitting on "\\" only works for Windows-style paths).
    subject_dir = os.path.relpath(part, root_bids).split(os.sep)[0]
    n_part = subject_dir.split("-")[1]

    # Define the BIDS path for the EEG data of the current participant
    bidspath = BIDSPath(subject=n_part, task='constraint', datatype='eeg', root=root_bids)

    # Load the EEG data in BIDS format
    raw = read_raw_bids(bids_path=bidspath).load_data()

    # Band-pass filter the data that will be saved (0.01 Hz to 40 Hz)
    raw.filter(l_freq=.01, h_freq=40, n_jobs=-1)

    # Separate copy, high-pass filtered at 1 Hz, used for bad-channel detection and ICA
    raw_copy = raw.copy().filter(1, None, n_jobs=-1)

    # Use the known bad channels of this participant if there are any;
    # otherwise detect them automatically and remember the result.
    if n_part in prtp_chs_out:
        bad_chs = list(prtp_chs_out[n_part])
    else:
        noisy = NoisyChannels(raw_copy)
        noisy.find_all_bads()
        bad_chs = noisy.get_bads()
        prtp_chs_out[n_part] = list(bad_chs)  # stored as a separate list

    # Mark the bad channels on BOTH objects before interpolating. raw_copy is an
    # independent copy, so marking them only on raw would leave the noisy channels
    # untouched in the data used to fit the ICA.
    raw.info['bads'] = list(bad_chs)
    raw_copy.info['bads'] = list(bad_chs)
    raw = raw.interpolate_bads()
    raw_copy = raw_copy.interpolate_bads()

    # Fit the ICA on the 1 Hz high-passed copy
    ica = mne.preprocessing.ICA(random_state=21, max_iter='auto')
    ica.fit(raw_copy)

    # If bad ICA components are already known for this participant, remove them
    # from the data that will be saved
    if n_part in prtp_ica_out:
        ica.exclude = prtp_ica_out[n_part]
        ica.apply(raw)

    # Plot every fitted component and save the figure to the image folder.
    # The builtin range is used because numpy is not imported in this file.
    fig = ica.plot_components(picks=list(range(ica.n_components_)), show=False)
    fig.savefig(f"{root_image}sub-{n_part}_ICA.png")
    # NOTE(review): the figure is never closed; with many participants open
    # figures may accumulate — consider closing it once it has been saved.

    del ica  # Drop the ICA object before the next participant is loaded

    # Save the preprocessed data as .fif; the path is relative, so the file is
    # written to the current working directory
    raw.save(f"sub-{n_part}_constraint_raw.fif", overwrite=True)

    # Drop the data objects before the next participant is loaded
    del raw, raw_copy

# Persist the bad channels of every participant as a single "dict = {...}" line.
# The path is relative, so the file is written to the current working directory.
with open('list_bad_channels.txt', 'w') as bad_channels_file:
    bad_channels_line = ''.join(('dict = ', repr(prtp_chs_out), '\n'))
    bad_channels_file.write(bad_channels_line)

# Create a text file to store the list of bad ICA components for each participant
with open('list_bad_ica.txt', 'w') as f:
    f.write('dict = ' +
