# Correct the triggers

This step removes the spurious (artifact) triggers and corrects the potential delay between each trigger and the actual onset of the audio stimulus.

## Initialisation

In [None]:
# Navigate in the OS and call folders
import os
import os.path as op

# Get the username automatically
import getpass

# Perform plots
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

# Date and time
from datetime import datetime as dt
from datetime import timedelta as td

# Find delay
from find_delay import find_delay

# Load Matlab file
from scipy.io import loadmat

# Moviepy
from moviepy.editor import VideoFileClip

# MEG processing
import mne
from mne_bids import (
    write_raw_bids,
    read_raw_bids,
    BIDSPath,
    write_meg_calibration,
    write_meg_crosstalk,
    mark_channels,
)

# Configuration
# Every location below is derived from the home directory of the current user.
user = getpass.getuser()
path_home = op.join('/export', 'home', user)

# Root folder of the raw FIF recordings
path_data = op.join(path_home, 'lab/MEG_EXPERIMENTS/BLCOMP/DATA/FIF/')

# Subjects (all of the subjects from the experiment)
# Maps a subject key to the recording path (without the ".fif" extension), relative to path_data.
subjects_paths = {
    "subject_01a": 'path_to_subject_01/session_01/block_01',
    "subject_01b": 'path_to_subject_01/session_02/block_01',
    "subject_02a": 'path_to_subject_02/session_01/block_01',
    "subject_02b": 'path_to_subject_02/session_02/block_01',
}

# Maps a subject key to the name of its MAT file (looked up in path_mat_files).
subject_mat = {
    "subject_01a": "0001.mat",
    "subject_01b": "0002.mat",
    "subject_02a": "0003.mat",
    "subject_02b": "0004.mat",
}

# Output root, and the raw BIDS dataset inside it
path_output = op.join(path_home, 'public', 'MEGtrain')
path_bids = op.join(path_output, 'bids')

# Calibration and crosstalk files
# NOTE(review): this folder is spelled "scripts" while the MAT/Figures folders below use "Scripts";
# confirm both spellings exist on a case-sensitive file system.
path_maxfilter_parameters = op.join(path_output, "scripts", "maxfilter_parameters")
fine_cal_file = op.join(path_maxfilter_parameters, "sss_cal_3049.dat")
crosstalk_file = op.join(path_maxfilter_parameters, "ct_sparse.fif")

# Preprocessing derivatives
path_preprocessing = op.join(path_output, "derivatives", "Preprocessing")

# Eelbrain folder (created right away if it does not exist yet)
path_eelbrain_meg = op.join(path_output, 'eelbrain', 'meg')
os.makedirs(path_eelbrain_meg, exist_ok=True)

# MAT files, figures, and original stimuli
path_analysis = op.join(path_output, 'Scripts', 'MEG Analysis')
path_mat_files = op.join(path_analysis, 'MAT')
path_figures = op.join(path_analysis, 'Figures')
path_videos = op.join(path_home, 'lab', 'path_to_original_audios_or_videos')

# Empty room recordings
path_empty_room = op.join(path_home, 'lab/MEG_EXPERIMENTS/EMPTYROOM/DATA/')

class Subject:
    """Identifiers and file locations attached to one entry of ``subjects_paths``.

    Building an instance also creates the subject folder inside ``path_eelbrain_meg``.

    NOTE(review): ``session`` is always "01" and ``visit`` is always empty, so keys that share
    the same two-digit number (e.g. "subject_01a" and "subject_01b") resolve to the same BIDS,
    derivative, and eelbrain paths -- confirm this is intended.
    """

    def __init__(self, subject):
        """Resolve every path used by the pipeline for the key ``subject``.

        Raises an ``Exception`` if ``subject`` is not a key of ``subjects_paths``.
        """
        self.name = subject

        if subject not in subjects_paths:
            raise Exception("Subject name not found.")

        # Identifiers: the number is made of characters 8-9 of the key ("01" in "subject_01a")
        self.number = subject[8:10]
        self.session = "01"
        self.visit = ""
        self.sub = "sub-" + self.number

        # Raw recording
        self.fif = subjects_paths[subject] + ".fif"
        self.raw_fif_name = op.join(path_data, self.fif)

        def bids_path(task, root):
            # All the BIDS paths of a subject only differ by their task and their root
            return BIDSPath(subject=self.number, session=self.session, task=task,
                            run='01', suffix="meg", root=root)

        def derivative_file(path, label):
            # Same folder and basename as `path`, with "meg" swapped for `label`
            return op.join(path.directory, path.basename.replace("meg", label) + ".fif")

        # BIDS locations (task recording and empty room; raw dataset and derivatives)
        self.path_derivatives = bids_path('track', path_preprocessing)
        self.path_bids_raw = bids_path('track', path_bids)
        self.path_bids_empty_room = bids_path('emptyroom', path_bids)
        self.path_derivatives_er = bids_path('emptyroom', path_preprocessing)

        # Derivative files
        self.path_downsampled_sss = derivative_file(self.path_derivatives, "sss")
        self.path_ica_solution = derivative_file(self.path_derivatives, "ica_solution")
        self.path_ica = derivative_file(self.path_derivatives, "ica")
        self.path_downsampled_sss_er = derivative_file(self.path_derivatives_er, "sss")

        # Eelbrain files
        eelbrain_dir = op.join(path_eelbrain_meg, self.sub)
        self.path_eelbrain = op.join(eelbrain_dir, self.sub + "_track" + self.visit + "-raw.fif")
        self.path_eelbrain_er = op.join(eelbrain_dir, self.sub + "_emptyroom" + self.visit + "-raw.fif")

        # MAT file of the subject
        self.path_mat = op.join(path_mat_files, subject_mat[self.name])

        os.makedirs(eelbrain_dir, exist_ok=True)

    def print_name(self):
        """Print the upper-cased subject name inside a frame of "#" characters."""
        hash_sign = "#"
        border = hash_sign * (len(self.name) + 10)
        title = hash_sign * 2 + "   " + self.name.upper() + "   " + hash_sign * 2
        # The empty first and last items give the blank line before and after the frame
        print("\n".join(["", border, title, border, ""]))

# One Subject instance per key of subjects_paths, reachable by name...
all_subjects = {name: Subject(name) for name in subjects_paths}

# ... and by position (same order as subjects_paths)
subjects = list(all_subjects.values())

## Correct the events

### Observe the events
Determine which triggers need to be removed or corrected.

In [None]:
# Position of the subject to inspect in `subjects` (1-based: the first subject is 1)
sub = 27
if not 1 <= sub <= len(subjects):
    # Without this check, sub = 0 would silently select the last subject
    raise IndexError(f"sub must be between 1 and {len(subjects)} (got {sub}).")
subject = subjects[sub - 1]
print(subject.name)

# Read the "ica" derivative of the subject. A dedicated variable name is used on purpose:
# the global `path_data` is the root of the raw FIF files read by Subject.__init__,
# and must not be overwritten here.
path_data_ica = subject.path_ica
data = mne.io.read_raw_fif(path_data_ica, preload=False)

# Each row of `events` is [timestamp, channel value before the trigger, trigger value]
events = mne.find_events(data, stim_channel="STI101", shortest_event=1)

plot = mne.viz.plot_events(events, data.info["sfreq"])

### Print the events

In [None]:
# One line per event: [timestamp, channel value before the trigger, trigger value]
if len(events):
    print("\n".join(str(event_row) for event_row in events))

## Remove specific triggers

In [None]:
# Subjects to process (each name must be a key of all_subjects)
selected_subjects = [
    "subject_01a",
    "subject_01b",
    "subject_02a",
    "subject_02b",
]

# Per subject, the trigger values to drop entirely (e.g. [4, 20]):
# every event carrying one of these values is removed.
events_to_remove_all = {
    "subject_01a": [],
    "subject_01b": [],
    "subject_02a": [],
    "subject_02b": [],
}

# Per subject, the individual occurrences of a trigger to drop, as {trigger: [occurrences]}
# (e.g. {32: [8, 11]} removes the 8th and the 11th occurrence of the trigger 32).
# Occurrences are counted from 1: the first occurrence of a trigger is 1, not 0.
unwanted_occurrences_all = {
    "subject_01a": {},
    "subject_01b": {},
    "subject_02a": {},
    "subject_02b": {},
}

# Upper bound of the trigger values: only the events strictly below this value are kept
# (i.e. every trigger greater than or equal to it is removed).
max_trigger_value = 105

# Trigger value expected for each condition
# (the condition is read from the first 7 characters of the video name)
conditions_to_triggers = {
    "AO_U_XX": 10,
    "AO_M_XX": 11,
    "VO_U_BD": 20,
    "VO_M_BD": 21,
    "VO_X_SK": 22,
    "VO_X_SC": 23,
    "AV_U_BD": 30,
    "AV_M_BD": 31,
    "AV_X_SK": 32,
    "AV_X_SC": 33,
}

# Each event is an array of three values: the timestamp, the channel value before the trigger,
# and the trigger value. Set this to True to force the second value to 0 for every event,
# which gives a cleaner list.
set_middle_value_at_0 = True

# Delays (in ms) at or outside of these bounds are reported as outliers at the end:
# here, anything that is not strictly between 200 and 230.
outlier_threshold = (200, 230)
outliers = {}

# Trigger values used by the correction
onset_triggers = set(conditions_to_triggers.values())  # Beginnings of the videos (10, 11, 20-23, 30-33)
silent_triggers = {20, 21, 22, 23}  # Trials for which no delay is calculated
button_triggers = {101, 102, 103, 104}  # Button presses following the end of a video
end_trigger = 99  # End of a video

for selected_subject in selected_subjects:

    # Load the necessary data: the "ica" derivative (resampled), and the raw BIDS recording (original)
    subject = all_subjects[selected_subject]
    print(subject.name)

    path_data_resampled = subject.path_ica
    data_resampled = mne.io.read_raw_fif(path_data_resampled, preload=True, verbose=False)
    data_original = mne.io.read_raw_fif(subject.path_bids_raw, preload=True, verbose=False)

    # Load the events; each row is [timestamp, channel value before the trigger, trigger value]
    events = mne.find_events(data_resampled, stim_channel="STI101", shortest_event=1, verbose=False)
    number_events_before = len(events)

    # PART ONE: we correct the trigger values

    # 1A. We delete the unwanted trigger values
    events_to_remove = events_to_remove_all[subject.name]
    events_filtered_1 = [event for event in events if event[2] not in events_to_remove]

    # 1B. We delete the unwanted occurrences of some events (occurrences are counted from 1)
    unwanted_occurrences = unwanted_occurrences_all[subject.name]
    events_count = {}
    events_filtered_2 = []

    for event in events_filtered_1:
        value = int(event[2])
        events_count[value] = events_count.get(value, 0) + 1

        if events_count[value] not in unwanted_occurrences.get(value, ()):
            events_filtered_2.append(event)

    # 1C. We set the proper 99 events (in the case where they are noisy)
    # (in this experiment, 99 meant the end of a video, and would always be succeeded by a button press,
    # which was a number between 101 and 104. Comment these lines if not applicable for your experiment).
    for previous_event, event in zip(events_filtered_2, events_filtered_2[1:]):
        if event[2] in button_triggers and previous_event[2] != end_trigger:
            previous_event[2] = end_trigger

    # 1D. We only keep the events strictly below max_trigger_value
    events_filtered_3 = [event for event in events_filtered_2 if event[2] < max_trigger_value]

    if set_middle_value_at_0:
        for event in events_filtered_3:
            event[1] = 0

    print(f"Number of events before: {number_events_before}, Number of events after: {len(events_filtered_3)}")
    sorted_events = sorted({int(event[2]) for event in events_filtered_3})
    print(f"Unique events: {sorted_events}")
    number_non_zero = sum(1 for event in events_filtered_3 if event[1] != 0)
    print(f"Number of events with a value at index 1 different from zero: {number_non_zero}\n")

    for event in events_filtered_3:
        print(event)

    # PART TWO: We correct the trigger delays

    # 2A. We get the sampling of the original and resampled data
    meg_freq_original = data_original.info["sfreq"]
    meg_freq_resampled = data_resampled.info["sfreq"]
    ratio_freq = meg_freq_original / meg_freq_resampled

    # 2B. Get the timestamps of the triggers of interest
    # We keep the position of each onset in `events`: this is where the delay is applied in 2I.
    events = events_filtered_3
    onset_indices = [index for index, event in enumerate(events) if event[2] in onset_triggers]
    offset_indices = [index for index, event in enumerate(events) if event[2] == end_trigger]

    if not onset_indices:
        raise Exception(f"No video onset found for {subject.name}: nothing to correct.")
    if len(onset_indices) != len(offset_indices):
        raise Exception(f"There are not as many onsets ({len(onset_indices)}) as there are offsets "
                        f"({len(offset_indices)}). Impossible to pair the beginning and the end of each video.")

    # np.array copies the rows: rescaling the onsets/offsets below leaves `events` untouched
    onsets = np.array([events[index] for index in onset_indices])  # Beginnings of the videos
    offsets = np.array([events[index] for index in offset_indices])  # Ends of the videos

    # Max duration of a video, converted from resampled samples to milliseconds
    max_duration = np.max(offsets[:, 0] - onsets[:, 0]) / meg_freq_resampled * 1000

    # 2C. We express the onsets and offsets in samples of the original data
    # (rounded to the closest sample rather than truncated)
    onsets[:, 0] = np.round(onsets[:, 0] * ratio_freq).astype(onsets.dtype)
    offsets[:, 0] = np.round(offsets[:, 0] * ratio_freq).astype(offsets.dtype)

    # 2D. We create the epochs
    # The channels are picked once (pick works in place), and MISC001 is then selected by name
    # so that the result does not depend on the order of the channels.
    # NOTE(review): MISC001 is presumably the channel recording the audio -- confirm.
    data_original.pick(["MISC001", "STI101"])
    first_samp = data_original.first_samp

    audio_epochs = []
    for onset, offset in zip(onsets, offsets):
        abs_onset = onset[0] - first_samp
        abs_offset = offset[0] - first_samp
        epoch = data_original[["MISC001"], abs_onset:abs_offset][0]
        audio_epochs.append(epoch[0])

    print(f"Number of epochs created: {len(audio_epochs)}.")
    print(f"Max event duration: {max_duration} ms.")

    # 2E. Load the data from the MAT files and get the video file name for each trial
    mat_contents = loadmat(subject.path_mat)
    mat_videos = mat_contents["data"][0][0][3][0]

    # 2F. We turn this into a list we can go through
    videos = [mat_video[0][0][0] for mat_video in mat_videos]

    # The two first videos of subject 10 are missing
    if subject.name == "subject_10":
        videos = videos[2:]

    print(f"Number of events recorded in the MAT file: {len(videos)}.")

    # The trials are matched to the videos by position: the counts must be the same
    if len(videos) != len(audio_epochs):
        raise Exception(f"There are {len(videos)} videos in the MAT file, but {len(audio_epochs)} epochs "
                        f"in the recording. Impossible to match the trials with the videos.")

    # 2G. Looping through all the events
    delays = {}
    corrs = {}

    # exist_ok allows to re-run the cell when the folder was created by a previous run
    path_figures_subject = op.join(path_figures, subject.name)
    os.makedirs(path_figures_subject, exist_ok=True)

    for trial, epoch in enumerate(audio_epochs):
        print(f"Trial {trial + 1} of {len(audio_epochs)}")

        trigger = onsets[trial][2]
        onset = onsets[trial][0] / meg_freq_original  # In seconds
        offset = offsets[trial][0] / meg_freq_original  # In seconds
        print(f"\tTrigger {trigger}: onset {onset}, offset {offset}, duration {offset-onset}")

        if trigger in silent_triggers:
            delays[trial] = np.nan
            print(f"\tSilent trial: no delay calculation possible. Moving to the next trial...")
            continue

        # We load the video, extract the first channel of its audio, and release the file
        video = videos[trial]
        path_video = op.join(path_videos, video[:7], video)
        mp4_video = VideoFileClip(path_video)
        try:
            mp4_audio = mp4_video.audio
            audio_freq = mp4_audio.fps
            video_duration = mp4_video.duration
            audio_sound_array = np.array(list(mp4_audio.iter_frames()))[:, 0]
        finally:
            mp4_video.close()

        expected_trigger = conditions_to_triggers[video[:7]]
        print(f"\tVideo {video}, duration {video_duration}")
        print(f"\tExpected trigger: {expected_trigger}")
        # Both durations are expressed in seconds
        print(f"\tDuration difference: {np.round(np.abs(video_duration - (offset - onset)), 3)} s")

        if expected_trigger != trigger:
            raise Exception(f"The trigger value ({trigger}) does not match what the expected trigger should be ({expected_trigger}).")

        # Delay (in ms) of the audio of the video inside the recorded epoch, and its correlation value
        delay, corr = find_delay(array_1 = epoch,
                                 array_2 = audio_sound_array,
                                 freq_array_1 = meg_freq_original,
                                 freq_array_2 = audio_freq,
                                 resampling_rate = 1000,
                                 return_delay_format = "ms",
                                 threshold = 0.1,
                                 plot_figure = False,
                                 plot_intermediate_steps = False,
                                 return_correlation_value = True,
                                 path_figure = op.join(path_figures_subject, str(trial+1) + ".png"),
                                 verbosity = 0)
        print(f"\tDelay: {delay} ms; correlation: {np.round(corr*100, 2)}%.")
        delays[trial] = delay
        corrs[trial] = corr

    # 2H. Calculating the average delay and attributing it to the silent trials
    if np.all(np.isnan(list(delays.values()))):
        raise Exception(f"All the trials of {subject.name} are silent: impossible to calculate an average delay.")
    average_delay = np.round(np.nanmean(list(delays.values())))

    for trial in delays:
        if np.isnan(delays[trial]):
            delays[trial] = average_delay
            corrs[trial] = "N/A"

        # Delays at or outside of the bounds are saved, to be shown at the end
        if not outlier_threshold[0] < delays[trial] < outlier_threshold[1]:
            outliers.setdefault(subject.name, []).append(
                {"trial": trial, "video": videos[trial], "delay": delays[trial], "corr": corrs[trial]})

    print(f"Average delay: {average_delay}.")

    # 2I. Correcting the events
    # The delay of each trial is added to the onset event of that trial, found in `events`
    # through its position saved in 2B (the trial number is not a valid index of `events`,
    # which also contains the ends of the videos and the button presses).
    # NOTE(review): the end-of-video events (99) are left where they are -- confirm this is intended.
    for trial, (video, onset_index) in enumerate(zip(videos, onset_indices)):

        trigger = onsets[trial][2]
        if conditions_to_triggers[video[:7]] != trigger:
            raise Exception(f"The trigger value ({trigger}) does not match what the expected trigger should be ({conditions_to_triggers[video[:7]]}).")

        event_ms = np.round(events[onset_index][0] / meg_freq_resampled * 1000)
        print(f"Trial {trial+1} ({video}, {trigger}): trigger moved from {event_ms} to {event_ms + delays[trial]} (+{delays[trial]})")

        # Delay converted from milliseconds to samples of the resampled data
        delay_freq_resampled = delays[trial] / 1000 * meg_freq_resampled

        events[onset_index][0] += int(np.round(delay_freq_resampled))

    # 2J. Save the data
    # path_data_corrected = op.join(subject.path_derivatives.directory, subject.path_derivatives.basename.replace("meg", "ec") + ".fif")
    path_data_corrected = subject.path_eelbrain
    data_resampled.add_events(np.array(events), stim_channel="STI101", replace=True)
    data_resampled.save(path_data_corrected, overwrite=True, verbose=False)

    # PART THREE: We create the variable-duration events
    # onsets = np.array([event for event in events if event[2] in [10, 11, 20, 21, 22, 23, 30, 31, 32, 33]])  # Beginnings of the videos
    # offsets = np.array([event for event in events if event[2] == 99])  # Ends of the videos
    # epochs = []

    # if len(onsets) != len(offsets):
    #     raise Exception(f"There are not as many onsets {len(onsets)} as there are offsets {len(offsets)}. Impossible to create the variable duration events.")

    # pre_stim = 200
    # post_stim = 200

    # for onset, offset in zip(onsets, offsets):
    #     index = data.time_as_index([onset[0]/meg_freq_resampled - pre_stim, offset[0]/meg_freq_resampled + post_stim])
    #     epoch, times = data_resampled[:, index[0]:index[1]]
    #     epochs.append(mne.EpochsArray(np.expand_dims(epoch, 0), data_resampled.info, tmin=-pre_stim, verbose=False))

    # print(f"Epoching done. {len(epochs)} epochs found.")

    # Free the memory before loading the next subject
    del data_original
    del data_resampled

# Summary of the outlier delays saved during the correction, subject by subject
for subject_name, subject_outliers in outliers.items():
    print(f"Subject: {subject_name}")
    for outlier in subject_outliers:
        print(f"\t{outlier}")

## Check the correction

### Observe the events

In [None]:
# Position of the subject to check in `subjects` (1-based: the first subject is 1)
sub = 24
if not 1 <= sub <= len(subjects):
    # Without this check, sub = 0 would silently select the last subject
    raise IndexError(f"sub must be between 1 and {len(subjects)} (got {sub}).")
subject = subjects[sub - 1]
print(subject.name)

# Read the file written by the correction step, which saves its output to subject.path_eelbrain
# (the "ec" derivative path is commented out there, so no such file is written by this notebook).
# A dedicated variable name is used on purpose: the global `path_data` is the root of the raw
# FIF files read by Subject.__init__, and must not be overwritten here.
path_data_corrected = subject.path_eelbrain
data = mne.io.read_raw_fif(path_data_corrected, preload=False)

# Each row of `events` is [timestamp, channel value before the trigger, trigger value]
events = mne.find_events(data, stim_channel="STI101", shortest_event=1)

plot = mne.viz.plot_events(events, data.info["sfreq"])

### Print the events

In [None]:
# One line per event: [timestamp, channel value before the trigger, trigger value]
if len(events):
    print("\n".join(str(event_row) for event_row in events))