To run this notebook you need the `mne_bids` package, which you can install with the following command:
```bash
pip install mne-bids
```

In [None]:
import mne_bids
from mne_bids import BIDSPath, read_raw_bids
import shutil
import platform

# ERP CORE BIDS dataset
# We'll use mne_bids to download and organize the first 5 participants' data

import os
import requests
from zipfile import ZipFile

# Download the ERP CORE P3 dataset (if not already present)

erp_core_url = "https://osf.io/download/3zk6n/"
zip_path = "sample_data/ERP_CORE_P3.zip"
extract_dir = "sample_data/"

# Make sure the destination folder exists; open(..., "wb") raises
# FileNotFoundError when the parent directory is missing.
os.makedirs(os.path.dirname(zip_path), exist_ok=True)

if not os.path.exists(zip_path):
    print("Downloading ERP CORE dataset...")
    # Stream into a temporary name first so that an interrupted download is
    # never mistaken for a complete archive on the next run.
    partial_path = zip_path + ".part"
    with requests.get(erp_core_url, stream=True, timeout=60) as r:
        # Fail loudly on HTTP errors instead of saving an error page as a zip.
        r.raise_for_status()
        with open(partial_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    # Publish the archive under its final name only once it is fully written.
    os.replace(partial_path, zip_path)
# Extraction runs on every execution, whether or not a download was needed.
print("Extracting ERP CORE dataset...")
with ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_dir)


In [None]:
import hmp

# Sampling rate we want the data at; downsampling to 250Hz just to show that
# any sampling frequency can be used.
sfreq = 250
# Window for the epochs, in seconds: from 200 ms before the stimulus up to
# 2 seconds after. Data will be baseline corrected from tmin to 0.
tmin = -.2
tmax = 2

epoch_data = hmp.io.read_mne_data(
    [],
    data_format='bids',
    bids_parameters=dict(
        bids_root='sample_data/ERP_CORE',
        task='P3',
        datatype='eeg',
        session='P3',
    ),
    sfreq=sfreq,
    tmin=tmin,
    tmax=tmax,
    high_pass=0.1,
    low_pass=40,
    # Reject if more than 100microV between stimulus and response
    reject_threshold=1e-4,
    # VERY IMPORTANT, use the average as reference, other type of reference
    # (except REST) can give weird topographies
    reference='average',
    verbose=False,
)

In [None]:
# Show the loaded epochs object as the cell output so its contents can be inspected.
epoch_data

## Adding the frequent/rare condition

In [None]:
# Categorizing stimulus as rare or frequent based on the trigger description in the BIDS file
def classify(stim):
    """Label one event description as ``"rare"`` or ``"frequent"``.

    The description is parsed for the text following ``"target "`` (up to the
    next comma) and the text following ``"stimulus "``. The event is rare when
    the two are equal, i.e. when the presented stimulus is the target.

    Parameters
    ----------
    stim : str
        Event description, shaped like ``"target A, stimulus B"``.

    Returns
    -------
    str or float
        ``"rare"`` if target and stimulus match, ``"frequent"`` otherwise,
        and NaN when the description cannot be parsed.
    """
    try:
        target = stim.split("target ")[1].split(",")[0].strip()
        stimulus = stim.split("stimulus ")[1].strip()
    except (AttributeError, IndexError, TypeError):
        # AttributeError/TypeError: the value is not a text string (e.g. a
        # missing entry); IndexError: one of the two markers is absent.
        # A plain float NaN keeps this fallback independent of numpy being
        # imported in the notebook.
        return float("nan")
    return "rare" if target == stimulus else "frequent"

# Run `classify` element-wise over every event name; the output is declared
# as a string array.
stimulus_type = xr.apply_ufunc(
    classify,
    epoch_data.coords["event_name"],
    output_dtypes=[str],
    vectorize=True,
)

# Attach the labels to the data as a (participant, epoch) coordinate.
epoch_data = epoch_data.assign_coords(
    {"stimulus_type": (("participant", "epoch"), stimulus_type.data)}
)

## Correcting the montage

In [None]:
import mne
# Rename FP1 and FP2 channels to Fp1 and Fp2 so that they can be matched
# against the channel names of the standard montage below.
channel_renames = {"FP1": "Fp1", "FP2": "Fp2"}
epoch_data = epoch_data.assign_coords(
    channel=[channel_renames.get(ch, ch) for ch in epoch_data.channel.values]
)

montage = mne.channels.make_standard_montage('standard_1020')
# Query the montage positions and the data channel names once instead of
# recomputing them for every channel inside the comprehensions.
standard_positions = montage.get_positions()
data_channels = list(epoch_data.channel.values)
known_channels = set(data_channels)

# Keep only channels in the montage that are in epoch_data.channel
# (order follows the montage, not the data).
keep_chs = [ch for ch in montage.ch_names if ch in known_channels]
# Create a new montage with only the desired channels
montage = mne.channels.make_dig_montage(
    {ch: standard_positions['ch_pos'][ch] for ch in keep_chs},
    coord_frame=standard_positions['coord_frame'],
)

# Build an EEG info object for the data channels and attach the reduced montage.
info = mne.create_info(data_channels, sfreq, ch_types='eeg')
info = info.set_montage(montage, verbose=False)

# Typical workflow

In [None]:
# See the next two tutorials for more details on how to fit an HMP model and how to visualize the results.
# Preprocess the epochs with hmp's `Standard` preprocessing, passing n_comp=10.
preprocessed = hmp.preprocessing.Standard(epoch_data, n_comp=10)
# Build the expected half-sine pattern at the sampling rate stored on epoch_data.
event_properties = hmp.patterns.HalfSine.create_expected(sfreq=epoch_data.sfreq)

# Just plotting expected pattern, FYI
# NOTE(review): `plt` is not imported in the cells shown here — confirm that
# `import matplotlib.pyplot as plt` has been run before this cell.
plt.plot(event_properties.template, 'x')
plt.show()
# Assemble the trial data from the preprocessed object and the pattern template.
trial_data = hmp.trialdata.TrialData.from_preprocessed(preprocessed=preprocessed, pattern=event_properties.template)
# Create the model from the event properties; this same `model` is reused for the per-condition fits.
model = hmp.models.CumulativeMethod(event_properties)
# fit_transform returns two values; only the second (the estimates) is kept here.
_, estimates = model.fit_transform(trial_data)

In [None]:
# Plot the estimates from the fit on all trials, using `info` (channel names + montage built earlier);
# as_time=True presumably puts the time axis in time units rather than samples — confirm against the hmp docs.
hmp.visu.plot_topo_timecourse(epoch_data, estimates,info, as_time=True)

# Fitting per condition


In [None]:
# Select the data whose `stimulus_type` is 'frequent' and build the corresponding trial data,
# reusing the same pattern template as for the fit on all trials.
frequent_preprocessed_data = hmp.utils.condition_selection(preprocessed.data, 'frequent', variable='stimulus_type')
trial_data_frequent = hmp.trialdata.TrialData.from_preprocessed(frequent_preprocessed_data, pattern=event_properties.template)

# Same for the 'rare' stimuli.
# NOTE(review): `rare_preprocesssed_data` is spelled with three "s"; left unchanged because it is a notebook-level name.
rare_preprocesssed_data = hmp.utils.condition_selection(preprocessed.data, 'rare', variable='stimulus_type')
trial_data_rare = hmp.trialdata.TrialData.from_preprocessed(rare_preprocesssed_data, pattern=event_properties.template)

In [None]:
# Fit the existing `model` on the frequent trials only. This overwrites `estimates` from the previous fit;
# the first returned value is kept as `ll_cumulative` (presumably the log-likelihood — confirm).
ll_cumulative, estimates = model.fit_transform(trial_data_frequent)
# Plot the estimates obtained for the frequent condition.
hmp.visu.plot_topo_timecourse(epoch_data, estimates, info, as_time=True)

In [None]:
# Fit the existing `model` on the rare trials only. This overwrites both `ll_cumulative` and `estimates`
# from the previous cell.
ll_cumulative, estimates = model.fit_transform(trial_data_rare)
# Plot the estimates obtained for the rare condition.
hmp.visu.plot_topo_timecourse(epoch_data, estimates, info, as_time=True)