# Carregamento do dataset

### Informações gravadas em: https://github.com/bneurd/opt020

In [2]:
from scipy.io import loadmat
import numpy as np

# Load both recording files of each of the five participants.
# subjects[p][s] is the dict returned by loadmat for participant p+1, file s+1.
subjects = [
    [
        loadmat(f"../multi/Sub{i+1}_1_multitarget.mat"),
        loadmat(f"../multi/Sub{i+1}_2_multitarget.mat"),
    ]
    for i in range(0, 5)
]

# Display the shape of the signal matrix of the first file as a sanity check.
subjects[0][0]["Data"][0][0][7].shape

(8192, 10)

In [3]:
# For every participant and every recording file, pull out the transposed
# signal matrix (field 7 of "Data") and the flattened target frequencies.
data7 = [
    [recording["Data"][0][0][7].T for recording in participant]
    for participant in subjects
]
target_frequencies = [
    [
        recording["Data"][0][0]["TargetFrequency"][0].T.flatten()
        for recording in participant
    ]
    for participant in subjects
]

# Stack into a single array: (participants, files, targets, samples).
data7 = np.array(data7)
print(data7.shape)

# Merge the file and target axes: (participants, files * targets, samples).
data7 = data7.reshape(5, 2 * 10, 8192)
print(data7.shape)

(5, 2, 10, 8192)
(5, 20, 8192)


In [4]:
# Stack the per-file target frequencies into one array, then merge the file
# and target axes so every participant has a single row of labels.
frequencies = np.asarray(target_frequencies)
print(frequencies.shape)

frequencies = frequencies.reshape((5, 20))
print(frequencies.shape)

(5, 2, 10)
(5, 20)


In [5]:
# Persist one pair of files per participant: the (20, 8192) signal matrix and
# its 20 target-frequency labels.
for i, subject in enumerate(data7):
    # Save this participant's array (`subject`), not the raw `subjects` list of
    # loadmat dicts, which would pickle the entire dataset into every file.
    np.save(f"./results/data_{i}.npy", subject)
    np.save(f"./results/label_{i}.npy", frequencies[i])

# Display the per-participant shape as a sanity check.
data7[0].shape

(20, 8192)

Dado o exemplo acima, faça o carregamento dos cinco participantes, concatenando suas seções ou *trials*. Basta, em um `for` externo, aumentar a dimensionalidade para a quantidade de participantes e, em um `for` interno, concatenar as seções com dez (10) *targets* cada uma.

Ao final, teremos um *shape* igual a `(5, 20, 8192)`.

### Utilizando a biblioteca MNE

Nesta etapa, vamos criar um objeto MNE para armazenar os dados juntamente com os labels.

Para este tipo de tarefa, é recomendado o uso do objeto Epochs, pois armazena tanto os dados quanto os metadados.

In [8]:
# inicialmente criamos o objeto info
import mne

# criação de um objeto "info"
n_channels = 1
sfreq = 512
ch_names = ["Oz"]
ch_types = ["eeg"]
info = mne.create_info(ch_names, sfreq=sfreq, ch_types=ch_types)
%store n_channels
%store sfreq
%store ch_names
%store ch_types
%store info
info

Stored 'n_channels' (int)
Stored 'sfreq' (int)
Stored 'ch_names' (list)
Stored 'ch_types' (list)
Stored 'info' (Info)


0,1
Measurement date,Unknown
Experimenter,Unknown
Participant,Unknown

0,1
Digitized points,Not available
Good channels,1 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,512.00 Hz
Highpass,0.00 Hz
Lowpass,256.00 Hz


In [7]:
# Reshaping data
data7 = data7.reshape(5 * 20, 8192)
print(data7.shape)

data_correct = data7.reshape(data7.shape[0], 1, data7.shape[1])
print(data_correct.shape)

# Reshaping labels
frequencies = frequencies.flatten()
print(frequencies.shape)

from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
events = np.column_stack(
    (
        np.array(range(len(frequencies))),
        np.zeros(frequencies.shape[0], dtype=int),
        label_encoder.fit_transform(frequencies),
    )
)

event_dict = {"6": 0, "6.5": 1, "7": 2, "7.5": 3, "8.2": 4, "9.3": 5}
mne_data = mne.EpochsArray(data_correct, info, events, event_id=event_dict)
%store data7
%store mne_data

(100, 8192)
(100, 1, 8192)
(100,)
Not setting metadata
100 matching events found
No baseline correction applied
0 projection items activated


Stored 'data7' (ndarray)
Stored 'mne_data' (EpochsArray)
