In [2]:
import numpy as np
import scipy as sc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import re
import os

Download the SEED-IV dataset from Kaggle: https://www.kaggle.com/datasets/phhasian0710/seed-iv

## File structure:
### The "eeg_raw_data" folder:
   * Contains 3 inner folders named 1, 2 and 3, corresponding to the 3 sessions.
      * Each .mat file inside those folders belongs to one of the 15 subjects (named {SubjectName}_{Date}.mat):
         * The .mat file contains the EEG signals recorded during the 24 trials, each with 62 channels
   * Each of the 24 trials in a session folder (1, 2 or 3) has a label, and the labels are the same across all subjects

**Label Mapping**:
- Neutral: 0
- Sad: 1
- Fear: 2
- Happy: 3

### Now that we know this, we can calculate the dataset size:
3 sessions * 15 subjects * 24 trials * 62 channels = 66960 raw EEG signals (before windowing)

In [3]:
# Emotion label of every trial: one row per session, one column per trial.
# Original coding: 0 = neutral, 1 = sad, 2 = fear, 3 = happy.
labels = np.array(
    [
        # session 1
        [1,2,3,0,2,0,0,1,0,1,2,1,1,1,2,3,2,2,3,3,0,3,0,3],
        # session 2
        [2,1,3,0,0,2,0,2,3,3,2,3,2,0,1,1,2,1,0,3,0,1,3,1],
        # session 3
        [1,2,2,1,3,3,3,1,1,2,1,0,2,3,3,0,2,3,0,0,2,0,1,0],
    ],
    dtype=int,
)

In [4]:
# (number of sessions, number of trials per session)
np.shape(labels)

(3, 24)

Mapping the sad and fear emotions to negative

In [5]:
# Collapse the four original emotions into three classes:
#   neutral (0) -> -1,  sad (1) -> 1,  fear (2) -> 1,  happy (3) -> 0
# LABEL_REMAP[k] is the new value for original label k, so the whole table is
# remapped in one step and the result does not depend on the order of the rules.
LABEL_REMAP = np.array([-1, 1, 1, 0])

# Every original label is >= 0, so a negative entry means the remap has already
# been applied. Skipping in that case keeps the cell safe to re-run: applying the
# rules a second time would turn happy (0 after the first run) into neutral (-1).
# NOTE: this check relies on at least one neutral trial being present, which holds
# for the label table defined in this notebook.
if labels.min() >= 0:
    # Assign through labels[...] so the existing array is updated in place.
    labels[...] = LABEL_REMAP[labels]

**Final label mapping**:
- Neutral: -1
- Positive (Happy): 0
- Negative (Sad , Fear): 1

In [6]:
# Display the remapped labels (one row per session, one column per trial)
labels

array([[ 1,  1,  0, -1,  1, -1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  0,
         1,  1,  0,  0, -1,  0, -1,  0],
       [ 1,  1,  0, -1, -1,  1, -1,  1,  0,  0,  1,  0,  1, -1,  1,  1,
         1,  1, -1,  0, -1,  1,  0,  1],
       [ 1,  1,  1,  1,  0,  0,  0,  1,  1,  1,  1, -1,  1,  0,  0, -1,
         1,  0, -1, -1,  1, -1,  1, -1]])

**To index a channel by its name**

In [7]:
# The sheet is read without a header row into a single 'channels' column.
# reset_index() keeps each row number in an 'index' column; the channel names
# then become the frame's index so a row number can be looked up by name.
channelsMapping = (
    pd.read_excel('SEED-IV/Channel Order.xlsx', header=None, names=['channels'])
    .reset_index()
    .set_index('channels')
)

In [8]:
def getChannel(channel):
    """Return the 'index' value stored in `channelsMapping` for `channel`.

    `channel` is a label of `channelsMapping`'s index (a channel name such as
    'P4'). A label that is not present raises KeyError.
    """
    return channelsMapping.loc[channel, 'index']

### Let's play with the files a bit to understand them better.

In [9]:
# Load subject 1's recording from each of the 3 sessions.
# eeg_data_1[s] is whatever loadmat returns for session s+1; later cells index
# it by trial key (e.g. 'cz_eeg1') and then by channel row.
subject1Recordings = []
for i in range(1, 4):  # loops over the 3 sessions
    sessionDir = f'SEED-IV/eeg_raw_data/{i}/'
    # sorted(): os.listdir returns entries in arbitrary order
    for file in sorted(os.listdir(sessionDir)):
        # for every file in the session folder, check if it belongs to subject 1
        # ('1_' does not match '10_...' to '15_...' because of the underscore)
        if file.startswith('1_'):
            subject1Recordings.append(sc.io.loadmat(sessionDir + file))

# Build the object array once. Growing it with np.append inside the loop copies
# the whole array for every file and starts from a float array that then has to
# be coerced to object dtype.
eeg_data_1 = np.array(subject1Recordings, dtype=object)

In [10]:
# eeg_data_1 is indexed by session, then trial key, then channel row.
# For array input, Plotly Express names the x axis 'index' and the y axis
# 'value'; `labels` swaps those defaults for readable axis titles.
px.line(
    eeg_data_1[1]['cz_eeg1'][getChannel('P4')],
    title="Subject 1 - Session 2 - trial 'cz_eeg1' - P4",
    labels={'index': 'Sample Number', 'value': 'Amplitude'},
)

Some plotting for comparisons

In [11]:
# One stacked subplot per session, each showing the P4 row of the trial stored
# under 'cz_eeg1' in that session's recording of subject 1.
sessionCount = 3
p4Row = getChannel('P4')

fig = make_subplots(
    rows=sessionCount,
    cols=1,
    subplot_titles=tuple(f"Session {n} - P4" for n in range(1, sessionCount + 1)),
)
for n in range(1, sessionCount + 1):
    # Sessions are numbered from 1 in the titles but stored from 0 in eeg_data_1.
    trace = go.Scatter(y=eeg_data_1[n - 1]['cz_eeg1'][p4Row], mode="lines", name=f"Session {n}")
    fig.add_trace(trace, row=n, col=1)

fig.update_layout(
    title_text="EEG P4 Channel Across 3 Sessions",
    height=700,
    showlegend=False,
)
# Only the bottom subplot carries the x-axis title.
fig.update_xaxes(title_text="Sample Number", row=sessionCount, col=1)

fig.show()

We can't load more than one session at a time because of the resources it needs: if we try to load all the data at once, the computer will crash.

In [13]:
def _loadSession(sessionPath):
    """Load every subject recording found in one session folder.

    Parameters
    ----------
    sessionPath : str
        Folder holding the {SubjectName}_{Date}.mat files of a single session.

    Returns
    -------
    dict
        {subject number - 1: {trial number - 1: value stored under that trial key}}.
        The subject number is the leading number of the file name, so key 0 is
        always the file starting with '1_', whatever order the folder is listed in.
    """
    session = {}
    for fileName in os.listdir(sessionPath):
        subjectMatch = re.match(r'(\d+)_', fileName)
        if subjectMatch is None or not fileName.endswith('.mat'):
            continue  # not a {SubjectName}_{Date}.mat recording
        mat = sc.io.loadmat(os.path.join(sessionPath, fileName))
        trials = {}
        for key, value in mat.items():
            if key.startswith('__'):
                continue  # skip the '__'-prefixed entries, which are not trials
            trialMatch = re.search(r'(\d+)$', key)
            if trialMatch is None:
                continue  # key does not end with a trial number
            # Trial keys end with a 1-based trial number; store them 0-based.
            trials[int(trialMatch.group(1)) - 1] = value
        session[int(subjectMatch.group(1)) - 1] = trials
    return session


session1Path='SEED-IV/eeg_raw_data/1/'
session1Subjects=os.listdir(session1Path)
# s1[subject][trial] -> recording of that trial, both indices 0-based
s1=_loadSession(session1Path)

Checking that each subject has 24 trials

In [14]:
# Keys of the entries in s1 that do not hold exactly 24 trials (expected: none).
# Listing the keys, rather than a list of True values, shows which one to inspect.
[subject for subject, trials in s1.items() if len(trials) != 24]

[]

In [15]:
# Session 1 --> entry 0 of s1 --> trial 0 --> row of channel PZ
firstTrial = s1[0][0]
firstTrial[getChannel('PZ')]

array([ -8.46385956, -11.1758709 , -13.23223114, ...,  -4.58955765,
         0.17881393,  -3.69548798], shape=(33601,))

Seeing how the EEG signals of different subjects compare when they are shown the same video (same label)

In [16]:
# Boolean mask of the first session's trials whose label is 0
np.equal(labels[0], 0)

array([False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
        True,  True, False,  True, False,  True])

In [17]:
# Trial indices of the first session whose label is 0 (the positive class)
posIndex = np.where(labels[0] == 0)[0]

In [19]:
# One stacked subplot per entry of s1 (keys 0, 1, 2), each showing the PZ row of
# the same trial: the third index listed in posIndex.
subjectCount = 3
trialIndex = posIndex[2]
pzRow = getChannel('PZ')

fig = make_subplots(
    rows=subjectCount,
    cols=1,
    subplot_titles=tuple(f"Subject {n}" for n in range(1, subjectCount + 1)),
)
for n in range(1, subjectCount + 1):
    # Titles count from 1 while the keys of s1 count from 0.
    trace = go.Scatter(y=s1[n - 1][trialIndex][pzRow], mode="lines", name=f"Subject {n}")
    fig.add_trace(trace, row=n, col=1)

fig.update_layout(
    title_text="EEG PZ Channel Across 3 subject given the same trial (same movie and same label)",
    height=700,
    showlegend=False,
)
# Only the bottom subplot carries the x-axis title.
fig.update_xaxes(title_text="Sample Number", row=subjectCount, col=1)

fig.show()