In [3]:
import numpy as np
import scipy as sc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import re
import os

Download the SEED-IV dataset from Kaggle: https://www.kaggle.com/datasets/phhasian0710/seed-iv

## Explaining the file structure:
### The "eeg_raw_data" folder:
   * Contains 3 inner folders named 1, 2 and 3, corresponding to the 3 sessions.
      * Each .mat file inside those folders belongs to one of the 15 subjects (named {SubjectName}_{Date}.mat) and in turn contains several entries:
         * The .mat file contains the EEG signals recorded during 24 trials for 62 channels
   * Each of the 24 trials in each session folder (1, 2 or 3) has a label, and the labels are the same across all subjects

**Label Mapping**:
- Neutral: 0
- Sad: 1
- Fear: 2
- Happy: 3

### Now that we know this, we can calculate the dataset size:
3 sessions * 15 subjects * 24 trials * 62 channels = 66960 raw EEG signals (before windowing)

In [4]:
# Emotion label of every trial: one row per session (3 rows), one column per
# trial (24 columns). The same labels apply to every subject.
# Raw encoding: neutral=0, sad=1, fear=2, happy=3.
labels = np.array([
    [1,2,3,0,2,0,0,1,0,1,2,1,1,1,2,3,2,2,3,3,0,3,0,3],
    [2,1,3,0,0,2,0,2,3,3,2,3,2,0,1,1,2,1,0,3,0,1,3,1],
    [1,2,2,1,3,3,3,1,1,2,1,0,2,3,3,0,2,3,0,0,2,0,1,0]
])

In [5]:
labels.shape

(3, 24)

Mapping the sad and fear emotions to negative

In [6]:
# Collapse the four raw emotions into three classes, in place:
#   raw:    neutral:0 , sad:1 , fear:2 , happy:3
#   target: neutral:-1 , happy:0 , sad:1 , fear:1
#
# The remap is NOT idempotent on its own: after one pass happy is 0, so a
# second pass would turn every happy trial into neutral (-1). Negative values
# only exist after the remap (the raw labels contain neutral trials), so they
# are used as the "already remapped" marker and make re-running this cell a
# no-op.
if not (labels < 0).any():
    #currently neutral:0 , happy:3 , sad:1 , fear:2
    labels[labels==2] = 1  # changing fear labels from 2 to 1
    #currently neutral:0 , happy:3 , sad:1 , fear:1
    labels[labels==0] = -1  # changing neutral labels from 0 to -1 (must happen before happy takes over 0)
    #currently neutral:-1 , happy:3 , sad:1 , fear:1
    labels[labels==3] = 0  # changing happy labels from 3 to 0
    #currently neutral:-1 , happy:0 , sad:1 , fear:1

**Final label mapping**:
- Neutral: -1
- Positive (Happy): 0
- Negative (Sad , Fear): 1

In [7]:
labels

array([[ 1,  1,  0, -1,  1, -1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  0,
         1,  1,  0,  0, -1,  0, -1,  0],
       [ 1,  1,  0, -1, -1,  1, -1,  1,  0,  0,  1,  0,  1, -1,  1,  1,
         1,  1, -1,  0, -1,  1,  0,  1],
       [ 1,  1,  1,  1,  0,  0,  0,  1,  1,  1,  1, -1,  1,  0,  0, -1,
         1,  0, -1, -1,  1, -1,  1, -1]])

**To index a channel by its name**

In [8]:
# Lookup table from channel name to its row position in 'Channel Order.xlsx':
# the sheet has no header row, its single column is named 'channels', the
# original row number is kept in the 'index' column, and the channel name
# becomes the DataFrame index.
channelsMapping = (
    pd.read_excel('SEED-IV/Channel Order.xlsx', header=None, names=['channels'])
    .reset_index()
    .set_index('channels')
)

In [9]:
def getChannel(channel):
    """Return the value of the 'index' column of `channelsMapping` for `channel`.

    `channel` is looked up by label in the DataFrame index (the channel
    names); a label that is not present raises KeyError.
    """
    return channelsMapping.loc[channel, 'index']

### Let's play with the files a bit to understand it better.

In [None]:
def loadSubject(session,subject):
    """Load the recording of one subject for one session.

    Looks in ``SEED-IV/eeg_raw_data/{session}/`` for the file whose name
    starts with ``{subject}_`` and reads it with ``scipy.io.loadmat``.

    Parameters
    ----------
    session : value interpolated into the session folder name.
    subject : value interpolated into the file-name prefix.

    Returns
    -------
    list
        One entry per variable stored in the .mat file, in the order loadmat
        returns them; loadmat's ``__``-prefixed metadata keys are dropped.

    Raises
    ------
    FileNotFoundError
        If the session folder does not exist or holds no file for `subject`.
    """
    sessionPath = f'SEED-IV/eeg_raw_data/{session}/'
    prefix = f'{subject}_'
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # deterministic should several files share the prefix.
    matches = [name for name in sorted(os.listdir(sessionPath)) if name.startswith(prefix)]
    if not matches:
        # Previously this fell through to an UnboundLocalError on `subData`.
        raise FileNotFoundError(
            f"No file starting with '{prefix}' found in '{sessionPath}'"
        )
    subData = sc.io.loadmat(sessionPath + matches[0])
    return [v for k, v in subData.items() if not k.startswith('__')]

In [189]:
samplingRate = 200
def downSample(signal):
    """Keep every q-th element of `signal`, where q = int(1000 / samplingRate).

    This is plain slicing along the first axis (no filtering is applied
    here), so the result always starts with the first element of `signal`.
    """
    step = int(1000 / samplingRate)
    return signal[slice(None, None, step)]

In [None]:
def segmentChannel(ch, seconds):
    """Cut `ch` into consecutive windows of `seconds` seconds.

    Each window holds ``samplingRate * seconds`` elements; the last window
    contains whatever remains and can therefore be shorter. Returns the
    windows as a list of slices of `ch`, in order.
    """
    windowLen = samplingRate * seconds
    windowCount = int(np.ceil(len(ch) / windowLen))
    return [ch[k * windowLen:(k + 1) * windowLen] for k in range(windowCount)]

In [None]:
def preProccess(subData):
    """Band-pass filter and then downsample every trial of one subject.

    Parameters
    ----------
    subData : iterable of 2-D arrays
        One array per trial; each row is iterated as one channel.

    Returns
    -------
    list of np.ndarray
        One array per trial, each row being the filtered and downsampled
        channel. `subData` itself is not modified.

    Notes
    -----
    The filter is a 4th-order Butterworth band-pass (1-75 Hz, designed for
    fs=1000) applied with ``lfilter``. Downsampling is done on the *filtered*
    channels; the earlier version downsampled the raw channels and thereby
    threw the filtering away.
    """
    b, a = sc.signal.butter(4, Wn=[1, 75], btype='bandpass', fs=1000)
    processed = []
    for eeg in subData:
        filtered = [sc.signal.lfilter(b, a, ch) for ch in eeg]  # filtering
        processed.append(np.array([downSample(ch) for ch in filtered]))  # downsampling
    return processed

In [134]:
s = loadSubject(1,2)

In [196]:
s[1].shape

(62, 19001)

In [186]:
np.ceil(19001/(200*4))

np.float64(24.0)

In [193]:
sp = preProccess(s)

In [195]:
sp[1].shape

(62, 3801)

In [200]:
px.line(segmentChannel(sp[1][0], 4)[-1])


5
0 : 800 -> s[0] : 800
800 : 1600 -> s[1] : 800
1600 : 2400 -> s[2] : 800
2400 : 3200 -> s[3] : 800
3200 : 4000 -> s[4] : 601


In [203]:
px.line(sp[1][0][-600:])

In [157]:
px.line(preProccess(s)[1][getChannel('P4')])

In [72]:
s[1][getChannel('P4')].shape, downSample(s[1][getChannel('P4')]).shape

((19001,), (3801,))

Some plotting for comparisons

We can't load more than one session at a time because of the memory it requires; trying to load all the data at once will crash the computer.

In [None]:
def loadSession(k):
    """Load every subject file of session `k`.

    Reads each ``.mat`` file in ``SEED-IV/eeg_raw_data/{k}/`` with
    ``scipy.io.loadmat``.

    Returns
    -------
    list of list
        One inner list per subject file. Files are ordered by the number
        before the first underscore in the file name, so position 0 is the
        lowest-numbered subject (a plain os.listdir order is arbitrary, and a
        plain string sort would put "10_" before "2_"). Each inner list holds
        the variables stored in the file, in the order loadmat returns them,
        without loadmat's ``__``-prefixed metadata keys.
    """
    sessionPath=f'SEED-IV/eeg_raw_data/{k}/'

    def subjectOrder(name):
        # Numeric prefix first; names without one go last, alphabetically.
        m = re.match(r'(\d+)_', name)
        return (0, int(m.group(1)), name) if m else (1, 0, name)

    # Skip anything that is not a .mat file (loadmat would fail on it).
    sessionSubjects = sorted(
        (name for name in os.listdir(sessionPath) if name.endswith('.mat')),
        key=subjectOrder,
    )
    s=[]
    for subjectFile in sessionSubjects:
        sub=sc.io.loadmat(sessionPath+subjectFile)
        s.append([v for key, v in sub.items() if not key.startswith('__')])
    return s

In [None]:
s1=loadSession(1)

Checking that each subject has 24 trials

In [14]:
# loadSession returns a list of subjects, each a list of trials, so iterate
# the lists directly (calling .keys() on them raises AttributeError).
# An empty result means every subject has exactly 24 trials.
[True for sub in s1 if len(sub) != 24]

[]

In [15]:
# First session --> first subject --> first trial --> channel PZ
s1[0][0][getChannel('PZ')]

array([ -8.46385956, -11.1758709 , -13.23223114, ...,  -4.58955765,
         0.17881393,  -3.69548798], shape=(33601,))

Seeing how the EEG signals of different subjects compare given the same videos (same label)

In [16]:
labels[0]==0

array([False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
        True,  True, False,  True, False,  True])

In [17]:
# These are the positive indexes of the first session
posIndex=np.flatnonzero(labels[0]==0)

In [19]:
# One stacked subplot per subject, all showing the PZ channel of the same
# trial (the third entry of posIndex) so the traces can be compared directly.
subjectTitles = ("Subject 1", "Subject 2", "Subject 3")
fig = make_subplots(rows=3, cols=1, subplot_titles=subjectTitles)

for rowNumber, subjectTitle in enumerate(subjectTitles, start=1):
    # Subplot rows are 1-based, subjects in s1 are 0-based.
    fig.add_trace(
        go.Scatter(
            y=s1[rowNumber - 1][posIndex[2]][getChannel('PZ')],
            mode="lines",
            name=subjectTitle,
        ),
        row=rowNumber, col=1
    )

fig.update_layout(
    title_text="EEG PZ Channel Across 3 subject given the same trial (same movie and same label)",
    height=700,
    showlegend=False
)
# Only the bottom subplot gets an x-axis title.
fig.update_xaxes(title_text="Sample Number", row=3, col=1)

fig.show()