In [28]:
import sys

# Extend the module search path with the parent directory; presumably this is
# what lets the project packages imported in the next cell resolve — TODO confirm.
sys.path.append('..')

In [29]:
from feature_extraction.extract_features import Features

import numpy as np
import pandas as pd

from models.load_data import choose_interictal_train_segs, load_features_and_labels
from utils.io import pickle_path

from config.paths import PATHS

# Seed NumPy's global random state so NumPy-based randomness is repeatable across runs.
np.random.seed(42)

# Real Data

In [30]:
# Feature column names taken from `Features.ORDERED_FEATURE_NAMES`; used further down to select columns.
feature_cols = Features.ORDERED_FEATURE_NAMES
# Work with the first patient directory returned by the path configuration.
pdir = PATHS.patient_dirs()[0]
print(pdir.name)
# Load data
# NOTE(review): `pd.read_pickle` can execute arbitrary code while unpickling; only point it at trusted files.
real_segs = pd.read_pickle(pickle_path(pdir.segments_table))
print(real_segs.shape)
real_split = pd.read_pickle(pickle_path(pdir.train_test_split))
real_split

K37N36L4D
(719946, 21)


Timestamp        2022-05-09 10:56:32.069750
segment_index                        431969
Name: train_end, dtype: object

# Fake Data

In [31]:
# Toy column names used in place of the real feature names.
feature_cols = ['f1', 'f2']

# Ten hand-written segments; rows 3 and 6 are flagged as non-existent.
seg_types = [
    'interictal', 'inter_pre', 'preictal', 'preictal', 'preictal',
    'interictal', 'interictal', 'preictal', 'preictal', 'postictal',
]
seg_exists = [True, True, True, False, True, True, False, True, True, True]
fake_segs = pd.DataFrame(
    {
        'type': seg_types,
        'exists': seg_exists,
        'f1': list(range(10, 20)),
        'f2': list(range(20, 30)),
    }
)
# Train/test boundary: a Series holding a single `segment_index` entry.
fake_split = pd.Series([6], index=['segment_index'])

fake_segs

Unnamed: 0,type,exists,f1,f2
0,interictal,True,10,20
1,inter_pre,True,11,21
2,preictal,True,12,22
3,preictal,False,13,23
4,preictal,True,14,24
5,interictal,True,15,25
6,interictal,False,16,26
7,preictal,True,17,27
8,preictal,True,18,28
9,postictal,True,19,29


# Computation

In [32]:
# Select data type
real_data: bool = False

# Bind the segments table, the split marker and the feature column names
# together. `feature_cols` is assigned in both the real-data and the fake-data
# cell, so whichever ran last would otherwise win; choosing it here keeps the
# column names consistent with the selected table when the flag is flipped.
if real_data:
    segs, split, feature_cols = real_segs, real_split, Features.ORDERED_FEATURE_NAMES
else:
    segs, split, feature_cols = fake_segs, fake_split, ['f1', 'f2']

In [33]:
# Drop segments flagged as non-existent, then cut the table at the split label.
segs = segs.loc[segs['exists']]
boundary = split.segment_index
# `.loc` slices by index label and includes both end points, hence the `- 1`.
train_segs = segs.loc[:boundary - 1]
test_segs = segs.loc[boundary:]
for subset in (train_segs, test_segs):
    print(subset.shape)

(5, 4)
(3, 4)


## Training Data

In [34]:
# Display the training portion produced by the split.
train_segs

Unnamed: 0,type,exists,f1,f2
0,interictal,True,10,20
1,inter_pre,True,11,21
2,preictal,True,12,22
4,preictal,True,14,24
5,interictal,True,15,25


In [35]:
# Sanity check: the two partitions together account for every remaining row.
len(train_segs) + len(test_segs) == len(segs)

True

In [36]:
# Interictal rows are picked by the project helper (its selection logic is not shown in this notebook).
interictal_train = choose_interictal_train_segs(train_segs, random_state=42)
# Preictal rows are taken as-is via a mask on the `type` column.
is_preictal = train_segs['type'].eq('preictal')
preictal_train = train_segs.loc[is_preictal]
for subset in (interictal_train, preictal_train):
    print(subset.shape)

(2, 4)
(2, 4)


In [37]:
# Stack DataFrames
# Row-wise concatenation, interictal rows first; the original index labels are kept.
training_parts = (interictal_train, preictal_train)
train_segs = pd.concat(training_parts, axis=0)
train_segs

Unnamed: 0,type,exists,f1,f2
5,interictal,True,15,25
0,interictal,True,10,20
2,preictal,True,12,22
4,preictal,True,14,24


In [38]:
# Shuffle
# `frac=1` draws every row exactly once, i.e. a full permutation; `random_state` fixes the resulting order.
# NOTE(review): `train_segs` is rebound here, so re-running this cell shuffles the already-shuffled frame.
train_segs = train_segs.sample(frac=1, random_state=42)
train_segs

Unnamed: 0,type,exists,f1,f2
0,interictal,True,10,20
4,preictal,True,14,24
5,interictal,True,15,25
2,preictal,True,12,22


In [39]:
# Binary labels: 1 where the segment type is 'preictal', 0 otherwise.
y_train = train_segs['type'].eq('preictal').to_numpy(dtype=np.int32)
print(type(y_train[0]))
y_train

<class 'numpy.int32'>


array([0, 1, 0, 1], dtype=int32)

In [40]:
# Feature matrix for training: the selected columns converted to a NumPy array.
train_features = train_segs.loc[:, feature_cols]
x_train = train_features.to_numpy()
print(type(x_train))
x_train

<class 'numpy.ndarray'>


array([[10, 20],
       [14, 24],
       [15, 25],
       [12, 22]])

## Test Data

In [41]:
# Display the held-out portion produced by the split.
test_segs

Unnamed: 0,type,exists,f1,f2
7,preictal,True,17,27
8,preictal,True,18,28
9,postictal,True,19,29


In [42]:
# Binary labels for the held-out rows: 1 for 'preictal', 0 for any other type.
is_preictal_test = test_segs['type'].eq('preictal')
y_test = is_preictal_test.to_numpy(dtype=np.int32)
y_test

array([1, 1, 0], dtype=int32)

In [43]:
# Feature matrix for the held-out rows: the selected columns converted to a NumPy array.
test_features = test_segs.loc[:, feature_cols]
x_test = test_features.to_numpy()
x_test

array([[17, 27],
       [18, 28],
       [19, 29]])

# Testing


In [44]:
# Run the project loader on the same inputs so its results can be compared with the manually built arrays.
# NOTE(review): `segs` was already filtered on `exists` by an earlier cell — confirm the loader expects that.
xtr, ytr, xte, yte = load_features_and_labels(segs, split, feature_cols, random_state=42)

In [45]:
# Position-by-position comparison of the loader's training features with the manual ones.
# NOTE(review): this is order-sensitive; in the recorded run both arrays hold the same rows
# in a different order, which is why every entry prints False.
print(xtr == x_train)
xtr

[[False False]
 [False False]
 [False False]
 [False False]]


array([[14, 24],
       [10, 20],
       [12, 22],
       [15, 25]])

In [46]:
# Position-by-position comparison of the loader's training labels with the manual ones.
# NOTE(review): order-sensitive as well; the recorded labels hold the same values in a different order.
print(ytr ==  y_train)
ytr

[False False False False]


array([1, 0, 1, 0], dtype=int32)

In [47]:
# Position-by-position comparison of the loader's test features with the manual ones.
print(xte == x_test)
xte

[[ True  True]
 [ True  True]
 [ True  True]]


array([[17, 27],
       [18, 28],
       [19, 29]])

In [48]:
# Position-by-position comparison of the loader's test labels with the manual ones.
print(yte == y_test)
# Display the loader's output (not the manual array), matching the other comparison cells.
yte

[ True  True  True]


array([1, 1, 0], dtype=int32)