In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
import sys

from pathlib import Path

# Add the root directory of the project to the Python path of the notebook's session
project_root = Path.cwd().parent  # Assuming the notebook is in the "notebooks" folder
sys.path.append(str(project_root))

# Import function for loading preprocessed data
from src.Preprocessing.utils import load_preprocessed_data, transform_to_tensor, save_preprocessed_data

In [12]:
data_directory = project_root / "data" / "preprocessed" / "Dog_1_5s_slices"
print("Directory of the data: ")
print(data_directory)

Directory of the data: 
c:\Users\User\Documents\Python Projects\SeizureSen\data\preprocessed\Dog_1_5s_slices


## Frequency Domain Train

In [8]:
file_name = "freq_domain_train.npz"

freq_domain_preprocessed = load_preprocessed_data(data_directory, file_name)

X_train_freq = freq_domain_preprocessed["X"]
y_train_freq = freq_domain_preprocessed["y"]

print("Number of slices: ", len(X_train_freq))

print(X_train_freq)

Data loaded from c:\Users\User\Documents\Python Projects\SeizureSen\data\preprocessed\Dog_1_5s_slices\freq_domain_train.npz
Number of slices:  60480
[{'delta_mean': array([3.34540716, 3.27011745, 3.36536499, 3.45652811, 3.34998228,
        3.14850231, 3.37854794, 3.34741056, 3.56231406, 3.44065782,
        3.19107416, 3.35085233, 3.49311424, 3.27811191, 3.24543261,
        3.37508899]), 'delta_std': array([0.42306187, 0.26256484, 0.30427171, 0.23235372, 0.32652243,
        0.34405181, 0.36528261, 0.21772717, 0.27016056, 0.30875591,
        0.25501122, 0.25922853, 0.31195127, 0.25016504, 0.32534909,
        0.25196266]), 'theta_mean': array([3.42835077, 3.24671553, 3.19745099, 3.35531718, 3.24464078,
        3.08203598, 3.10148598, 3.1472589 , 3.25050275, 3.13777465,
        3.04791276, 3.13294559, 3.36843857, 3.06497429, 3.04456557,
        3.17849627]), 'theta_std': array([0.25531608, 0.21182531, 0.36767665, 0.21035678, 0.2204072 ,
        0.21732059, 0.34664799, 0.22883589, 0.4063084

In [4]:
X_train0_freq = X_train_freq[0]

print("Features for every channel: ")
print(list(X_train0_freq.keys()))

print("Value for key 'delta_mean': ")
print(X_train0_freq["delta_mean"])

Features for every channel: 
['delta_mean', 'delta_std', 'theta_mean', 'theta_std', 'alpha_mean', 'alpha_std', 'beta_mean', 'beta_std', 'low_gamma_mean', 'low_gamma_std', 'mid_gamma_mean', 'mid_gamma_std', 'high_gamma_1_mean', 'high_gamma_1_std', 'high_gamma_2_mean', 'high_gamma_2_std']
Value for key 'delta_mean': 
[3.86088277 3.69929204 3.79844679 3.7614892  3.80070317 3.57091407
 3.72508142 3.72632327 3.81148434 3.67766162 3.56334622 3.67775332
 3.83689165 3.61381349 3.65113957 3.72855931]


First slice in frequency domain. Rows represent each channel, columns the features

In [5]:
X_train0_freq_df = pd.DataFrame(X_train_freq[0])
X_train0_freq_df.head()

Unnamed: 0,delta_mean,delta_std,theta_mean,theta_std,alpha_mean,alpha_std,beta_mean,beta_std,low_gamma_mean,low_gamma_std,mid_gamma_mean,mid_gamma_std,high_gamma_1_mean,high_gamma_1_std,high_gamma_2_mean,high_gamma_2_std
0,3.860883,0.253131,3.740264,0.323642,3.545684,0.287894,3.50091,0.276814,3.415099,0.289872,3.230648,0.265392,3.005063,0.292402,2.595448,0.315658
1,3.699292,0.303583,3.614573,0.277721,3.516531,0.265756,3.388776,0.28077,3.351585,0.273382,3.175433,0.286158,2.97751,0.292089,2.574946,0.327762
2,3.798447,0.304884,3.51856,0.289902,3.410249,0.287694,3.28522,0.261799,3.226586,0.309864,3.080569,0.290002,2.904999,0.276862,2.544502,0.315481
3,3.761489,0.310649,3.64013,0.262211,3.528653,0.28435,3.366406,0.270879,3.3059,0.286259,3.142568,0.281287,2.941553,0.299685,2.594057,0.310177
4,3.800703,0.262843,3.586927,0.319211,3.34972,0.29559,3.369538,0.276502,3.325118,0.285174,3.181188,0.281649,2.978383,0.272462,2.608836,0.309739


Second slice in frequency domain. Rows represent each channel, columns the features

In [9]:
X_train1_freq_df = pd.DataFrame(X_train_freq[1])
X_train1_freq_df.head()

Unnamed: 0,delta_mean,theta_mean,alpha_mean,beta_mean,low_gamma_mean,mid_gamma_mean,high_gamma_1_mean,high_gamma_2_mean,time_domain_std
0,3.844872,3.741933,3.5829,3.474548,3.422789,3.22366,2.988562,2.579718,24.296243
1,3.737212,3.599041,3.484442,3.424737,3.366246,3.154363,2.968944,2.554193,20.618137
2,3.706727,3.528849,3.400081,3.294093,3.224514,3.073693,2.874914,2.51277,16.765795
3,3.705572,3.580964,3.503501,3.382478,3.241555,3.109334,2.940445,2.552445,19.119106
4,3.752099,3.611241,3.415642,3.356246,3.325826,3.191378,2.982485,2.611584,21.063647


In the tensor below, the data is organized as (n_samples, channels, features, time_steps). Therefore, in the first line, the values are presented for feature "delta mean" for slices 1, 2, ..., 8.

In [11]:
steps = 10

X_train_freq_tensor, y_train_freq_tensor = transform_to_tensor(X_train_freq, y_train_freq, steps=steps)

print("Number of slices: ", len(X_train_freq))
print("Steps: ", steps)
print("Shape of the tensor X_freq: ", X_train_freq_tensor.shape)
print("Shape of the tensor y_freq: ", y_train_freq_tensor.shape)

print("tensor: ", X_train_freq_tensor)

Number of slices:  10080
Steps:  10
Shape of the tensor X_freq:  (1008, 16, 16, 10)
Shape of the tensor y_freq:  (1007,)
tensor:  tf.Tensor(
[[[[3.8608828  3.8448725  3.7824528  ... 3.8070314  3.8683736
    3.8801234 ]
   [0.25313124 0.27153903 0.3084179  ... 0.25320688 0.3054586
    0.30551648]
   [3.740264   3.7419329  3.6909974  ... 3.657331   3.7423797
    3.7228827 ]
   ...
   [0.29240248 0.3094082  0.26101238 ... 0.27851334 0.28864077
    0.26750696]
   [2.5954478  2.579718   2.6130345  ... 2.5600183  2.6362095
    2.5920715 ]
   [0.3156577  0.31446722 0.31714818 ... 0.3192028  0.3082119
    0.32474682]]

  [[3.699292   3.7372122  3.6501362  ... 3.6823347  3.777118
    3.792921  ]
   [0.3035826  0.29952234 0.26815268 ... 0.30610272 0.29825956
    0.27273175]
   [3.6145732  3.5990405  3.5896225  ... 3.5255258  3.6685297
    3.6985788 ]
   ...
   [0.2920887  0.2847382  0.29796323 ... 0.28086916 0.29610315
    0.2842882 ]
   [2.574946   2.5541925  2.5779233  ... 2.5385985  2.5881245

In [12]:
print("First part of the segment: ")
pd.DataFrame(X_train_freq_tensor[0, :, :, 0]).head()

First part of the segment: 


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,3.860883,0.253131,3.740264,0.323642,3.545684,0.287894,3.50091,0.276814,3.415099,0.289872,3.230648,0.265392,3.005063,0.292402,2.595448,0.315658
1,3.699292,0.303583,3.614573,0.277721,3.516531,0.265756,3.388776,0.28077,3.351585,0.273382,3.175433,0.286158,2.97751,0.292089,2.574946,0.327762
2,3.798447,0.304884,3.51856,0.289902,3.410249,0.287694,3.28522,0.261799,3.226586,0.309864,3.080569,0.290002,2.904999,0.276862,2.544502,0.315481
3,3.761489,0.310649,3.64013,0.262211,3.528653,0.28435,3.366406,0.270879,3.3059,0.286259,3.142568,0.281287,2.941553,0.299685,2.594057,0.310177
4,3.800703,0.262843,3.586927,0.319211,3.34972,0.29559,3.369538,0.276502,3.325118,0.285174,3.181188,0.281649,2.978383,0.272462,2.608835,0.309739


In [13]:
unique, counts = np.unique(y_train_freq_tensor.numpy(), return_counts=True)
dict(zip(unique, counts))

{np.int32(0): np.int64(959), np.int32(1): np.int64(48)}

## Time Domain Train

In [12]:
file_name = "time_domain_train.npz"

time_domain_preprocessed = load_preprocessed_data(data_directory, file_name)

X_train_time = time_domain_preprocessed["X"]
y_train_time = time_domain_preprocessed["y"]

print("Number of slices: ", len(X_train_time))
print("Features for every channel: ")
print(X_train_time)

Data loaded from c:\Users\lucas\Documents\Python Projects\SeizureSen\data\preprocessed\Dog_1\time_domain_train.npz
Number of slices:  10080
Features for every channel: 
[{'pc1': array([3.14343124, 3.04868057, 2.93749289, 2.93062298, 3.04507777,
        2.95959165, 2.90091313, 2.91894951, 3.140064  , 3.15832283,
        2.87349487, 1.80238202, 3.49388459, 3.08425634, 2.25891061,
        2.43190903]), 'pc2': array([3.21121845, 2.98384044, 2.3792283 , 2.98578732, 3.11646768,
        2.89302506, 2.34779919, 2.88506361, 2.97979386, 2.56814349,
        2.85674924, 3.11845687, 3.02967103, 2.78516586, 3.07813825,
        3.17107961]), 'pc3': array([3.07570223, 1.88774932, 3.09317794, 3.01760049, 2.78970683,
        2.49270151, 3.02087666, 2.99700973, 2.55819912, 1.9691515 ,
        2.38140224, 2.91524144, 2.83384678, 2.20441668, 2.92790344,
        3.03875293]), 'pc4': array([2.20326576, 2.53721232, 2.30671015, 3.03504773, 2.0535966 ,
        2.58583622, 3.01403007, 2.43168461, 3.05920564, 2.0

In [13]:
steps = 4

X_train_time_tensor, y_train_time_tensor = transform_to_tensor(X_train_time, y_train_time, steps=steps)

In [14]:
X_train_time_tensor

<tf.Tensor: shape=(2520, 16, 16, 4), dtype=float32, numpy=
array([[[[ 3.1434312e+00,  3.2176368e+00,  2.8723414e+00,
           2.7425854e+00],
         [ 3.2112184e+00,  3.0674317e+00,  3.2393110e+00,
           3.2701874e+00],
         [ 3.0757022e+00,  3.0917501e+00,  3.0569167e+00,
           2.9960761e+00],
         ...,
         [ 1.5887877e+00,  9.8594427e-01,  1.3963199e+00,
           1.5945001e+00],
         [-5.3292996e-01,  9.6192092e-01,  8.0415362e-01,
           5.8711988e-01],
         [-1.2150851e+01, -1.2179293e+01, -1.2288362e+01,
          -1.2033110e+01]],

        [[ 3.0486805e+00,  3.1590426e+00,  2.8423519e+00,
           2.7286978e+00],
         [ 2.9838405e+00,  2.8516667e+00,  3.0910103e+00,
           3.1238363e+00],
         [ 1.8877493e+00,  1.2735853e+00,  1.9958744e+00,
           1.4184664e+00],
         ...,
         [ 1.7461941e+00,  1.4081969e+00,  1.6869919e+00,
           1.8880793e+00],
         [ 1.2254529e-01,  8.8066095e-01,  8.3396286e-01,
   

In [15]:
X_train0_time = X_train_time_tensor[0, :, :, 0]

print("First part of the segment: ")
X_train0_time = pd.DataFrame(X_train0_time)
X_train0_time.head()

First part of the segment: 


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,3.143431,3.211218,3.075702,2.203266,2.750378,2.430432,2.556178,2.709589,2.826022,1.900633,1.605318,-0.008401,1.812562,1.588788,-0.53293,-12.150851
1,3.048681,2.98384,1.887749,2.537212,2.765255,3.069025,2.733554,2.37265,2.705566,1.604023,0.653533,0.18541,2.022786,1.746194,0.122545,-12.150851
2,2.937493,2.379228,3.093178,2.30671,2.452029,2.661938,1.909554,2.909717,2.827105,2.499948,1.472634,1.209351,1.392374,0.571016,1.383795,-12.150851
3,2.930623,2.985787,3.017601,3.035048,2.696918,2.399433,1.856555,2.357911,2.079626,2.79433,2.243201,1.810169,1.44403,1.402019,1.21271,-12.150851
4,3.045078,3.116468,2.789707,2.053596,2.193357,2.867617,2.125741,2.851568,2.647869,2.01642,0.340748,1.818309,2.46971,2.086541,1.087176,-12.150851


In [17]:
labels_train_freq = np.array(y_train_freq)
labels_train_time = np.array(y_train_time)

assert np.array_equal(labels_train_freq, labels_train_time)

## Test Data

In [13]:
filename = "freq_domain_test.npz"
data_test_freq = load_preprocessed_data(data_directory, filename)

print("Number of slices: ", len(data_test_freq["X"]))
print("Number of labels: ", len(data_test_freq["y"]))

Data loaded from c:\Users\User\Documents\Python Projects\SeizureSen\data\preprocessed\Dog_1_5s_slices\freq_domain_test.npz
Number of slices:  60240
Number of labels:  60240


In [18]:
filename = "time_domain_test.npz"
data_test_time = load_preprocessed_data(data_directory, filename)

print("Number of slices: ", len(data_test_time["X"]))
print("Number of labels: ", len(data_test_time["y"]))

Data loaded from c:\Users\User\Documents\Python Projects\SeizureSen\data\preprocessed\Dog_1\time_domain_test.npz
Number of slices:  10040
Number of labels:  10040


In [20]:
labels_test_freq = np.array(data_test_freq["y"])
labels_test_time = np.array(data_test_time["y"])

assert np.array_equal(labels_test_freq, labels_test_time)

In [21]:
steps = 10
X_freq, y_freq = transform_to_tensor(data_test_freq["X"], data_test_freq["y"], steps=steps)

In [22]:
print("X shape: ", X_freq.shape)
print("y shape: ", y_freq.shape)

X shape:  (1004, 16, 16, 10)
y shape:  (1004,)


In [23]:
X_time, y_time = transform_to_tensor(data_test_time["X"], data_test_time["y"], steps=steps)

In [24]:
print("X shape: ", X_time.shape)
print("y shape: ", y_time.shape)

X shape:  (1004, 16, 16, 10)
y shape:  (1004,)


In [25]:
unique, counts = np.unique(y.numpy(), return_counts=True)
dict(zip(unique, counts))

{np.int32(0): np.int64(956), np.int32(1): np.int64(48)}

## Augmented Train Data

In [3]:
file_name = "freq_domain_train_augmented_preictal.npz"

freq_domain_preprocessed = load_preprocessed_data(data_directory, file_name)

X_train_freq = freq_domain_preprocessed["X"]
y_train_freq = freq_domain_preprocessed["y"]

print("Number of slices: ", len(X_train_freq))

print(X_train_freq)

Data loaded from c:\Users\User\Documents\Python Projects\SeizureSen\data\preprocessed\Dog_1\freq_domain_train_augmented_preictal.npz
Number of slices:  10559
[{'delta_mean': array([3.86088277, 3.69929204, 3.79844679, 3.7614892 , 3.80070317,
        3.57091407, 3.72508142, 3.72632327, 3.81148434, 3.67766162,
        3.56334622, 3.67775332, 3.83689165, 3.61381349, 3.65113957,
        3.72855931]), 'delta_std': array([0.25313124, 0.30358259, 0.30488416, 0.31064912, 0.26284323,
        0.30009964, 0.34379839, 0.2747996 , 0.26850826, 0.28619907,
        0.26151922, 0.26320181, 0.27720275, 0.30441275, 0.26347024,
        0.29038639]), 'theta_mean': array([3.74026402, 3.61457315, 3.51855968, 3.64013039, 3.58692677,
        3.42097063, 3.51769658, 3.46424254, 3.66404604, 3.4383492 ,
        3.3980832 , 3.51560034, 3.65630806, 3.49531712, 3.4690012 ,
        3.60868276]), 'theta_std': array([0.32364167, 0.27772096, 0.28990199, 0.26221081, 0.31921086,
        0.29361066, 0.25904511, 0.31230111, 

In [4]:
y_train_freq_s = pd.DataFrame(y_train_freq)
print("Value counts for the labels for augmented data: ")
y_train_freq_s.value_counts()

Value counts for the labels for augmented data: 


0
0    9600
1     959
Name: count, dtype: int64

In [5]:
steps = 10

X_train_freq_tensor, y_train_freq_tensor = transform_to_tensor(X_train_freq, y_train_freq, steps=steps)

print("Number of slices: ", len(X_train_freq))
print("Steps: ", steps)
print("Shape of the tensor X_freq: ", X_train_freq_tensor.shape)
print("Shape of the tensor y_freq: ", y_train_freq_tensor.shape)

Number of slices:  10559
Steps:  10
Shape of the tensor X_freq:  (1055, 16, 16, 10)
Shape of the tensor y_freq:  (1055,)


In [28]:
unique, counts = np.unique(y_train_freq_tensor.numpy(), return_counts=True)
dict(zip(unique, counts))

{np.int32(0): np.int64(960), np.int32(1): np.int64(95)}

In [7]:
import tensorflow as tf
y_train_freq_tensor_repeated = tf.repeat(y_train_freq_tensor, steps)

In [15]:
np.array_equal(y_train_freq_tensor_repeated.numpy(), y_train_freq.astype(np.int32)[:-9])

True

In [14]:
y_train_freq.astype(np.int32)[:-9]

array([1, 1, 1, ..., 1, 1, 1], dtype=int32)