Labelled Data

In [2]:
import pyedflib
import numpy as np
import matplotlib.pyplot as plt
import os


#Samples are represented in 16-bit 2's complement

# Get the directory where the script is located
script_dir = os.getcwd()
file_counter = 0
Files = []
badFiles = []
# Construct the full path to the file
file_path = os.path.join(script_dir, 'EDF', 'PD patient Frontal')
for filename in os.listdir(file_path):
    # Check if the file ends with .edf
    if filename.endswith('.edf'):
        file_counter = file_counter+1
        Files.append(filename)
for k in np.arange(file_counter):
    path = os.path.join(file_path, Files[k])
    try:
        f = pyedflib.EdfReader(path)
    except OSError:
        badFiles.append(Files[k])     
Files = [item for item in Files if item not in badFiles]
Files.reverse()
n = f.signals_in_file
n = n-9
number_of_samples = f.getNSamples()[0]
Nblocks = int((number_of_samples-250)/64)
TotalBlocks=(5*Nblocks)*n
fuller_data = np.ndarray(shape=(TotalBlocks, 64, 2))
BlockCount=0
multiplier = f.getPhysicalMaximum(0)/f.getDigitalMaximum(0)
f.close()
for index, name in enumerate(Files):
    print(name)
    path = os.path.join(file_path, name)
    f = pyedflib.EdfReader(path)
    number_of_samples = f.getNSamples()[0]
    Nblocks = int((number_of_samples-250)/64)
    sigbufs = np.zeros(number_of_samples)
    full_data64 = np.ndarray(shape=(Nblocks*n, 64, 2))
    signalList = []  
    BlockCount = BlockCount+Nblocks*n
    ran = np.ndarray(shape=(21, number_of_samples-250))
    for i in np.arange(n):
        sigbufs[:] = f.readSignal(i, digital=True)
        sigbufs_new = sigbufs[250:]
        #ran[i] = sigbufs_new
        signalList.append(sigbufs_new) 
        labels = np.zeros(number_of_samples-250)
        if name == "DP14.edf":
            sezStart = 79650
            sezEnd = 82250
            labels[sezStart:sezEnd] = 1
        elif name == "DP141.edf":
            sezStart = 103500
            sezEnd = 106500
            labels[sezStart:sezEnd] = 1
        elif name == "DP142.edf":
            sezStart = 223250
            sezEnd = 224750
            labels[sezStart:sezEnd] = 1
        elif name == "DP18.edf":
            sezStart = 93250
            sezEnd = 94000
            labels[sezStart:sezEnd] = 1
        for j in np.arange(Nblocks):
                full_data64[i*Nblocks+j,:,0] = signalList[i][j*64:(j+1)*64]
                full_data64[i*Nblocks+j,:,1] = labels[j*64:(j+1)*64]
    fuller_data[BlockCount-Nblocks*n:BlockCount] = full_data64
    f.close()
seizures = np.sum(fuller_data[:,:,1] == 1) 
normal = np.sum(fuller_data[:,:,1] == 0)
percentage_seizure = seizures/(seizures+normal)*100
print(fuller_data.shape)
print(percentage_seizure)

DP14.edf
DP18.edf
DP141.edf
DP142.edf
DP15.edf
(369495, 64, 2)
0.697108553566354


In [2]:
import tensorflow as tf
def prd_loss_dig2phy(y_true, y_pred):
    y_true = (y_true)*multiplier
    y_pred = (y_pred)*multiplier
    rms_deviation = (tf.reduce_sum(tf.square(y_true - y_pred)))
    percentage_rmsd = tf.sqrt(rms_deviation/(tf.reduce_sum(tf.square(y_true))+tf.keras.backend.epsilon()))* 100
    return percentage_rmsd

2024-07-13 22:08:45.525691: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-13 22:08:45.525743: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-13 22:08:45.526447: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-13 22:08:45.530851: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import tensorflow as tf
def prd_loss_dig2phy_new(y_true, y_pred):
    y_true_processed = y_true[:, :, 0]
    y_true_processed = y_true_processed[..., tf.newaxis]
    y_true_processed = (y_true_processed)*multiplier
    y_pred = (y_pred)*multiplier
    rms_deviation = (tf.reduce_sum(tf.square(y_true_processed - y_pred)))
    percentage_rmsd = tf.sqrt(rms_deviation/(tf.reduce_sum(tf.square(y_true_processed))+tf.keras.backend.epsilon()))* 100
    return percentage_rmsd

In [4]:
import tensorflow.keras.backend as K
def weighted_mse_loss(y_true, y_pred):
    # y_true, y_pred shape: (batch_size, num_channels, signal_length)
    # label shape: (batch_size, )
    weight = 200
    # Extract the labels (0 or 1) from the last dimension of y_true
    labels = y_true[:, :, 1]
    # Remove the labels from y_true for loss calculation
    y_true_processed = y_true[:, :, 0]
    y_true_processed=y_true_processed[..., tf.newaxis]
    loss = K.mean(K.square(y_pred - y_true_processed))
    weighted_loss = loss * ((labels * (weight - 1)) + 1)
    return K.mean(weighted_loss)

In [6]:
import tensorflow as tf

def weighted_mae_loss(y_true, y_pred):
    # y_true, y_pred shape: (batch_size, signal_length, features)
    weight = 100
    # Extract the labels (0 or 1) from the last feature dimension of y_true
    labels = y_true[:, :, 1]  # Assuming the label is the last feature
    #labels = tf.expand_dims(labels, axis=-1)  # Expand dims to make labels shape (batch_size, signal_length, 1)
    
    # Remove the labels from y_true for loss calculation
    y_true_processed = y_true[:, :, 0]  # Assuming the first feature is the target value
    y_true_processed = y_true_processed[..., tf.newaxis]  # Expand dims to make shape (batch_size, signal_length, 1)
    # Compute the absolute error
    prd_error = prd_loss_dig2phy(y_true_processed, y_pred)

    # Apply the weight based on the labels
    weighted_abs_error = prd_error * (labels * (weight - 1) + 1)

    # Compute the mean of the weighted absolute error
    weighted_loss = tf.reduce_mean(weighted_abs_error)
    return weighted_loss

# Continue with the model definition and training as before


In [5]:
from tensorflow import keras
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

# Define the encoder
input_ts = keras.layers.Input(shape=(64, 1), name="input_time_series") # 64, 1
x = keras.layers.Conv1D(16, 3, activation='linear', padding='same')(input_ts) # 64, 16
x = keras.layers.Conv1D(8, 5, activation='linear', padding='same')(input_ts) # 64, 16
x = keras.layers.Conv1D(8, 5, activation='linear', padding='same', strides=2)(x) # 32, 8
x = keras.layers.Conv1D(4, 5, activation='linear', padding='same', strides=2)(x) # 16, 4
x = keras.layers.Flatten()(x) # Flatten for Dense layer
encoded = keras.layers.Dense(16, activation='linear')(x) # 16
encoder = keras.models.Model(input_ts, encoded, name="encoder")

# Define the decoder
encoded_input = keras.layers.Input(shape=(16,), name="encoded_input") # 16
x = keras.layers.Dense(16 * 4, activation='linear')(encoded_input) # 16 * 4
x = keras.layers.Reshape((16, 4))(x) # Reshape back to (16, 4)
x = keras.layers.Conv1DTranspose(8, 5, activation='linear', strides=2, padding='same')(x) # 16, 8
x = keras.layers.Conv1DTranspose(16, 5, activation='linear', strides=2, padding='same')(x) # 32, 16
x = keras.layers.Conv1DTranspose(8, 7, activation='linear', padding='same')(x) # 32, 16
decoded = keras.layers.Conv1DTranspose(1, 3, activation='linear', padding='same')(x) # 64, 1

decoder = keras.models.Model(encoded_input, decoded, name="decoder")

# Define the autoencoder
autoencoder_input = keras.layers.Input(shape=(64, 1), name="autoencoder_input")
encoded_ts = encoder(autoencoder_input)
decoded_ts = decoder(encoded_ts)

autoencoder = keras.models.Model(autoencoder_input, decoded_ts, name="autoencoder")

# Compile the autoencoder
autoencoder.compile(optimizer='adam', loss=weighted_mse_loss, metrics=[prd_loss_dig2phy_new])

# Summary of the autoencoder
autoencoder.summary()
checkpoint_callback = ModelCheckpoint(
    filepath='sez_model.keras',  # Path to save the model
    monitor='val_loss',        # Metric to monitor
    save_best_only=True,       # Save only the best model
    mode='min',                # Mode: minimize the monitored metric
    verbose=1                  # Print a message when saving the model
)
#features_train = fuller_data[:, :, 0]
# features_val = val_data[:, :, 0]

new_train, new_val = train_test_split(fuller_data, test_size=0.2, random_state=42)
features_val = new_val[:, :, 0]
features_val = features_val[..., tf.newaxis]
features_train = new_train[:, :, 0]
features_train = features_train[..., tf.newaxis]
#features_val = features_val[..., tf.newaxis]
#print(features_train.shape)
# Train the autoencoder
autoencoder.fit(features_train, new_train, epochs=30, batch_size=64, shuffle=True, callbacks=[checkpoint_callback], validation_data=(features_val, new_val))




2024-07-13 22:09:00.142894: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-13 22:09:00.170303: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-13 22:09:00.170623: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-13 22:09:00.174167: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-13 22:09:00.174526: I external/local_xla/xla/stream_executor

Model: "autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 autoencoder_input (InputLa  [(None, 64, 1)]           0         
 yer)                                                            
                                                                 
 encoder (Functional)        (None, 16)                1580      
                                                                 
 decoder (Functional)        (None, 64, 1)             2841      
                                                                 
Total params: 4421 (17.27 KB)
Trainable params: 4421 (17.27 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/30


2024-07-13 22:09:05.037103: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2024-07-13 22:09:05.111723: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-07-13 22:09:05.873878: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-07-13 22:09:06.813233: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f039aba0b50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-07-13 22:09:06.813309: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1650 Ti, Compute Capability 7.5
2024-07-13 22:09:06.818715: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1720904946.902687  109896 device_compil

Epoch 1: val_loss improved from inf to 30527.14453, saving model to sez_model.keras
Epoch 2/30
Epoch 2: val_loss improved from 30527.14453 to 18515.79492, saving model to sez_model.keras
Epoch 3/30
Epoch 3: val_loss did not improve from 18515.79492
Epoch 4/30
Epoch 4: val_loss improved from 18515.79492 to 13607.65234, saving model to sez_model.keras
Epoch 5/30
Epoch 5: val_loss did not improve from 13607.65234
Epoch 6/30
Epoch 6: val_loss improved from 13607.65234 to 10562.68359, saving model to sez_model.keras
Epoch 7/30
Epoch 7: val_loss did not improve from 10562.68359
Epoch 8/30
Epoch 8: val_loss did not improve from 10562.68359
Epoch 9/30
Epoch 9: val_loss did not improve from 10562.68359
Epoch 10/30
Epoch 10: val_loss did not improve from 10562.68359
Epoch 11/30
Epoch 11: val_loss did not improve from 10562.68359
Epoch 12/30
Epoch 12: val_loss improved from 10562.68359 to 10213.31836, saving model to sez_model.keras
Epoch 13/30
Epoch 13: val_loss did not improve from 10213.31836


<keras.src.callbacks.History at 0x7f070cc00220>

In [41]:
path = os.path.join(script_dir, 'EDF', 'DP153_new.edf')
f = pyedflib.EdfReader(path)
number_of_samples = f.getNSamples()[0]
Nblocks = int((number_of_samples-250)/64)
sigbufs = np.zeros(number_of_samples)
full_data64 = np.ndarray(shape=(Nblocks*n, 64, 2))
signalList = []  
BlockCount = BlockCount+Nblocks*n
ran = np.ndarray(shape=(21, number_of_samples-250))
for i in np.arange(n):
    sigbufs[:] = f.readSignal(i, digital=True)
    sigbufs_new = sigbufs[250:]
    #ran[i] = sigbufs_new
    signalList.append(sigbufs_new) 
    for j in np.arange(Nblocks):
        full_data64[i*Nblocks+j,:,0] = signalList[i][j*64:(j+1)*64]
print(full_data64.shape)

(62748, 64, 2)


: 

In [31]:
from tensorflow import keras
best_autoencoder = keras.models.load_model('sez_model.keras', custom_objects={'weighted_mse_loss': weighted_mse_loss, 'prd_loss_dig2phy_new': prd_loss_dig2phy_new})
test_data = full_data64[:, :,0][..., tf.newaxis]
pred_data = best_autoencoder.predict(test_data)
prd = prd_loss_dig2phy(test_data, pred_data).numpy()
print(prd)

13.53920549993215
