In [1]:
import numpy as np
# Utilities for clearing rendered cell output and forcing garbage collection
from IPython.display import clear_output
import gc

# Clear this cell's output (wait=True defers the clear until new output
# arrives) and run a full garbage-collection pass; gc.collect() returns the
# number of unreachable objects found, which the notebook echoes below.
clear_output(wait=True)
gc.collect()

0

In [2]:
from steps import setup_and_train_models, analyze_seizure_propagation
import torch
from datasetConstruct import construct_channel_recognition_dataset
from models import Wavenet, train_using_optimizer
import pickle
import os
import matplotlib.pyplot as plt
import pandas as pd

CUDA extension for structured kernels (Cauchy and Vandermonde multiplication) not found. Install by going to extensions/kernels/ and running `python setup.py install`, for improved speed and memory efficiency. Note that the kernel changed for state-spaces 4.0 and must be recompiled.



  Policy CMP0146 is not set: The FindCUDA module is removed.  Run "cmake
  --help-policy CMP0146" for policy details.  Use the cmake_policy command to


CUDA_CUDART_LIBRARY = C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/lib/x64/cudart.lib
CUDA_LIBRARIES = C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/lib/x64/cudart_static.lib
CUDA_TOOLKIT_ROOT_DIR = C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8
set nvrtc path = C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8/lib/x64
  Policy CMP0148 is not set: The FindPythonInterp and FindPythonLibs modules
  are removed.  Run "cmake --help-policy CMP0148" for policy details.  Use
  preferably upgrade to using FindPython, either by calling it explicitly
  before pybind11, or by setting PYBIND11_FINDPYTHON ON before pybind11.
Call Stack (most recent call first):
  C:/Users/arthu/anaconda3/envs/seizure/Lib/site-packages/pybind11/share/cmake/pybind11/pybind11Tools.cmake:50 (find_package)
  C:/Users/arthu/anacon

In [3]:
# Notebook-wide configuration.
# RESULT_FOLDER is used further down to build the path of the pickled
# propagation results.
RESULT_FOLDER = "result"
# NOTE(review): MODEL_FOLDER is not referenced by any later cell in this
# notebook (training passes model_folder="checkpoints") - confirm it is needed.
MODEL_FOLDER = "model"
# Architectures to run; the trailing comment lists the names that have been
# used with this notebook.
model_names = ['Wavenet']  # 'CNN1D', 'Wavenet', 'LSTM', 'S4', 'ResNet'

In [4]:
# Do batch analysis to find the best hyperparameters
# `seizures` lists the seizure numbers iterated over in the batch
# propagation cell below.
seizures = [1, 2, 3, 5, 7]
# NOTE(review): `thresholds` and `smooth_windows` are not read by any later
# cell in this notebook (the batch cell hard-codes threshold=0.8 and
# smooth_window=10) - confirm whether a sweep over them is still intended.
thresholds = [0.8]
smooth_windows = [80]

In [None]:
from steps import extract_sEEG_features
from datasetConstruct import load_seizure_across_patients

# Load every seizure found under the 'data' folder.
dataset = load_seizure_across_patients(data_folder='data')

# Run feature extraction on each seizure, using that seizure's own sampling rate.
for seizure in dataset:
    # NOTE(review): `seizure_new` is overwritten on every iteration and never
    # stored, so this loop only has an effect if extract_sEEG_features
    # persists or mutates something itself - confirm.
    seizure_new = extract_sEEG_features(seizure, sampling_rate=seizure.samplingRate)

Processing 1491 ictal segments × 96 channels


In [None]:
# Run the training pipeline for every architecture listed in `model_names`.
# `models` is later indexed by model name to fetch the model object.
# NOTE(review): checkpoints are written to "checkpoints" while MODEL_FOLDER
# ("model") from the configuration cell is unused - confirm which is intended.
results, models = setup_and_train_models(
    data_folder="data",
    model_folder="checkpoints",
    model_names=model_names,  # architectures selected in the configuration cell
    train=True,
    input_type='transformed',  # 'transformed' or 'raw'
    params={'epochs': 100, 'batch_size': 4096, 'checkpoint_freq': 20},  # params: epochs, checkpoint_freq, lr, batch_size, device, patience, gradient_clip
    hyperparameter_search=True
)

In [None]:
from typing import List, Tuple, Dict
from utils import split_data, find_seizure_related_channels
from datasetConstruct import load_single_seizure
from models import output_to_probability
from steps import extract_sEEG_features
from sklearn.preprocessing import StandardScaler

# --- Configuration for inspecting a single seizure ---------------------------
# These module-level names are read by load_seizure_data() / process_data().
marking_file = 'data/Seizure_Onset_Type_ML_USC.xlsx'
patient_no = 66
seizure_no = 1
data_folder = 'data'
# Set up paths
single_seizure_folder = os.path.join(data_folder, f"P{patient_no}")
# Reuse RESULT_FOLDER so the output root is defined in exactly one place.
save_folder = os.path.join(RESULT_FOLDER, f"P{patient_no}", f"Seizure{seizure_no}")
os.makedirs(save_folder, exist_ok=True)
model_name = model_names[0]
model = models[model_name]
params = {
    'threshold': 0.8,
    'smooth_window': 10,
    'n_seconds': 60,
    'seizure_start': 10,
    'overlap': 0.9,
    # Fall back to the CPU when no GPU is visible instead of failing on 'cuda:0'.
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
}

def load_seizure_data() -> Tuple[object, List[str], List[str]]:
    """Load one seizure recording together with its annotated channels.

    Reads the notebook-level settings ``marking_file``, ``patient_no``,
    ``seizure_no`` and ``single_seizure_folder``.

    Returns
    -------
    tuple
        ``(seizure_obj, seizure_channels, seizure_onset_channels)`` where the
        two channel collections are exactly what
        ``find_seizure_related_channels`` returns for this patient/seizure.
    """
    # Annotation spreadsheet -> channels related to this seizure
    marking_table = pd.read_excel(marking_file)
    related_channels, onset_channels = find_seizure_related_channels(
        marking_table, seizure_no, patient_no
    )

    # Recording itself
    seizure_obj = load_single_seizure(single_seizure_folder, seizure_no)

    # Compute features only when the object does not already carry them
    features_missing = not hasattr(seizure_obj, 'ictal_transformed')
    if features_missing:
        seizure_obj = extract_sEEG_features(
            seizure_obj, sampling_rate=seizure_obj.samplingRate
        )

    return seizure_obj, related_channels, onset_channels


def _merge_segments(segments: np.ndarray) -> np.ndarray:
    """Swap axes 1 and 2 of a 4-D array, then fold axes 0 and 1 together."""
    d0, d1, d2, d3 = segments.shape
    return segments.transpose(0, 2, 1, 3).reshape(d0 * d2, d1, d3)


def process_data(seizure_obj, window_size: int = 40, overlap=None) -> Tuple[np.ndarray, np.ndarray, float]:
    """Concatenate the pre-seizure and ictal data and cut it into windows.

    Parameters
    ----------
    seizure_obj :
        Seizure object. When it has an ``ictal_transformed`` attribute the
        4-D ``ictal_transformed`` / ``interictal_transformed`` arrays are used;
        otherwise the raw ``ictal_`` / ``preictal2`` attributes are used.
    window_size : int, default 40
        Window length forwarded to ``split_data`` (previously hard-coded).
    overlap : float, optional
        Window overlap forwarded to ``split_data``. When omitted, the
        notebook-level ``params['overlap']`` is used, as before. Passing it
        explicitly keeps the function usable after ``params`` has been
        redefined by another cell without an ``'overlap'`` key.

    Returns
    -------
    tuple
        ``(total_data, total_windows, fs)``: the concatenated array, the
        output of ``split_data`` on it, and ``seizure_obj.samplingRate``.

    Raises
    ------
    KeyError
        If ``overlap`` is omitted and the global ``params`` has no
        ``'overlap'`` entry.
    """
    fs = seizure_obj.samplingRate
    if overlap is None:
        overlap = params['overlap']

    if not hasattr(seizure_obj, 'ictal_transformed'):
        # NOTE(review): other cells read `seizure_obj.ictal` and
        # `seizure_obj.interictal`; confirm `ictal_` / `preictal2` really
        # exist on the seizure object and are not typos.
        ictal_data = seizure_obj.ictal_
        preictal_data = seizure_obj.preictal2

        # Fold the first two ictal axes together, keeping the last axis
        ictal_combined = ictal_data.reshape(-1, ictal_data.shape[2])
        total_data = np.concatenate((preictal_data, ictal_combined), axis=0)
    else:
        ictal_combined = _merge_segments(seizure_obj.ictal_transformed)
        preictal_data = _merge_segments(seizure_obj.interictal_transformed)

        total_data = np.concatenate((preictal_data, ictal_combined))

    # Split data into windows
    total_windows = split_data(total_data, window_size, overlap=overlap)

    return total_data, total_windows, fs


def compute_probabilities(data: np.ndarray, model, device: str) -> np.ndarray:
    """
    Score every channel of a windowed recording with the seizure model.

    Each channel is standardised on its own (a fresh ``StandardScaler`` fitted
    on that channel's samples) before being passed to the model.

    Parameters:
    -----------
    data : numpy.ndarray
        Windowed input, either (chunks, fs, channel) or
        (chunks, fs, channel, features)
    model : torch model
        The seizure detection model
    device : str
        Device the input tensor is moved to (e.g. 'cpu' or 'cuda')

    Returns:
    --------
    numpy.ndarray
        Probability matrix with shape (chunks, channel)
    """
    has_feature_axis = data.ndim == 4

    n_chunks = data.shape[0]
    n_samples = data.shape[1]
    n_channels = data.shape[2]

    prob_matrix = np.zeros((n_chunks, n_channels))

    for ch in range(n_channels):
        scaler = StandardScaler()

        if has_feature_axis:
            # (chunks, fs, features) slice for this channel
            per_channel = data[:, :, ch, :]
            slice_shape = per_channel.shape

            # Standardise each feature over all chunks and samples
            flat = per_channel.reshape(-1, slice_shape[2])
            standardized = scaler.fit_transform(flat).reshape(slice_shape)

            # Model expects (chunks, features, fs)
            model_input = standardized.transpose(0, 2, 1)
        else:
            # (chunks, fs) slice for this channel, scaled as one column
            flat = data[:, :, ch].reshape(-1, 1)
            standardized = scaler.fit_transform(flat).reshape(n_chunks, n_samples)

            # Model expects (chunks, 1, fs)
            model_input = standardized.reshape(n_chunks, 1, n_samples)

        batch = torch.tensor(model_input, dtype=torch.float32).to(device)
        prob_matrix[:, ch] = output_to_probability(model, batch, device)

    return prob_matrix


In [None]:
# Test and debug single seizure data
# Load the recording and its annotated channels for the patient/seizure
# selected in the configuration cell above.
seizure_obj, seizure_channels, seizure_onset_channels = load_seizure_data()

# Concatenate the recording and cut it into overlapping windows;
# `fs` is the recording's sampling rate.
total_data, windowed_data, fs = process_data(seizure_obj)

# Per-window, per-channel seizure probabilities with shape (chunks, channel)
probabilities = compute_probabilities(windowed_data, model, params['device'])

In [None]:
channel = 31
seconds = 60
preictal_seconds = 10

# Convert durations to window counts. round() is used instead of int():
# `1 - overlap` is not exact in floating point, so the quotient can land just
# below an integer (e.g. 60 / (1 - 0.7) == 199.99999999999997) and int()
# would silently drop a window.
window_step = 1 - params['overlap']
preictal_samples = round((60 - preictal_seconds) / window_step)
nsamples = round(seconds / window_step) + preictal_samples

# Use the recording's own sampling rate rather than a hard-coded 512.
samples_per_second = int(fs)
raw_data_ictal = seizure_obj.ictal
raw_data_preictal = seizure_obj.interictal

# Flatten the segment axis, then keep the first `seconds` of ictal data and
# the last `preictal_seconds` of the pre-seizure data.
raw_data_ictal = raw_data_ictal.reshape(-1, raw_data_ictal.shape[2])[:seconds * samples_per_second]
raw_data_preictal = raw_data_preictal.reshape(-1, raw_data_preictal.shape[2])[-preictal_seconds * samples_per_second:]

# Combine the data and keep the selected channel
raw_data = np.concatenate((raw_data_preictal, raw_data_ictal), axis=0)
raw_data = raw_data[:, channel]

probability = probabilities[:, channel][preictal_samples:nsamples]

# Smooth the probability by using a moving average
probability = np.convolve(probability, np.ones(params['smooth_window']) / params['smooth_window'], mode='same')

# NOTE(review): the raw trace spans preictal_seconds + seconds of signal while
# the probability trace spans `seconds` worth of windows - confirm the two
# panels are meant to cover the same interval.
fig, ax = plt.subplots(2, 1, figsize=(10, 6))
ax[0].plot(raw_data)
ax[1].plot(probability)
ax[0].set_title(f'Channel {channel} - Raw Data')
ax[1].set_title(f'Channel {channel} - Seizure Probability')
# Set x-axis labels
ax[1].set_xlabel('Time (s)')
# One tick every 10 s; positions are derived from the labels so both arrays
# always have the same length.
x_labels = np.arange(0, seconds, 10)
x_ticks = np.round(x_labels / window_step)
ax[1].set_xticks(x_ticks)
ax[1].set_xticklabels(x_labels)

# Delete ax[0] x-axis labels
ax[0].set_xticks([])

plt.tight_layout()
plt.show()

In [None]:
# Calculate probability correlation to check the contamination of common noise
from scipy.stats import pearsonr  # no longer used in this cell; kept in case other cells rely on the name

# |Pearson r| between every pair of channel probability traces, computed in a
# single vectorised call (columns of `probabilities` are the variables).
# atleast_2d keeps the single-channel case plottable as a 1x1 image.
correlation = np.atleast_2d(np.abs(np.corrcoef(probabilities, rowvar=False)))

fig, ax = plt.subplots()
image = ax.imshow(correlation)
fig.colorbar(image, ax=ax)
ax.set_title('Absolute correlation of channel probabilities')
ax.set_xlabel('Channel')
ax.set_ylabel('Channel')
plt.show()

In [None]:
# Run (or reload) the propagation analysis for every patient/seizure pair.
results_propagation_total = []
model_name = model_names[0]
model = models[model_name]
threshold = 0.8
smooth_window = 10
LOAD=False
filename = f'{RESULT_FOLDER}/results_propagation_{model_name}_{threshold}_{smooth_window}.pkl'

# Reuse cached results only when explicitly requested.
# NOTE: pickle.load can execute arbitrary code - only load files this
# notebook produced itself.
if LOAD and os.path.exists(filename):
    with open(filename, 'rb') as f:
        results_propagation_total = pickle.load(f)

if len(results_propagation_total) == 0:
    # Dedicated names are used for the settings dict and the loop variables so
    # that this cell does not overwrite the notebook-level `params` and
    # `seizure_no`, which load_seizure_data() / process_data() read.
    propagation_params = {
        'threshold': threshold,
        'smooth_window': smooth_window,
        'n_seconds': 60,
        'seizure_start': 10,
    }
    for pat_no in [65, 66]:
        for sz_no in seizures:
            # Patient 66 is only analysed for seizures 1-3.
            if pat_no == 66 and sz_no > 3:
                continue
            results_propagation = analyze_seizure_propagation(
                patient_no=pat_no,
                seizure_no=sz_no,
                model=model,
                data_folder='data',
                params=dict(propagation_params),  # fresh copy per call
                save_results_ind=True
            )
            results_propagation_total.append(results_propagation)

    # Make sure the output folder exists before writing the cache.
    os.makedirs(RESULT_FOLDER, exist_ok=True)
    with open(filename, 'wb') as f:
        pickle.dump(results_propagation_total, f)

In [None]:
# Examine the result:
from plotFun import plot_eeg_style
# 1. Plot the smoothed result:
# Entry 5 of the batch results is used as the example; rows 20:350 of its
# smoothed probabilities are plotted.
sample_result = results_propagation_total[5]['smoothed_probabilities'][20:350]
# NOTE(review): `sample_result2` (unsmoothed probabilities, rows 20:300) is
# not used by any later cell in this notebook - confirm it is still needed.
sample_result2 = results_propagation_total[5]['probabilities'][20:300]
# The array is transposed before plotting (presumably to channels x time -
# TODO confirm against plot_eeg_style).
fig = plot_eeg_style(sample_result.T, sampling_rate=5)
plt.show()

In [None]:
# # Load and test the augmented data
# augdata = pd.read_csv('data/clips.tar.gz', compression='gzip', header=0, sep=' ;', encoding='ISO-8859-2', quotechar='"', engine='python')


In [None]:
# Build train/validation datasets for seizure-channel and onset-channel
# recognition from the batch propagation results.
(
    seizure_channels_dataset_train,
    seizure_channels_dataset_val,
    seizure_onset_dataset_train,
    seizure_onset_dataset_val,
) = construct_channel_recognition_dataset(
    results_propagation_total,
    50,
    batch_size=128,
    data_aug=False,
)

In [None]:
# Build the channel classifier (input_dim=1, output_dim=2)
model_seizure_channel = Wavenet(input_dim=1, output_dim=2, lr=0.001)

# Fit it on the seizure-channel datasets; the call returns the training loss,
# validation loss and validation accuracy histories.
train_loss, val_loss, val_accuracy = train_using_optimizer(
    model_seizure_channel,
    seizure_channels_dataset_train,
    seizure_channels_dataset_val,
    epochs=200,
    checkpoint_freq=20,
)

In [None]:
# Plot the training and validation loss, with validation accuracy on a twin axis

# Epochs at which validation metrics are plotted (matches epochs=200 and
# checkpoint_freq=20 used when training the channel model).
x_ticks = range(0, 200, 20)

fig, ax_loss = plt.subplots()
ax_loss.plot(train_loss, label='Train')
ax_loss.plot(x_ticks, val_loss, label='Validation')
# Axis labels go on the primary axes: the twin created below hides its own
# x-axis, so a label set there would never be drawn.
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')

# Twin the y-axis for accuracy of validation
ax_acc = ax_loss.twinx()
ax_acc.plot(x_ticks, val_accuracy, label='Validation Accuracy', color='red')
ax_acc.set_ylabel('Accuracy')

# Merge the handles of both axes; a plain legend() call would only list the
# lines of the axes that is current (the twin).
loss_handles, loss_labels = ax_loss.get_legend_handles_labels()
acc_handles, acc_labels = ax_acc.get_legend_handles_labels()
ax_loss.legend(loss_handles + acc_handles, loss_labels + acc_labels)

# Make sure the output folder exists before saving.
os.makedirs('result', exist_ok=True)
fig.savefig('result/loss_seizure_channels.png')

plt.show()

In [None]:
# Predict, for every analysed seizure, which channels are seizure channels.
predicted_labels_total = []

# Inference only: switch off training-time layers and gradient tracking.
model_seizure_channel.eval()
with torch.no_grad():
    for result in results_propagation_total:
        # Rows 50:300 of the smoothed probabilities, all channels
        sample_seizure = result['smoothed_probabilities'][50:300, :]

        # One sample per channel with a singleton middle axis:
        # (rows, channels) -> (channels, 1, rows)
        sample_seizure = np.expand_dims(sample_seizure.T, axis=1)

        # Convert to tensor
        # NOTE(review): the tensor stays on the CPU - confirm the model is
        # on the CPU as well at this point.
        sample_seizure = torch.tensor(sample_seizure, dtype=torch.float32)

        # Get the predictions
        predictions = model_seizure_channel(sample_seizure)
        predicted_labels = predictions.detach().to('cpu').numpy()

        # A channel is labelled positive when the class-1 output exceeds 0.5
        # (assumes the model outputs probabilities - TODO confirm).
        predicted_labels = predicted_labels[:, 1] > 0.5

        predicted_labels_total.append(predicted_labels)

In [None]:
# Convert the channel from results_propagation to the y_true as 1s and 0s
gound_truth_total = []
for result in results_propagation_total:
    # One entry per channel; annotated seizure channels are set to 1
    y_true = np.zeros(result['smoothed_probabilities'].shape[1])
    y_true[result['true_seizure_channels']] = 1
    gound_truth_total.append(y_true)

# Convert the gound_truth_total to a single array
gound_truth_total = np.concatenate(gound_truth_total)

# Convert the predicted_labels_total to a single array.
# Guarded so that re-running this cell is safe: after the first run the name
# already holds the flattened array, and concatenating that again would raise.
if isinstance(predicted_labels_total, list):
    predicted_labels_total = np.concatenate(predicted_labels_total)

assert gound_truth_total.shape == predicted_labels_total.shape, (
    "ground truth and predictions must contain one entry per channel"
)

In [None]:
# Plot the confusion matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Bring both label arrays to the same integer dtype (ground truth is float,
# predictions are boolean).
y_true_flat = gound_truth_total.flatten().astype(int)
y_pred_flat = predicted_labels_total.flatten().astype(int)

# labels=[0, 1] keeps the matrix 2x2 even when one class never occurs.
conf_matrix = confusion_matrix(y_true_flat, y_pred_flat, labels=[0, 1])
sns.heatmap(conf_matrix, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('True')

# Make sure the output folder exists before saving.
os.makedirs('result', exist_ok=True)
plt.savefig('result/confusion_matrix_seizure_channels.png')

plt.show()


In [None]:
# Calculate the accuracy, precision, recall, and F1 score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Flatten once and reuse for every metric
y_true_flat = gound_truth_total.flatten()
y_pred_flat = predicted_labels_total.flatten()

accuracy = accuracy_score(y_true_flat, y_pred_flat)
precision = precision_score(y_true_flat, y_pred_flat)
recall = recall_score(y_true_flat, y_pred_flat)
f1 = f1_score(y_true_flat, y_pred_flat)

for metric_name, metric_value in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1', f1),
):
    print(f'{metric_name}: {metric_value}')