In [1]:
import vitaldb
import random
import numpy as np
import pandas as pd
import scipy.signal
import matplotlib.pyplot as plt

# Constants
SRATE = 128  # Hz
SEGLEN = 4 * SRATE  # 4 seconds in samples
MAX_CASES = 500

# Data Loading
df_trks = pd.read_csv("https://api.vitaldb.net/trks")
df_cases = pd.read_csv("https://api.vitaldb.net/cases")

# Track selection
track_names = ['BIS/EEG1_WAV', 'BIS/BIS']
EEG = 0
BIS = 1

# Filter based on inclusion/exclusion criteria
caseids = set(df_cases.loc[df_cases['age'] > 18, 'caseid'])
caseids &= set(df_trks.loc[df_trks['tname'] == 'BIS/EEG2_WAV', 'caseid'])
caseids &= set(df_trks.loc[df_trks['tname'] == 'BIS/BIS', 'caseid'])
caseids -= set(df_trks.loc[df_trks['tname'] == 'Primus/EXP_DES', 'caseid'])
caseids -= set(df_trks.loc[df_trks['tname'] == 'Orchestra/PPF20_CE', 'caseid'])
caseids -= set(df_trks.loc[df_trks['tname'] == 'Orchestra/RFTN50_CE', 'caseid'])
caseids = list(caseids)

# Data extraction initialization
x = []
y = []
c = []
icase = 0

# **Updated**: Bandpass filter setup (one-time initialization)
# This avoids repeatedly initializing the filter in every iteration of the loop
lowcut = 0.5
highcut = 50.0
order = 4
b, a = scipy.signal.butter(order, [lowcut, highcut], btype='band', fs=SRATE)

# Processing each case
for caseid in caseids:
    print(f'Loading caseid={caseid} ({icase + 1}/{MAX_CASES})...', end='')

    # Load case data
    vals = vitaldb.load_case(caseid, track_names, 1 / SRATE)
    op_st = df_cases[df_cases['caseid'] == caseid]['opstart'].values[0]
    op_end = df_cases[df_cases['caseid'] == caseid]['opend'].values[0]

    # Exclusion Criteria: **Updated** logging to inform about skipped cases
    if not np.any(vals[:, BIS] > 0):
        print('Skipping due to all BIS <= 0')
        continue
    if len(vals) < 1800 * SRATE:
        print('Skipping due to short length')
        continue

    # Fill missing BIS values with forward fill: **Kept as is**
    vals[:, BIS:] = pd.DataFrame(vals[:, BIS:]).ffill(limit=7*SRATE).values

    # Extract and filter segments
    for isamp in range(SEGLEN, len(vals), 2 * SRATE):
        if op_st <= isamp / SRATE <= op_end:
            bis = vals[isamp, BIS]
            if np.isnan(bis) or bis == 0:
                continue
            eeg = vals[isamp - SEGLEN:isamp, EEG]
            eeg_filtered = scipy.signal.filtfilt(b, a, eeg)  # Apply filter
            # **Updated**: Normalize EEG signal to zero mean and unit variance
            eeg_filtered = (eeg_filtered - np.mean(eeg_filtered)) / np.std(eeg_filtered)  # Normalization
            x.append(eeg_filtered)
            y.append(bis)
            c.append(caseid)

    # Print progress: **Kept as is**
    icase += 1
    print(f'{len(y)} segments loaded so far')

    if icase >= MAX_CASES:
        break

# Convert to numpy arrays: **Kept as is**
x = np.array(x)
y = np.array(y)
c = np.array(c)


import joblib

# Assuming x, y, c are already populated as NumPy arrays
data = {'x': x, 'y': y, 'c': c}

# Save the data dictionary to a file
joblib.dump(data, 'data_4_sec_500.pkl')


Loading caseid=2 (1/500)...6450 segments loaded so far
Loading caseid=4 (2/500)...14081 segments loaded so far
Loading caseid=10 (3/500)...22761 segments loaded so far
Loading caseid=12 (4/500)...35306 segments loaded so far
Loading caseid=18 (5/500)...Skipping due to all BIS <= 0
Loading caseid=21 (5/500)...39504 segments loaded so far
Loading caseid=24 (6/500)...41004 segments loaded so far
Loading caseid=25 (7/500)...46990 segments loaded so far
Loading caseid=27 (8/500)...53797 segments loaded so far
Loading caseid=33 (9/500)...55252 segments loaded so far
Loading caseid=39 (10/500)...Skipping due to all BIS <= 0
Loading caseid=43 (10/500)...60763 segments loaded so far
Loading caseid=49 (11/500)...63926 segments loaded so far
Loading caseid=56 (12/500)...76068 segments loaded so far
Loading caseid=58 (13/500)...81465 segments loaded so far
Loading caseid=61 (14/500)...84527 segments loaded so far
Loading caseid=62 (15/500)...87662 segments loaded so far
Loading caseid=64 (16/500).

['data_4_sec_500.pkl']