# **Copy Data from Drive to Colab Local Disk**

In [None]:
!cp /content/drive/MyDrive/Masoudi/dataset.zip .

In [None]:
!unzip -qq dataset.zip

In [2]:
!cp /content/drive/MyDrive/Masoudi/4.zip .
!unzip -qq /content/4.zip

In [3]:
!cp /content/drive/MyDrive/Masoudi/empty.zip /content

In [4]:
!unzip -qq empty.zip -d  /content/example/test_data

In [5]:
!unzip -qq empty.zip -d  /content/example/training_data

# **util: Helper Code**

In [6]:
#!/usr/bin/env python

# Do *not* edit this script.
# These are helper functions that you can use with your code.
# Check the example code to see how to import these functions to your code.

import os, numpy as np, scipy as sp, scipy.io

### Challenge data I/O functions

# Find the folders with data files.
def find_data_folders(root_folder):
    """Return the sorted names of the immediate subdirectories of *root_folder*.

    Only the entry names are returned (not full paths); entries that are not
    directories are ignored.
    """
    return sorted(
        entry
        for entry in os.listdir(root_folder)
        if os.path.isdir(os.path.join(root_folder, entry))
    )

def load_challenge_data(data_folder, patient_id):
    """Load the metadata texts and every listed recording for one patient.

    Returns ``(patient_metadata, recording_metadata, recordings)`` where the
    first two items are the raw contents of ``<patient_id>.txt`` and
    ``<patient_id>.tsv`` and ``recordings`` holds one
    ``(data, sampling_frequency, channels)`` triple per recording ID found in
    the ``.tsv`` file. A recording ID equal to ``'nan'`` yields
    ``(None, None, None)``.
    """
    patient_folder = os.path.join(data_folder, patient_id)

    # Load non-recording data.
    patient_metadata = load_text_file(os.path.join(patient_folder, patient_id + '.txt'))
    recording_metadata = load_text_file(os.path.join(patient_folder, patient_id + '.tsv'))

    # Load recordings; the literal ID 'nan' marks a missing recording.
    recordings = list()
    for recording_id in get_recording_ids(recording_metadata):
        if recording_id == 'nan':
            recordings.append((None, None, None))
            continue
        recording_data, sampling_frequency, channels = load_recording(
            os.path.join(patient_folder, recording_id))
        recordings.append((recording_data, sampling_frequency, channels))

    return patient_metadata, recording_metadata, recordings

# Load the WFDB data for the Challenge (but not all possible WFDB files).
def load_recording(record_name):
    """Load one recording described by a WFDB-style header and a MATLAB signal file.

    Parameters:
        record_name: Path of the record, either without an extension (``.hea``
            is appended) or the header filename itself.

    Returns:
        ``(rescaled_data, sampling_frequency, channels)`` where
        ``rescaled_data`` is a float32 array of shape
        ``(num_channels, num_samples)`` computed as ``(raw - offset) / gain``,
        ``sampling_frequency`` is the float from the record line, and
        ``channels`` is the list of channel names from the header.

    Raises:
        FileNotFoundError: The header file does not exist.
        NotImplementedError: The header does not reference exactly one signal file.
        ValueError: The signal file disagrees with the header on dimensions,
            initial values or checksums.
    """
    # Allow either the record name or the header filename.
    root, ext = os.path.splitext(record_name)
    if ext == '':
        header_file = record_name + '.hea'
    else:
        header_file = record_name

    # Load the header file.
    if not os.path.isfile(header_file):
        raise FileNotFoundError('{} recording not found.'.format(record_name))

    with open(header_file, 'r') as f:
        header = [l.strip() for l in f.readlines() if l.strip()]

    # Parse the header file.
    record_name = None
    num_signals = None
    sampling_frequency = None
    num_samples = None
    signal_files = list()
    gains = list()
    offsets = list()
    channels = list()
    initial_values = list()
    checksums = list()

    for i, l in enumerate(header):
        # Split on any run of whitespace so that repeated spaces or tabs between
        # fields do not shift the field positions.
        arrs = l.split()
        # Parse the record line.
        if i == 0:
            record_name = arrs[0]
            num_signals = int(arrs[1])
            sampling_frequency = float(arrs[2])
            num_samples = int(arrs[3])
        # Parse the signal specification lines.
        else:
            signal_files.append(arrs[0])
            gains.append(float(arrs[2].split('/')[0]))  # The gain may carry a '/unit' suffix.
            offsets.append(int(arrs[4]))
            initial_values.append(int(arrs[5]))
            checksums.append(int(arrs[6]))
            channels.append(arrs[8])

    # Check that the header file only references one signal file. WFDB format allows for multiple signal files, but we have not
    # implemented that here for simplicity.
    num_signal_files = len(set(signal_files))
    if num_signal_files != 1:
        raise NotImplementedError('The header file {}'.format(header_file) \
            + ' references {} signal files; one signal file expected.'.format(num_signal_files))

    # Load the signal file, which is resolved relative to the header's folder.
    head, tail = os.path.split(header_file)
    signal_file = os.path.join(head, list(signal_files)[0])
    data = np.asarray(sp.io.loadmat(signal_file)['val'])

    # Check that the dimensions of the signal data in the signal file is consistent with the dimensions for the signal data given
    # in the header file.
    num_channels = len(channels)
    if np.shape(data) != (num_channels, num_samples):
        raise ValueError('The header file {}'.format(header_file) \
            + ' is inconsistent with the dimensions of the signal file.')

    # Check that the initial value and checksums for the signal data in the signal file are consistent with the initial value and
    # checksums for the signal data given in the header file.
    for i in range(num_channels):
        if data[i, 0] != initial_values[i]:
            raise ValueError('The initial value in header file {}'.format(header_file) \
                + ' is inconsistent with the initial value for channel {}.'.format(channels[i]))
        if np.sum(data[i, :]) != checksums[i]:
            raise ValueError('The checksum in header file {}'.format(header_file) \
                + ' is inconsistent with the checksum for channel {}.'.format(channels[i]))

    # Rescale the signal data using the ADC gains and ADC offsets.
    rescaled_data = np.zeros(np.shape(data), dtype=np.float32)
    for i in range(num_channels):
        rescaled_data[i, :] = (data[i, :] - offsets[i]) / gains[i]

    return rescaled_data, sampling_frequency, channels

# Reorder/reselect the channels.
def reorder_recording_channels(current_data, current_channels, reordered_channels):
    """Return the rows of *current_data* arranged in the order of *reordered_channels*.

    Parameters:
        current_data: 2-D array with one row per entry of ``current_channels``.
        current_channels: List of channel names describing the rows of ``current_data``.
        reordered_channels: List of channel names wanted in the output, in order.

    Returns:
        ``current_data`` itself when both channel lists are equal; otherwise a
        new float array with one row per requested channel. A requested channel
        that is absent from ``current_channels`` yields a row of zeros, so every
        requested channel keeps its own row position.
    """
    if current_channels == reordered_channels:
        return current_data

    num_samples = np.shape(current_data)[1]
    reordered_data = np.zeros((len(reordered_channels), num_samples))
    for target_row, channel in enumerate(reordered_channels):
        # Copy row by row: a missing channel must not shift the channels that follow it.
        if channel in current_channels:
            reordered_data[target_row, :] = current_data[current_channels.index(channel), :]
    return reordered_data

### Helper Challenge data I/O functions

# Load text file as a string.
def load_text_file(filename):
    """Read *filename* in text mode and return its whole contents as one string."""
    with open(filename, 'r') as handle:
        return handle.read()

# Get a variable from the patient metadata.
def get_variable(text, variable_name, variable_type, preserve_nan=True):
    """Return the value of the first ``name: value`` line that starts with *variable_name*.

    Parameters:
        text: Metadata text with one ``name: value`` pair per line.
        variable_name: Prefix that the wanted line starts with.
        variable_type: Callable used to convert the value text (e.g. ``int``, ``str``, ``bool``).
        preserve_nan: When true, the text ``nan`` (any case) is returned as ``float('nan')``
            instead of being passed to ``variable_type``.

    Returns:
        The converted value, or ``None`` when no line matches.
    """
    for line in text.split('\n'):
        if not line.startswith(variable_name):
            continue
        # Split on the first colon only so that values containing ':' stay intact.
        _, separator, value = line.partition(':')
        if not separator:
            continue  # Not a 'name: value' line.
        value = value.strip()
        if preserve_nan and value.lower() == 'nan':
            return float('nan')
        if variable_type is bool:
            # bool('False') is True because any non-empty string is truthy, so the
            # text is interpreted explicitly; other text keeps the plain bool() result.
            lowered = value.lower()
            if lowered in ('true', 't', '1'):
                return True
            if lowered in ('false', 'f', '0'):
                return False
        return variable_type(value)
    return None

# Get a column from the recording metadata.
def get_column(string, column, variable_type, sep='\t'):
    """Return one column of a delimited table as a NumPy array.

    Parameters:
        string: Table text; the first line is the header row.
        column: Header name of the wanted column.
        variable_type: Callable applied to each cell of that column.
        sep: Cell separator (tab by default).

    Returns:
        ``np.ndarray`` with one converted value per non-blank data row.

    Raises:
        ValueError: ``column`` is not present in the header row.
    """
    lines = string.split('\n')
    header = [cell.strip() for cell in lines[0].split(sep)]
    column_index = header.index(column)

    variables = list()
    for line in lines[1:]:
        if not line.strip():
            continue  # Skip blank lines such as the one after a trailing newline.
        # Keep empty cells: dropping them would shift every later column of the row.
        cells = [cell.strip() for cell in line.split(sep)]
        variables.append(variable_type(cells[column_index]))
    return np.asarray(variables)

# Get the patient ID variable from the patient data.
def get_patient_id(string):
    """Return the 'Patient' entry of the patient metadata text as a string."""
    return get_variable(string, 'Patient', str)

# Get the age variable (in years) from the patient data.
def get_age(string):
    """Return the 'Age' entry of the patient metadata text converted with ``int``."""
    return get_variable(string, 'Age', int)

# Get the sex variable from the patient data.
def get_sex(string):
    """Return the 'Sex' entry of the patient metadata text as a string."""
    return get_variable(string, 'Sex', str)

# Get the ROSC variable (in minutes) from the patient data.
def get_rosc(string):
    """Return the 'ROSC' entry of the patient metadata text converted with ``int``."""
    return get_variable(string, 'ROSC', int)

# Convert metadata text to a boolean.
def _parse_boolean_text(value):
    """Convert text such as ``'True'`` or ``'False'`` to a real boolean.

    ``bool('False')`` is ``True`` because any non-empty string is truthy, so the
    text is inspected explicitly. Unrecognised text falls back to ``bool(value)``.
    """
    lowered = str(value).strip().lower()
    if lowered in ('true', 't', '1'):
        return True
    if lowered in ('false', 'f', '0'):
        return False
    return bool(value)

# Get the OHCA variable from the patient data.
def get_ohca(string):
    """Return the 'OHCA' entry of the patient metadata text as a boolean."""
    return get_variable(string, 'OHCA', _parse_boolean_text)

# Get the VFib variable from the patient data.
def get_vfib(string):
    """Return the 'VFib' entry of the patient metadata text as a boolean."""
    return get_variable(string, 'VFib', _parse_boolean_text)

# Get the TTM variable (in Celsius) from the patient data.
def get_ttm(string):
    """Return the 'TTM' entry of the patient metadata text converted with ``int``."""
    return get_variable(string, 'TTM', int)

# Get the Outcome variable from the patient data.
def get_outcome(string):
    """Return the 'Outcome' entry as 0 for 'Good' and 1 for 'Poor'.

    Any other text is returned unchanged. Raises ``ValueError`` when the entry
    is missing or NaN.
    """
    outcome = get_variable(string, 'Outcome', str)
    if outcome is None or is_nan(outcome):
        raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')
    return {'Good': 0, 'Poor': 1}.get(outcome, outcome)

# Get the Outcome probability variable from the patient data.
def get_outcome_probability(string):
    """Return the 'Outcome probability' entry as a float.

    Raises ``ValueError`` when the sanitized value is ``None`` or NaN.
    """
    raw_value = get_variable(string, 'Outcome probability', str)
    probability = sanitize_scalar_value(raw_value)
    if probability is not None and not is_nan(probability):
        return probability
    raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')

# Get the CPC variable from the patient data.
def get_cpc(string):
    """Return the 'CPC' entry as a float.

    Raises ``ValueError`` when the sanitized value is ``None`` or NaN.
    """
    raw_value = get_variable(string, 'CPC', str)
    cpc = sanitize_scalar_value(raw_value)
    if cpc is not None and not is_nan(cpc):
        return cpc
    raise ValueError('No CPC score available. Is your code trying to load labels from the hidden data?')

# Get the hour number column from the patient data.
def get_hours(string):
    """Return the 'Hour' column of the recording metadata converted with ``int``."""
    return get_column(string, column='Hour', variable_type=int)

# Get the time column from the patient data.
def get_times(string):
    """Return the 'Time' column of the recording metadata as strings."""
    return get_column(string, column='Time', variable_type=str)

# Get the quality score column from the patient data.
def get_quality_scores(string):
    """Return the 'Quality' column of the recording metadata converted with ``float``."""
    return get_column(string, column='Quality', variable_type=float)

# Get the recording IDs column from the patient data.
def get_recording_ids(string):
    """Return the 'Record' column of the recording metadata as strings."""
    return get_column(string, column='Record', variable_type=str)

### Challenge label and output I/O functions

# Load the Challenge labels for one file.
def load_challenge_label(string):
    """Return ``(outcome, cpc)`` for one patient.

    ``string`` is either the path of an existing metadata file, which is then
    read, or the metadata text itself.
    """
    text = load_text_file(string) if os.path.isfile(string) else string
    return get_outcome(text), get_cpc(text)

# Load the Challenge labels for all of the files in a folder.
def load_challenge_labels(folder):
    """Load the labels of every patient folder found directly under *folder*.

    Returns ``(patient_ids, outcomes, cpcs)``: a list of patient IDs, a boolean
    array of outcomes and a float64 array of CPC scores, all in the sorted
    order of the patient folders.
    """
    patient_folders = find_data_folders(folder)
    num_patients = len(patient_folders)

    patient_ids = list()
    outcomes = np.zeros(num_patients, dtype=np.bool_)
    cpcs = np.zeros(num_patients, dtype=np.float64)

    for position, patient_folder in enumerate(patient_folders):
        metadata_path = os.path.join(folder, patient_folder, patient_folder + '.txt')
        metadata = load_text_file(metadata_path)

        patient_ids.append(get_patient_id(metadata))
        outcomes[position] = get_outcome(metadata)
        cpcs[position] = get_cpc(metadata)

    return patient_ids, outcomes, cpcs

# Save the Challenge outputs for one file.
def save_challenge_outputs(filename, patient_id, outcome, outcome_probability, cpc):
    """Format the outputs for one patient and optionally write them to *filename*.

    The values are sanitized first (e.g. in case they are singleton arrays).
    The formatted text is always returned; it is written to disk only when
    ``filename`` is not ``None``.
    """
    # Sanitize values, e.g., in case they are a singleton array.
    outcome = sanitize_boolean_value(outcome)
    outcome_probability = sanitize_scalar_value(outcome_probability)
    cpc = sanitize_scalar_value(cpc)

    # Format Challenge outputs; an outcome that is neither 0 nor 1 is printed as-is.
    outcome_label = {0: 'Good', 1: 'Poor'}.get(outcome, outcome)
    cpc_value = int(float(cpc)) if is_integer(cpc) else float(cpc)
    output_lines = [
        'Patient: {}'.format(patient_id),
        'Outcome: {}'.format(outcome_label),
        'Outcome probability: {:.3f}'.format(float(outcome_probability)),
        'CPC: {:.3f}'.format(cpc_value),
    ]
    output_string = '\n'.join(output_lines) + '\n'

    # Write the Challenge outputs.
    if filename is not None:
        with open(filename, 'w') as handle:
            handle.write(output_string)

    return output_string

# Load the Challenge outputs for one file.
def load_challenge_output(string):
    """Return ``(patient_id, outcome, outcome_probability, cpc)`` for one output.

    ``string`` is either the path of an existing output file, which is then
    read, or the output text itself.
    """
    text = load_text_file(string) if os.path.isfile(string) else string
    return (
        get_patient_id(text),
        get_outcome(text),
        get_outcome_probability(text),
        get_cpc(text),
    )

# Load the Challenge outputs for all of the files in folder.
def load_challenge_outputs(folder, patient_ids):
    """Load the saved outputs of the given patients from *folder*.

    Each patient's file is expected at ``<folder>/<patient_id>/<patient_id>.txt``.
    Returns ``(outcomes, outcome_probabilities, cpcs)`` as boolean, float64 and
    float64 arrays in the order of ``patient_ids``.
    """
    num_patients = len(patient_ids)
    outcomes = np.zeros(num_patients, dtype=np.bool_)
    outcome_probabilities = np.zeros(num_patients, dtype=np.float64)
    cpcs = np.zeros(num_patients, dtype=np.float64)

    for position in range(num_patients):
        current_id = patient_ids[position]
        output_file = os.path.join(folder, current_id, current_id + '.txt')
        _, outcomes[position], outcome_probabilities[position], cpcs[position] = \
            load_challenge_output(output_file)

    return outcomes, outcome_probabilities, cpcs

### Other helper functions

# Check if a variable is a number or represents a number.
def is_number(x):
    """Return True when ``float(x)`` succeeds, False when it raises ValueError or TypeError."""
    try:
        float(x)
    except (ValueError, TypeError):
        return False
    return True

# Check if a variable is an integer or represents an integer.
def is_integer(x):
    """Return whether *x* converts to a float with an integral value."""
    return float(x).is_integer() if is_number(x) else False

# Check if a variable is a finite number or represents a finite number.
def is_finite_number(x):
    """Return whether *x* converts to a finite float (neither infinite nor NaN)."""
    return np.isfinite(float(x)) if is_number(x) else False

# Check if a variable is a NaN (not a number) or represents a NaN.
def is_nan(x):
    """Return whether *x* converts to a float that is NaN."""
    return np.isnan(float(x)) if is_number(x) else False

# Remove any quotes, brackets (for singleton arrays), and/or invisible characters.
def remove_extra_characters(x):
    """Return ``str(x)`` without quotes, square brackets and spaces, stripped of outer whitespace."""
    text = str(x)
    for unwanted in ('"', "'", '[', ']', ' '):
        text = text.replace(unwanted, '')
    return text.strip()

# Sanitize boolean values, e.g., from the Challenge outputs.
def sanitize_boolean_value(x):
    """Map *x* to 0, 1 or NaN.

    After removing extra characters, a finite number equal to 0 or one of
    'False', 'false', 'F', 'f' gives 0; a finite number equal to 1 or one of
    'True', 'true', 'T', 't' gives 1; anything else gives ``float('nan')``.
    """
    cleaned = remove_extra_characters(x)
    finite = is_finite_number(cleaned)
    if (finite and float(cleaned) == 0) or cleaned in ('False', 'false', 'F', 'f'):
        return 0
    if (finite and float(cleaned) == 1) or cleaned in ('True', 'true', 'T', 't'):
        return 1
    return float('nan')

# Sanitize scalar values, e.g., from the Challenge outputs.
def sanitize_scalar_value(x):
    """Return *x* as a float after removing extra characters, or NaN when it is not a number."""
    cleaned = remove_extra_characters(x)
    return float(cleaned) if is_number(cleaned) else float('nan')

# **util: getFeatures function**

In [7]:
!pip install -q mne

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/7.6 MB[0m [31m9.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/7.6 MB[0m [31m29.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━[0m [32m6.2/7.6 MB[0m [31m57.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.6/7.6 MB[0m [31m63.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m42.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
import mne
from sklearn.preprocessing import StandardScaler
def get_features(patient_metadata, recording_metadata, recording_data):
    """Build one flat feature vector for a patient.

    Parameters:
        patient_metadata: Patient metadata text (``name: value`` lines).
        recording_metadata: Recording metadata table text.
        recording_data: Sequence of ``(signal_data, sampling_frequency, channels)``
            triples; ``signal_data`` is ``None`` for a missing recording.

    Returns:
        1-D array made of, in order: the patient features
        ``[age, female, male, other, rosc, ohca, vfib, ttm]``; the per-channel
        mean and standard deviation over all available recordings; the
        per-channel mean delta, theta, alpha and beta power of the most recent
        available recording; and that recording's quality score. The
        recording-based entries are NaN when no recording is available.
    """
    # Extract features from the patient metadata.
    age = get_age(patient_metadata)
    sex = get_sex(patient_metadata)
    rosc = get_rosc(patient_metadata)
    ohca = get_ohca(patient_metadata)
    vfib = get_vfib(patient_metadata)
    ttm = get_ttm(patient_metadata)

    # Use one-hot encoding for sex.
    if sex == 'Female':
        female, male, other = 1, 0, 0
    elif sex == 'Male':
        female, male, other = 0, 1, 0
    else:
        female, male, other = 0, 0, 1

    # Combine the patient features.
    patient_features = np.array([age, female, male, other, rosc, ohca, vfib, ttm])

    # Extract features from the recording data and metadata.
    channels = ['Fp1-F7', 'F7-T3', 'T3-T5', 'T5-O1', 'Fp2-F8', 'F8-T4', 'T4-T6', 'T6-O2', 'Fp1-F3',
                'F3-C3', 'C3-P3', 'P3-O1', 'Fp2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'Fz-Cz', 'Cz-Pz']
    num_channels = len(channels)
    num_recordings = len(recording_data)
    print('LENGTH OF RECORDED DATASET', num_recordings)

    # Reorder the channels of every available recording once, for consistency across
    # recordings, and remember each recording's position for the quality score lookup.
    available_recordings = list()
    for i in range(num_recordings):
        signal_data, sampling_frequency, signal_channels = recording_data[i]
        if signal_data is not None:
            signal_data = reorder_recording_channels(signal_data, signal_channels, channels)
            available_recordings.append((i, signal_data, sampling_frequency))

    if available_recordings:
        # Compute mean and standard deviation for each channel over all available recordings.
        all_signal_data = np.hstack([signal_data for _, signal_data, _ in available_recordings])
        signal_mean = np.nanmean(all_signal_data, axis=1)
        signal_std = np.nanstd(all_signal_data, axis=1)

        # Compute the power spectral density for the delta, theta, alpha, and beta frequency
        # bands for each channel of the most recent recording. The delta band ends at 4 Hz,
        # where the theta band starts, so that the bands do not overlap.
        index, signal_data, sampling_frequency = available_recordings[-1]
        frequency_bands = ((0.5, 4.0), (4.0, 8.0), (8.0, 12.0), (12.0, 30.0))
        band_psd_means = list()
        for fmin, fmax in frequency_bands:
            band_psd, _ = mne.time_frequency.psd_array_welch(
                signal_data, sfreq=sampling_frequency, fmin=fmin, fmax=fmax, verbose=False)
            band_psd_means.append(np.nanmean(band_psd, axis=1))
        delta_psd_mean, theta_psd_mean, alpha_psd_mean, beta_psd_mean = band_psd_means

        quality_score = get_quality_scores(recording_metadata)[index]
    else:
        signal_mean = float('nan') * np.ones(num_channels)
        signal_std = float('nan') * np.ones(num_channels)
        delta_psd_mean = theta_psd_mean = alpha_psd_mean = beta_psd_mean = float('nan') * np.ones(num_channels)
        quality_score = float('nan')

    recording_features = np.hstack((signal_mean, signal_std, delta_psd_mean, theta_psd_mean, alpha_psd_mean, beta_psd_mean, quality_score))

    # Combine the features from the patient metadata and the recording data and metadata.
    features = np.hstack((patient_features, recording_features))

    return features

# **Generate STFT**

In [None]:
!rm -r /content/example/training_data

In [None]:
!cp -r /content/i-care-international-cardiac-arrest-research-consortium-database-1.0/training /content/example

In [None]:
# rename training to training_data

In [None]:
%cd /content/example

/content/example


In [9]:
data_folder = "/content/example/training_data"
model_folder = "/content/example/model"

In [10]:
os.makedirs(model_folder, exist_ok=True)

In [11]:
# Collect the path of every .mat signal file, one patient folder at a time.
# os.listdir returns entries in arbitrary order, so both levels are sorted to make
# the sample order (and therefore the seeded train/test split) reproducible.
training = sorted(os.listdir(data_folder))
sub_root_mat = []
for x in training:
  p = os.path.join(data_folder,x)
  # Skip stray files next to the patient folders; os.listdir would fail on them.
  if not os.path.isdir(p):
    continue
  p1 = sorted(os.listdir(p))
  for y in p1:
    z = os.path.join(p,y)
    if z.endswith('.mat'):
      sub_root_mat.append(z)

In [12]:
from scipy.io import loadmat

# Load the raw signal matrix of every .mat file together with its patient's outcome label.
# The label only depends on the patient's metadata text file, so that file is read once
# per patient instead of loading all of the patient's recordings for every .mat file.
EEG_data = []
EEG_label = []
outcome_by_patient = {}
for x in sub_root_mat:
  folder_id = os.path.basename(os.path.dirname(x))  # The patient folder that contains the .mat file.
  if folder_id not in outcome_by_patient:
    patient_metadata = load_text_file(os.path.join(data_folder, folder_id, folder_id + '.txt'))
    outcome_by_patient[folder_id] = get_outcome(patient_metadata)
  EEG_label.append(outcome_by_patient[folder_id])
  EEG_data.append(loadmat(x)['val'])

In [13]:
# Convert the lists of per-recording signal matrices and labels to NumPy arrays.
# NOTE(review): this only gives a regular 3-D array if every recording has the same shape — confirm.
EEG_data = np.array(EEG_data)
EEG_label = np.array(EEG_label)

In [14]:
from keras.utils import to_categorical
from sklearn.preprocessing import OneHotEncoder


# One-hot encode the outcome labels; the encoder expects a 2-D column, hence the reshape.
enc = OneHotEncoder(sparse_output=False)
target_total1 = enc.fit_transform(EEG_label.reshape(-1,1))
target_total1 = np.array(target_total1)
print(target_total1.shape)

(529, 2)


In [15]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the recordings with a fixed seed.
# NOTE(review): the split is per recording, and every .mat file of a patient is a separate
# sample, so recordings of one patient can presumably land on both sides — confirm whether
# a patient-level split is needed.
X_train, X_test, y_train, y_test = train_test_split(EEG_data,target_total1, test_size=0.2, random_state=21)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(423, 18, 30000) (106, 18, 30000) (423, 2) (106, 2)


# **Model**

In [16]:
from tensorflow import keras
from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

def _inception_module(input_tensor, stride=1, activation='linear'):
    """Apply one inception block to *input_tensor* and return the resulting tensor.

    Three parallel convolutions with kernel sizes ``kernel_size``,
    ``kernel_size // 2`` and ``kernel_size // 4`` run on the (optionally
    bottlenecked) input, a fourth branch applies max pooling followed by a
    kernel-size-1 convolution to the raw input, and the four branches are
    concatenated, batch-normalized and passed through ReLU. Reads the
    module-level settings ``use_bottleneck``, ``bottleneck_size``,
    ``kernel_size`` and ``nb_filters``.
    """
    # The bottleneck is skipped when the input has a single feature in its last axis.
    bottleneck_applies = use_bottleneck and int(input_tensor.shape[-1]) > 1
    if bottleneck_applies:
        input_inception = keras.layers.Conv1D(
            filters=bottleneck_size, kernel_size=1, padding='same',
            activation=activation, use_bias=False)(input_tensor)
    else:
        input_inception = input_tensor

    branches = [
        keras.layers.Conv1D(
            filters=nb_filters, kernel_size=kernel_size // (2 ** power), strides=stride,
            padding='same', activation=activation, use_bias=False)(input_inception)
        for power in range(3)
    ]

    # The pooling branch works on the raw input rather than the bottleneck output.
    pooled = keras.layers.MaxPool1D(pool_size=3, strides=stride, padding='same')(input_tensor)
    branches.append(keras.layers.Conv1D(
        filters=nb_filters, kernel_size=1, padding='same',
        activation=activation, use_bias=False)(pooled))

    merged = keras.layers.Concatenate(axis=2)(branches)
    merged = keras.layers.BatchNormalization()(merged)
    return keras.layers.Activation(activation='relu')(merged)

def _shortcut_layer(input_tensor, out_tensor):
    """Add a residual connection from *input_tensor* to *out_tensor*.

    The input is projected with a kernel-size-1 convolution to the number of
    features in the last axis of ``out_tensor`` and batch-normalized, then
    added to ``out_tensor`` and passed through ReLU.
    """
    num_output_filters = int(out_tensor.shape[-1])
    projected = keras.layers.Conv1D(
        filters=num_output_filters, kernel_size=1, padding='same', use_bias=False)(input_tensor)
    projected = keras.layers.BatchNormalization()(projected)

    summed = keras.layers.Add()([projected, out_tensor])
    return keras.layers.Activation('relu')(summed)

def build_model(input_shape, nb_classes):
    """Build and compile the classifier.

    ``depth`` inception modules are stacked; when ``use_residual`` is set, a
    shortcut connection is added after every third module. Global average
    pooling and a softmax layer with ``nb_classes`` units follow. The model is
    compiled with categorical cross-entropy, the Adam optimizer and the
    accuracy metric. Reads the module-level settings ``depth`` and
    ``use_residual``.
    """
    inputs = keras.layers.Input(input_shape)

    features = inputs
    residual_source = inputs
    for module_index in range(depth):
        features = _inception_module(features)

        closes_residual_group = module_index % 3 == 2
        if use_residual and closes_residual_group:
            features = _shortcut_layer(residual_source, features)
            residual_source = features

    pooled = keras.layers.GlobalAveragePooling1D()(features)
    outputs = keras.layers.Dense(nb_classes, activation='softmax')(pooled)

    model = keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model



# NOTE(review): Conv1D convolves along the first of these two axes by default, so here it
# slides over the 18 channels and treats the 30000 samples as features — confirm whether the
# data should be transposed (this would have to change for training and inference together).
input_shape = [18, 30000]             # 18 EEG channels x 30000 samples per recording
nb_classes = 2                        # Good vs. Poor outcome
nb_filters = 64
verbose= True
use_residual = True
use_bottleneck = True
depth = 6
kernel_size = 41 - 1
batch_size = 32
mini_batch_size = 32
bottleneck_size = 64
nb_epochs = 1500


# min_lr must lie below the optimizer's starting learning rate (0.001 for the default Adam
# used in build_model); with a larger floor the callback never lowers the learning rate.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', patience=30, factor=0.25, mode='max',
                                      verbose=1, min_lr=0.0001)

es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=100)


# NOTE(review): the checkpoint keeps the weights with the lowest *training* loss, while early
# stopping watches validation accuracy — confirm this is the model that should be saved.
model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=f'{model_folder}/my_model.h5', monitor='loss',save_best_only=True)

callbacks = [reduce_lr, model_checkpoint,es]

model = build_model(input_shape, nb_classes)

# NOTE(review): the held-out split doubles as validation data for early stopping and
# learning-rate scheduling, so it is not an independent test set.
hist = model.fit(X_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,  validation_data=(X_test, y_test), shuffle=True,verbose=verbose, callbacks = callbacks)


Epoch 1/1500
Epoch 2/1500
 1/14 [=>............................] - ETA: 28s - loss: 0.6290 - accuracy: 0.6875

KeyboardInterrupt: ignored

# **Inference**

In [None]:
!rm -r /content/example/test_outputs

In [17]:
#model_folder
data_folder = "/content/example/test_data"
output_folder = "/content/example/test_outputs"
verbose = True
allow_failures = False

In [18]:
################################# edit this
import tensorflow as tf

def load_challenge_models(model_folder, verbose):
    """Load and return the Keras model stored as ``my_model.h5`` in *model_folder*.

    ``verbose`` is accepted but not used.
    """
    checkpoint_path = os.path.join(model_folder, 'my_model.h5')
    model = tf.keras.models.load_model(checkpoint_path)
    return model

In [None]:
################ start test

In [None]:
# patient_metadata, recording_metadata, recording_data = load_challenge_data(data_folder, "ICARE_0284")

# eeg_data = get_EEG(recording_data)
# total = 0
# for i in range(18):
#   filtered_EEG = butter_bandpass_filter(np.array(eeg_data[i]),lowcut, highcut, fs, order)
#   total += filtered_EEG  
# f, t, Zxx = signal.stft(np.array(total), fs, nperseg=1000)
# amp = np.abs(np.mean(total))
# plt.pcolormesh(t, f, np.abs(Zxx), vmin=0, vmax=amp, shading='gouraud')
# img_path = f'{patient_id}.png'
# plt.savefig(img_path)

# img = cv2.imread(img_path)
# img = cv2.resize(img, (IMAGE_HEIGHT, IMAGE_WIDTH))
# os.remove(img_path)

In [None]:
# img.shape

TensorShape([1, 256, 256, 3])

In [None]:
# img = tf.expand_dims(img, 0)

In [None]:
# result = classifier.predict(img)



In [None]:
# result

array([[0.9880442 , 0.01195578]], dtype=float32)

In [None]:
# outcome = np.argmax(result)

0

In [None]:
# good_prob = result[0][0]

In [None]:
# outcome_probability = result[0][1]

In [None]:
# good_prob = 0.658588

In [None]:
# if good_prob >=0.8:
#     cpc = 1
# elif good_prob >=0.6:
#     cpc = 2
# elif good_prob >=0.4:
#     cpc = 3
# elif good_prob >=0.2:
#     cpc = 4
# else: 
#     cpc = 5

In [None]:
################ end test

In [46]:
models = load_challenge_models(model_folder, verbose)

In [45]:
# Scratch cell: collect the .mat files of one hard-coded patient (the same steps that
# run_challenge_models performs for an arbitrary patient).
sub_root_mat = []
p = os.path.join(data_folder,"ICARE_0284")
p1 = os.listdir(p)
for y in p1:
  z = os.path.join(p,y)
  if z.endswith('.mat'):
    sub_root_mat.append(z) 

In [56]:
# Scratch cell: predict the class of each collected recording separately.
results = []
for x in sub_root_mat:
    a = loadmat(x)['val']
    a = tf.expand_dims(a, 0)  # Add a leading batch axis of size one.
    result = models.predict(a, verbose=0)
    outcome = np.argmax(result)  # Index of the larger of the model's outputs.
    results.append(outcome)

In [70]:
results = np.array(results)

In [67]:
total = results.size

In [64]:
good_count = np.sum(results == 0)
poor_count = np.sum(results == 1)

In [65]:
outcome = 0 if good_count >= poor_count else 1

In [69]:
good_prob = float(good_count / total)
outcome_probability = float(poor_count / total)

In [54]:
a =  loadmat(sub_root_mat[0])['val']

In [50]:
a = tf.expand_dims(a, 0)

In [51]:
result = models.predict(a)



In [52]:
result

array([[0.67552423, 0.32447574]], dtype=float32)

In [53]:
outcome = np.argmax(result)

In [None]:


good_prob = result[0][0]
outcome_probability = result[0][1]

In [19]:
################################# edit this
def run_challenge_models(models, data_folder, patient_id, verbose):
    """Predict ``(outcome, outcome_probability, cpc)`` for one patient by majority vote.

    Every ``.mat`` file in the patient's folder is classified separately. The
    outcome is 0 unless class 1 receives more votes than class 0, the outcome
    probability is the fraction of recordings voted as class 1, and the CPC
    score (1-5) is derived from the fraction voted as class 0. A patient
    without any ``.mat`` file gets ``(0.0, 0.5, 2.5)``. ``verbose`` is accepted
    but not used.
    """
    patient_folder = os.path.join(data_folder, patient_id)
    candidate_paths = (os.path.join(patient_folder, entry) for entry in os.listdir(patient_folder))
    mat_paths = [path for path in candidate_paths if path.endswith('.mat')]

    # One vote (predicted class index) per recording; each recording is a batch of one.
    votes = np.array([
        np.argmax(models.predict(tf.expand_dims(loadmat(path)['val'], 0), verbose=0))
        for path in mat_paths
    ])

    total = votes.size
    if not total > 0:
        return float(0), float(0.5), float(2.5)

    good_count = np.sum(votes == 0)
    poor_count = np.sum(votes == 1)

    outcome = 0 if good_count >= poor_count else 1

    good_prob = float(good_count / total)
    outcome_probability = float(poor_count / total)

    # Map the share of 'good' votes to a CPC score: the larger the share, the lower the score.
    cpc = 5
    for threshold, score in ((0.8, 1), (0.6, 2), (0.4, 3), (0.2, 4)):
        if good_prob >= threshold:
            cpc = score
            break

    cpc = np.clip(cpc, 1, 5)

    return outcome, outcome_probability, cpc

In [20]:
models = load_challenge_models(model_folder, verbose)

In [23]:
patient_ids = find_data_folders(data_folder)
num_patients = len(patient_ids)

In [21]:
os.makedirs(output_folder, exist_ok=True)

In [24]:
# Iterate over the patients.
for i in range(num_patients):
    if verbose >= 2:
        print('    {}/{}...'.format(i+1, num_patients))

    patient_id = patient_ids[i]

    # Allow or disallow the models to fail on parts of the data; this can be helpful for debugging.
    try:
        outcome_binary, outcome_probability, cpc = run_challenge_models(models, data_folder, patient_id, verbose) ### Teams: Implement this function!!!
    # Catch Exception rather than everything, so that KeyboardInterrupt and SystemExit
    # still stop the run even when failures are allowed.
    except Exception:
        if allow_failures:
            if verbose >= 2:
                print('... failed.')
            outcome_binary, outcome_probability, cpc = float('nan'), float('nan'), float('nan')
        else:
            raise

    # Save Challenge outputs.

    # Create a folder for the Challenge outputs if it does not already exist.
    os.makedirs(os.path.join(output_folder, patient_id), exist_ok=True)
    output_file = os.path.join(output_folder, patient_id, patient_id + '.txt')
    save_challenge_outputs(output_file, patient_id, outcome_binary, outcome_probability, cpc)

# **Evaluate Helper**

In [25]:
def evaluate_model(label_folder, output_folder):
    """Score the saved model outputs against the labels.

    Returns the tuple ``(challenge_score, auroc, auprc, accuracy, f_measure,
    mse_cpcs, mae_cpcs)``: the first five compare the outcome labels with the
    predicted outcomes or outcome probabilities, the last two compare the CPC
    labels with the predicted CPC scores.
    """
    # Load labels and model outputs.
    patient_ids, true_outcomes, true_cpcs = load_challenge_labels(label_folder)
    predicted_outcomes, predicted_probabilities, predicted_cpcs = load_challenge_outputs(
        output_folder, patient_ids)

    # Probability-based metrics.
    challenge_score = compute_challenge_score(true_outcomes, predicted_probabilities)
    auroc_outcomes, auprc_outcomes = compute_auc(true_outcomes, predicted_probabilities)

    # Metrics on the binary outcome predictions; only the first returned value is kept.
    accuracy_outcomes, _, _ = compute_accuracy(true_outcomes, predicted_outcomes)
    f_measure_outcomes, _, _ = compute_f_measure(true_outcomes, predicted_outcomes)

    # Error metrics on the CPC predictions.
    mse_cpcs = compute_mse(true_cpcs, predicted_cpcs)
    mae_cpcs = compute_mae(true_cpcs, predicted_cpcs)

    return (challenge_score, auroc_outcomes, auprc_outcomes, accuracy_outcomes,
            f_measure_outcomes, mse_cpcs, mae_cpcs)

# Compute the Challenge score.
def compute_challenge_score(labels, outputs):
    """Return the largest true positive rate reachable at a false positive rate of at most 0.05.

    Parameters:
        labels: Sequence of binary labels (1/True is the positive class, 0/False the negative class).
        outputs: Sequence of scores of the same length; larger means more likely positive.

    Returns:
        The largest TPR over all thresholds whose FPR is <= 0.05, or NaN when the
        rates are undefined (no instances, no positive labels or no negative labels).
    """
    assert len(labels) == len(outputs)
    labels = np.asarray(labels)
    outputs = np.asarray(outputs)
    num_instances = len(labels)

    # Without both classes the TPR or the FPR is undefined; report NaN instead of
    # dividing by zero (or indexing into an empty threshold list).
    num_positives = np.sum(labels == 1)
    num_negatives = np.sum(labels == 0)
    if num_instances == 0 or num_positives == 0 or num_negatives == 0:
        return float('nan')

    # Use the unique output values as the thresholds for the positive and negative classes,
    # preceded by one threshold above every output so that the sweep starts with no positives.
    thresholds = np.unique(outputs)
    thresholds = np.append(thresholds, thresholds[-1]+1)
    thresholds = thresholds[::-1]
    num_thresholds = len(thresholds)

    idx = np.argsort(outputs)[::-1]

    # Count the TPs and FPs at each threshold, carrying over the counts of the previous threshold.
    tp = np.zeros(num_thresholds)
    fp = np.zeros(num_thresholds)

    i = 0
    for j in range(1, num_thresholds):
        tp[j] = tp[j-1]
        fp[j] = fp[j-1]

        while i < num_instances and outputs[idx[i]] >= thresholds[j]:
            if labels[idx[i]]:
                tp[j] += 1
            else:
                fp[j] += 1
            i += 1

    # Compute the TPRs and FPRs; the class totals do not depend on the threshold.
    tpr = tp / num_positives
    fpr = fp / num_negatives

    # Find the largest TPR such that FPR <= 0.05.
    max_fpr = 0.05
    within_budget = fpr <= max_fpr
    if not np.any(within_budget):
        return float('nan')
    return np.max(tpr[within_budget])

# Compute area under the receiver operating characteristic curve (AUROC) and area under the precision recall curve (AUPRC).
def compute_auc(labels, outputs):
    """Return the AUROC and AUPRC of the outputs with respect to the labels.

    Every unique output value, plus one value above the maximum, is used as a
    decision threshold; an instance is counted as a positive prediction at a
    threshold when its output is greater than or equal to that threshold.

    Args:
        labels: Sequence of labels, one per instance. Entries equal to 1 are
            counted as positives and entries equal to 0 as negatives.
        outputs: Sequence of numeric scores, one per label.

    Returns:
        A tuple (auroc, auprc). Both values are NaN for empty input; a value is
        also NaN when a rate it depends on is undefined (for example, no
        positive labels).
    """
    assert len(labels) == len(outputs)

    # Coerce to arrays: on a plain list, `labels == 1` is a single False rather
    # than an elementwise comparison, which silently corrupts the counts below.
    labels = np.asarray(labels)
    outputs = np.asarray(outputs)
    num_instances = len(labels)

    # Without instances there are no thresholds to evaluate.
    if num_instances == 0:
        return float('nan'), float('nan')

    # Use the unique output values as the thresholds for the positive and negative classes.
    # The extra threshold above the maximum is the point where nothing is predicted positive.
    thresholds = np.unique(outputs)
    thresholds = np.append(thresholds, thresholds[-1]+1)
    thresholds = thresholds[::-1]
    num_thresholds = len(thresholds)

    # Instance indices ordered from the highest output to the lowest.
    idx = np.argsort(outputs)[::-1]

    # Initialize the TPs, FPs, FNs, and TNs with no positive outputs.
    tp = np.zeros(num_thresholds)
    fp = np.zeros(num_thresholds)
    fn = np.zeros(num_thresholds)
    tn = np.zeros(num_thresholds)

    fn[0] = np.sum(labels == 1)
    tn[0] = np.sum(labels == 0)

    # Update the TPs, FPs, FNs, and TNs using the values at the previous threshold.
    i = 0
    for j in range(1, num_thresholds):
        tp[j] = tp[j-1]
        fp[j] = fp[j-1]
        fn[j] = fn[j-1]
        tn[j] = tn[j-1]

        # Move every instance that reaches this threshold from the negative
        # predictions to the positive predictions.
        while i < num_instances and outputs[idx[i]] >= thresholds[j]:
            if labels[idx[i]]:
                tp[j] += 1
                fn[j] -= 1
            else:
                fp[j] += 1
                tn[j] -= 1
            i += 1

    # Compute the TPRs, TNRs, and PPVs at each threshold; a rate is NaN where
    # its denominator is zero.
    tpr = np.full(num_thresholds, float('nan'))
    tnr = np.full(num_thresholds, float('nan'))
    ppv = np.full(num_thresholds, float('nan'))

    actual_positives = tp + fn
    actual_negatives = fp + tn
    predicted_positives = tp + fp

    defined = actual_positives != 0
    tpr[defined] = tp[defined] / actual_positives[defined]
    defined = actual_negatives != 0
    tnr[defined] = tn[defined] / actual_negatives[defined]
    defined = predicted_positives != 0
    ppv[defined] = tp[defined] / predicted_positives[defined]

    # Compute AUROC as the area under a piecewise linear function with TPR/sensitivity (x-axis) and TNR/specificity (y-axis) and
    # AUPRC as the area under a piecewise constant with TPR/recall (x-axis) and PPV/precision (y-axis).
    auroc = 0.0
    auprc = 0.0
    for j in range(num_thresholds-1):
        width = tpr[j+1] - tpr[j]
        auroc += 0.5 * width * (tnr[j+1] + tnr[j])
        auprc += width * ppv[j+1]

    return auroc, auprc

# Construct the one-hot encoding of data for the given classes.
def compute_one_hot_encoding(data, classes):
    """Return a boolean matrix marking which class each data item matches.

    Entry [i, j] is True when data[i] equals classes[j], or when is_nan
    reports both of them as NaN; every other entry is False.
    """
    encoding = np.zeros((len(data), len(classes)), dtype=np.bool_)

    for row, value in enumerate(data):
        for col, candidate in enumerate(classes):
            # Plain equality first; the NaN check is only consulted when it fails.
            if (value == candidate) or (is_nan(value) and is_nan(candidate)):
                encoding[row, col] = 1

    return encoding

# Compute the binary confusion matrix, where the columns are the expert labels and the rows are the classifier labels for the given
# classes.
def compute_confusion_matrix(labels, outputs, classes):
    """Count, for every class pair (i, j), the instances whose output marks
    class i and whose label marks class j.

    labels and outputs are indexed as 2-D [instance, class] indicator arrays
    of the same shape. The result is a len(classes) x len(classes) float array.
    """
    assert np.shape(labels) == np.shape(outputs)

    num_classes = len(classes)
    A = np.zeros((num_classes, num_classes))

    # Walk the instances row by row, pairing each label row with its output row.
    for expert_row, predicted_row in zip(labels, outputs):
        for i in range(num_classes):
            if predicted_row[i] != 1:
                continue
            for j in range(num_classes):
                if expert_row[j] == 1:
                    A[i, j] += 1

    return A

# Construct the binary one-vs-rest confusion matrices, where the columns are the expert labels and the rows are the classifier
# for the given classes.
def compute_one_vs_rest_confusion_matrix(labels, outputs, classes):
    """Return one 2x2 confusion matrix per class.

    For class j the layout is A[j] = [[TP, FP], [FN, TN]]. labels and outputs
    are indexed as 2-D [instance, class] indicator arrays of the same shape;
    entries that are neither 0 nor 1 are not counted.
    """
    assert np.shape(labels) == np.shape(outputs)

    num_classes = len(classes)
    A = np.zeros((num_classes, 2, 2))

    for expert_row, predicted_row in zip(labels, outputs):
        for j in range(num_classes):
            expert = expert_row[j]
            predicted = predicted_row[j]

            # Only clean 0/1 indicators contribute to the counts.
            if expert not in (0, 1) or predicted not in (0, 1):
                continue

            # Row 0 holds positive predictions, column 0 holds positive labels.
            row = 0 if predicted == 1 else 1
            col = 0 if expert == 1 else 1
            A[j, row, col] += 1

    return A

# Compute accuracy.
def compute_accuracy(labels, outputs):
    """Return the overall accuracy, the per-class accuracy, and the classes.

    The classes are the unique values found in labels and outputs together.
    The overall accuracy is the trace of the confusion matrix divided by its
    sum (NaN when the matrix is empty). The per-class accuracy of a class is
    its diagonal entry divided by its column sum (NaN when no label has that
    class).
    """
    # One-hot encode both inputs against the shared class list.
    classes = np.unique(np.concatenate((labels, outputs)))
    expert = compute_one_hot_encoding(labels, classes)
    predicted = compute_one_hot_encoding(outputs, classes)
    A = compute_confusion_matrix(expert, predicted, classes)

    # Overall accuracy.
    total = np.sum(A)
    accuracy = np.trace(A) / total if total > 0 else float('nan')

    # Per-class accuracy.
    num_classes = len(classes)
    per_class_accuracy = np.zeros(num_classes)
    for i in range(num_classes):
        class_is_labelled = np.sum(expert[:, i]) > 0
        per_class_accuracy[i] = A[i, i] / np.sum(A[:, i]) if class_is_labelled else float('nan')

    return accuracy, per_class_accuracy, classes

# Compute macro F-measure.
def compute_f_measure(labels, outputs):
    """Return the macro F-measure, the per-class F-measures, and the classes.

    The classes are the unique values found in labels and outputs together.
    A class's F-measure is 2*TP / (2*TP + FP + FN), or NaN when that
    denominator is zero. The macro F-measure is the mean of the non-NaN
    per-class values, or NaN when none of them is finite.
    """
    # Build the one-vs-rest confusion matrices from the one-hot encodings.
    classes = np.unique(np.concatenate((labels, outputs)))
    expert = compute_one_hot_encoding(labels, classes)
    predicted = compute_one_hot_encoding(outputs, classes)
    confusion = compute_one_vs_rest_confusion_matrix(expert, predicted, classes)

    per_class_f_measure = np.zeros(len(classes))
    for k, matrix in enumerate(confusion):
        true_pos, false_pos, false_neg = matrix[0, 0], matrix[0, 1], matrix[1, 0]
        denominator = 2 * true_pos + false_pos + false_neg
        if denominator > 0:
            per_class_f_measure[k] = float(2 * true_pos) / float(denominator)
        else:
            per_class_f_measure[k] = float('nan')

    # Average only when at least one class has a defined F-measure.
    if not np.any(np.isfinite(per_class_f_measure)):
        macro_f_measure = float('nan')
    else:
        macro_f_measure = np.nanmean(per_class_f_measure)

    return macro_f_measure, per_class_f_measure, classes

# Compute mean-squared error.
def compute_mse(labels, outputs):
    """Return the mean of the squared differences between labels and outputs.

    Both inputs are converted to float64 arrays and must have the same length.
    """
    assert len(labels) == len(outputs)

    expected = np.asarray(labels, dtype=np.float64)
    predicted = np.asarray(outputs, dtype=np.float64)
    residuals = expected - predicted

    return np.mean(residuals**2)

# Compute mean-absolute error.
def compute_mae(labels, outputs):
    """Return the mean of the absolute differences between labels and outputs.

    Both inputs are converted to float64 arrays and must have the same length.
    """
    assert len(labels) == len(outputs)

    expected = np.asarray(labels, dtype=np.float64)
    predicted = np.asarray(outputs, dtype=np.float64)
    residuals = expected - predicted

    return np.mean(np.abs(residuals))

# **Evaluate**

In [26]:
# Folders handed to evaluate_model in the next cell: the test data and the
# model outputs to be scored against it.
test_data = "/content/example/test_data"
test_outputs = "/content/example/test_outputs"

In [27]:
# Score the model outputs against the test data.
scores = evaluate_model(test_data, test_outputs)

# Unpack the scores.
(challenge_score, auroc_outcomes, auprc_outcomes, accuracy_outcomes,
 f_measure_outcomes, mse_cpcs, mae_cpcs) = scores

# Construct a string with scores, one metric per line with three decimals.
output_string = ''.join((
    'Challenge Score: {:.3f}\n'.format(challenge_score),
    'Outcome AUROC: {:.3f}\n'.format(auroc_outcomes),
    'Outcome AUPRC: {:.3f}\n'.format(auprc_outcomes),
    'Outcome Accuracy: {:.3f}\n'.format(accuracy_outcomes),
    'Outcome F-measure: {:.3f}\n'.format(f_measure_outcomes),
    'CPC MSE: {:.3f}\n'.format(mse_cpcs),
    'CPC MAE: {:.3f}\n'.format(mae_cpcs),
))

print(output_string)


Challenge Score: 0.000
Outcome AUROC: 0.200
Outcome AUPRC: 0.200
Outcome Accuracy: 0.167
Outcome F-measure: 0.143
CPC MSE: 10.708
CPC MAE: 3.083



In [None]:
!rm -r /content/example/test_data/.ipynb_checkpoints/

# **Test For Submission**

In [None]:
!ls

4.zip  aa.zip  drive  example  sample_data


In [None]:
!pip install mne

In [None]:
%cd /content

/content


In [29]:
!unzip /content/inc.zip

Archive:  /content/inc.zip
   creating: python-example-2023-master/
  inflating: python-example-2023-master/Dockerfile  
  inflating: python-example-2023-master/evaluate_model.py  
  inflating: python-example-2023-master/helper_code.py  
  inflating: python-example-2023-master/LICENSE  
  inflating: python-example-2023-master/README.md  
  inflating: python-example-2023-master/remove_data.py  
  inflating: python-example-2023-master/remove_labels.py  
  inflating: python-example-2023-master/requirements.txt  
  inflating: python-example-2023-master/run_model.py  
  inflating: python-example-2023-master/team_code.py  
  inflating: python-example-2023-master/train_model.py  
  inflating: python-example-2023-master/truncate_data.py  


In [30]:
%cd /content/python-example-2023-master

/content/python-example-2023-master


In [31]:
!rm -r /content/example/test_outputs

In [36]:
!python train_model.py /content/example/training_data /content/example/model

Finding the Challenge data...
Extracting features and labels from the Challenge data...
2023-04-24 23:30:36.185704: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 456840000 exceeds 10% of free system memory.
Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
  File "/content/python-example-2023-master/train_model.py", line 30, in <module>
    train_challenge_model(data_folder, model_folder, verbose) ### Teams: Implement this function!!!
  File "/content/python-example-2023-master/team_code.py", line 178, in train_challenge_model
    hist = model.fit(X_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,  validation_data=(X_test, y_test), shuffle=True,verbose=verbose, callbacks = callbacks)
  File "/usr/local/lib/python3.9/dist-packages/keras/utils/traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.9/dist-packages/keras/engine/training.py", line 1685, in fit
    tmp_logs = self.train_function(iterato

In [37]:
!python run_model.py /content/example/model /content/example/test_data /content/example/test_outputs

Loading the Challenge models...
Finding the Challenge data...
Running the Challenge models on the Challenge data...
Done.


In [38]:
!python evaluate_model.py /content/example/test_data /content/example/test_outputs

Challenge Score: 0.000
Outcome AUROC: 0.400
Outcome AUPRC: 0.250
Outcome Accuracy: 0.333
Outcome F-measure: 0.250
CPC MSE: 5.375
CPC MAE: 2.083



In [None]:
!python evaluate_model.py test_data test_outputs