In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
import math

import numpy as np
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from matplotlib import pyplot as plt
plt.ioff()

from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, TimeDistributed, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

import datetime as dt

In [2]:
# Root folder of the project data. The default is the original author's local
# checkout; set the FAINTS_PREDICTION_PATH environment variable to run the
# notebook elsewhere without editing it.
path = os.path.join(
    os.environ.get(
        "FAINTS_PREDICTION_PATH",
        "C:/Users/OPTIMUSPRIME/Desktop/Studia/Magisterka/Faints-Prediction/",
    ),
    "",  # guarantees a trailing separator: file paths are built by string concatenation
)
BP_filename = "BP.csv"
HR_filename = "HR.csv"


def _read_indices(filename):
    """Return the first field of every row of ``DATA/<filename>`` as a list of strings.

    The file is read without a header row. The resulting strings are used as
    column names when selecting recordings from the BP/HR tables.
    """
    rows = pd.read_csv(path + "DATA/" + filename, header=None).values.tolist()
    return [str(row[0]) for row in rows]


train_indices = _read_indices("training_set.txt")
test_indices = _read_indices("test_set.txt")
validation_indices = _read_indices("validation_set.txt")
all_indices = train_indices + test_indices + validation_indices

In [3]:
def shift(xs, n):
    """Shift a 1-D sequence by ``n`` positions, filling vacated slots with NaN.

    Parameters
    ----------
    xs : sequence or array-like
        Values to shift.
    n : int
        Positive values move the data towards the end (NaN padding at the
        front); negative values move it towards the start (NaN padding at the
        end); ``0`` returns ``xs`` itself, unchanged and uncopied.

    Returns
    -------
    numpy.ndarray (or ``xs`` itself when ``n == 0``)
        An array with the same length as ``xs``.
    """
    if n == 0:
        return xs

    values = np.asarray(xs)
    length = len(values)
    # Clamp the shift so that over-shifting yields an all-NaN array of the
    # original length instead of an array that is longer than the input.
    k = min(abs(n), length)
    padding = np.full(k, np.nan)

    if n > 0:
        return np.concatenate((padding, values[:length - k]))
    return np.concatenate((values[k:], padding))

    
def series_to_supervised(share_prices, timestamps, input_time_steps, dropnan=True):
    """Cut two parallel series into aligned, non-overlapping windows.

    The parameter names are kept for backward compatibility; the function is
    generic and simply treats ``share_prices`` and ``timestamps`` as two series
    that are sampled in lock-step.

    Parameters
    ----------
    share_prices, timestamps : array-like or pandas.Series
        The two series to window. They are expected to have the same length.
    input_time_steps : int
        Number of consecutive samples per window (must be >= 1).
    dropnan : bool, default True
        Discard every window position at which *either* series contains a NaN.
        Using one joint mask keeps the two outputs row-aligned even when the
        series have different NaN patterns.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Two arrays with one row per window and ``input_time_steps`` columns per
        input column, oldest sample first.

    Raises
    ------
    ValueError
        If ``input_time_steps`` is smaller than 1, or if ``dropnan`` is set and
        the two series have different lengths.
    """
    if input_time_steps < 1:
        raise ValueError("input_time_steps must be a positive integer")

    share_prices_df = pd.DataFrame(share_prices)
    timestamps_df = pd.DataFrame(timestamps)

    # Column k of the aggregate holds the series lagged by (input_time_steps-1-k),
    # so row r contains the window that ends at sample r.
    lags = range(input_time_steps - 1, -1, -1)
    price_windows = pd.concat([share_prices_df.shift(lag) for lag in lags], axis=1).values
    time_windows = pd.concat([timestamps_df.shift(lag) for lag in lags], axis=1).values

    if dropnan:
        if len(price_windows) != len(time_windows):
            raise ValueError("both series must have the same length")
        # A window is kept only if it is complete in BOTH series.
        complete = ~(pd.isna(price_windows).any(axis=1) | pd.isna(time_windows).any(axis=1))
        price_windows = price_windows[complete]
        time_windows = time_windows[complete]

    # Consecutive rows overlap by all but one sample; keeping every
    # input_time_steps-th row leaves windows that do not overlap.
    not_overlapping_indexes = np.arange(0, len(price_windows), input_time_steps)
    return price_windows[not_overlapping_indexes], time_windows[not_overlapping_indexes]
 

def split(BP_data, HR_data, col, time_steps):
    """Build the model inputs and one-hot targets for a single recording.

    Parameters
    ----------
    BP_data, HR_data : pandas.DataFrame
        Tables with one recording per column; ``col`` must exist in both.
    col : hashable
        Column (recording) to process. Its class is looked up in the
        module-level ``labels`` mapping.
    time_steps : int
        Window length handed to ``series_to_supervised``.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, time_steps, 2)
        For every window and time step the pair ``[BP value, HR value]``.
    y : numpy.ndarray, shape (n_windows, 2)
        ``[0., 1.]`` for recordings labelled ``'Synkope'``, ``[1., 0.]`` otherwise,
        repeated for every window.

    Both arrays keep their full dimensionality even when there are zero
    windows, so results from several recordings can always be concatenated.
    """
    BP_supervised, HR_supervised = series_to_supervised(BP_data[col], HR_data[col], time_steps)

    # Pair the two signals along a new trailing axis:
    # 2 x (n_windows, time_steps) -> (n_windows, time_steps, 2).
    X = np.stack((np.asarray(BP_supervised), np.asarray(HR_supervised)), axis=-1)

    # The label is a property of the recording, so compute it once and repeat it.
    label = [0., 1.] if labels[col] == 'Synkope' else [1., 0.]
    y = np.tile(np.array(label), (len(X), 1))
    return X, y


def split_df(BP_data, HR_data, time_steps):
    """Build model inputs and targets for every recording in ``BP_data``.

    Calls ``split`` once per column of ``BP_data`` and stacks the per-recording
    results along the first axis, in column order.

    Parameters
    ----------
    BP_data, HR_data : pandas.DataFrame
        Tables with one recording per column and matching column names.
    time_steps : int
        Window length forwarded to ``split``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The concatenated ``X`` and ``y`` arrays. If no recording produced any
        window, two empty 1-D arrays are returned.
    """
    X_parts = []
    y_parts = []
    for col in BP_data:
        X_single, y_single = split(BP_data, HR_data, col, time_steps)
        # Recordings without a complete window contribute nothing; skipping them
        # also avoids concatenating arrays of different dimensionality.
        if np.size(X_single) == 0:
            continue
        X_parts.append(X_single)
        y_parts.append(y_single)

    if not X_parts:
        return np.array([]), np.array([])

    # One concatenation at the end instead of re-copying the result per column.
    return np.concatenate(X_parts), np.concatenate(y_parts)

In [4]:
# Load the recordings and keep only the columns named in the index lists.
BP_data = pd.read_csv(path + "DATA/" + BP_filename, low_memory=False)[all_indices]
HR_data = pd.read_csv(path + "DATA/" + HR_filename, low_memory=False)[all_indices]

# The first row of every column carries that recording's label;
# all following rows are numeric samples.
labels = {col: BP_data[col].iloc[0] for col in BP_data}

# Convert the sample rows to float once and reuse them below.
BP_numeric = BP_data.iloc[1:].astype(np.float32)
HR_numeric = HR_data.iloc[1:].astype(np.float32)

# Global extrema over every selected recording.
# NOTE(review): these include the test and validation columns, so the scaling
# is not derived from the training data alone.
BP_max_value = BP_numeric.max().max()
BP_min_value = BP_numeric.min().min()
HR_max_value = HR_numeric.max().max()
HR_min_value = HR_numeric.min().min()

# One single-feature scaler per signal, fitted on just the two extrema so that
# every recording is mapped with the same min/max.
BP_scaler = MinMaxScaler().fit(np.array([BP_min_value, BP_max_value]).reshape(-1, 1))
HR_scaler = MinMaxScaler().fit(np.array([HR_min_value, HR_max_value]).reshape(-1, 1))


def _scale_all_columns(scaler, frame):
    """Apply a single-feature ``scaler`` to every cell of ``frame``.

    The scaler was fitted on one feature, so the values are flattened to a
    single column for ``transform`` (which validates the feature count) and
    reshaped back to the frame's layout afterwards.
    """
    values = frame.to_numpy()
    return scaler.transform(values.reshape(-1, 1)).reshape(values.shape)


BP_data_scaled = BP_numeric.copy()
BP_data_scaled[all_indices] = _scale_all_columns(BP_scaler, BP_data_scaled[all_indices])

HR_data_scaled = HR_numeric.copy()
HR_data_scaled[all_indices] = _scale_all_columns(HR_scaler, HR_data_scaled[all_indices])

In [5]:
# BP_data_scaled = BP_data_scaled.rolling(50).mean()
# HR_data_scaled = HR_data_scaled.rolling(50).mean()

# Drop Some Irrelevant Data From The Beginning To Improve Learning

In [6]:
# Fraction of each recording's non-NaN samples to discard from its start
# (0.0 keeps every sample).
part_to_drop = 0.0
BP_data_scaled_trimmed = BP_data_scaled.copy()
HR_data_scaled_trimmed = HR_data_scaled.copy()
for col in BP_data_scaled_trimmed.columns:
    # The amount is derived from the BP column and applied to both signals.
    n_rows_to_drop = int(part_to_drop * BP_data_scaled_trimmed[col].count())

    # A negative shift moves the data towards the start and pads the end with NaN.
    BP_data_scaled_trimmed[col] = shift(BP_data_scaled_trimmed[col], -n_rows_to_drop)
    HR_data_scaled_trimmed[col] = shift(HR_data_scaled_trimmed[col], -n_rows_to_drop)

In [7]:
# Hyperparameter grid for the search in the training cell: every combination of
# the lists below is tried. The commented-out line above each list is an
# alternative set of values kept for reference.
time_steps = [32]  # window length (samples per input sequence) passed to split_df/split
# learning_rates = [0.0005, 0.001, 0.005, 0.01]
learning_rates = [0.0005, 0.001, 0.005, 0.01]  # Adam learning rates
# r_layers = [0, 2, 3, 4]
r_layers = [1, 2]  # number of LSTM layers (build_model's `rl`)
# LSTM_cells = [16, 64, 256]
LSTM_cells = [16, 64, 256]  # units per LSTM layer (build_model's `c`)
# neurons = [5, 20, 50]
neurons = [10]  # units of the hidden Dense layer (build_model's `n`)
# epochs = [1, 2, 3, 4, 5]
epochs = [5, 10, 20]  # `epochs` argument of model.fit
# batch_size = [32, 64, 128]
batch_size = [128]  # `batch_size` argument of model.fit

In [8]:
def sample_timeseries(x, window=32):
    """Chop a series into consecutive, equally sized windows.

    All NaN entries are removed first; samples left over after the last full
    window are discarded.

    Parameters
    ----------
    x : array-like
        Numeric series, possibly containing NaN.
    window : int, default 32
        Number of samples per window (must be >= 1).

    Returns
    -------
    numpy.ndarray, shape (n_windows, window)
        ``n_windows`` is 0 when fewer than ``window`` valid samples exist.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    values = np.asarray(x)
    values = values[~np.isnan(values)]
    n_windows = len(values) // window
    # reshape copes with zero windows, where splitting into 0 sections would raise.
    return values[:n_windows * window].reshape(n_windows, window)

def build_model(rl, c, n, lr, X_train, y_train):
    """Create and compile a stacked-LSTM classifier.

    Architecture: ``rl`` LSTM layers (each followed by batch normalisation),
    a ReLU Dense layer with batch normalisation, and a softmax output layer.

    Parameters
    ----------
    rl : int
        Number of LSTM layers (must be >= 1). All but the last return full
        sequences so that the next LSTM layer receives 3-D input.
    c : int
        Units in every LSTM layer.
    n : int
        Units in the hidden Dense layer.
    lr : float
        Learning rate passed to the Adam optimizer.
    X_train : array with at least 3 dimensions
        Only ``shape[1]`` and ``shape[2]`` are read, as the input shape of the
        first LSTM layer.
    y_train : array with at least 2 dimensions
        Only ``shape[1]`` is read, as the number of softmax outputs.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    accuracy metric).

    Raises
    ------
    ValueError
        If ``rl`` is smaller than 1; without an LSTM layer the network would
        have no layer that defines the input shape or collapses the time axis.
    """
    if rl < 1:
        raise ValueError(f"rl must be at least 1, got {rl}")

    model = Sequential()
    for layer_idx in range(rl):
        layer_kwargs = {'return_sequences': layer_idx < rl - 1}
        if layer_idx == 0:
            # Only the first layer needs to be told the input shape.
            layer_kwargs['input_shape'] = (X_train.shape[1], X_train.shape[2])
        model.add(LSTM(c, **layer_kwargs))
        model.add(BatchNormalization())

    model.add(Dense(n, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(y_train.shape[1], activation='softmax'))

    optimizer = Adam(lr)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def plot_classified_timeseries(BP_data_scaled, HR_data_scaled, y_pred, column=None):
    """Plot one recording's BP and HR curves, coloured by the per-window prediction.

    The recording is cut into windows with ``sample_timeseries``; each window is
    drawn in red when the second predicted probability exceeds 0.5 and in green
    otherwise. Consecutive windows are joined so the curves have no gaps.
    A new 12x8 figure is created and left open as the current figure, so the
    caller can save or close it.

    Parameters
    ----------
    BP_data_scaled, HR_data_scaled : mapping or pandas.DataFrame
        Scaled signals, indexed by recording name.
    y_pred : sequence
        One prediction per window; element ``[1]`` of each prediction is
        compared against 0.5.
    column : hashable, optional
        Recording to plot. When omitted, the module-level variable ``ind`` is
        used, which is how existing callers select the recording.
    """
    key = ind if column is None else column
    BP = sample_timeseries(BP_data_scaled[key])
    HR = sample_timeseries(HR_data_scaled[key])

    _, ax = plt.subplots(figsize=(12, 8))

    t = 0  # x position at which the next window starts
    BP_last_elem = None
    HR_last_elem = None
    for HRv, BPv, y in zip(HR, BP, y_pred):
        color = 'r' if y[1] > 0.5 else 'g'
        window_len = len(BPv)
        if BP_last_elem is None:
            time_range = range(t, t + window_len)
        else:
            # Start one sample earlier with the previous window's last value so
            # the line segments of neighbouring windows connect.
            time_range = range(t - 1, t + window_len)
            BPv = np.concatenate(([BP_last_elem], BPv))
            HRv = np.concatenate(([HR_last_elem], HRv))
        ax.plot(time_range, HRv, color=color, linestyle='-')
        ax.plot(time_range, BPv, color=color, linestyle='-')
        t += window_len
        BP_last_elem = BPv[-1]
        HR_last_elem = HRv[-1]

    ax.set_title(labels[key])
    ax.set_ylim((0, 1))

In [11]:
# Grid search over the hyperparameter lists. For every configuration a model is
# trained on the training recordings (the test recordings serve as Keras
# validation data), saved to its own folder, and then used to colour a plot of
# every validation recording.
plt.ioff()  # figures are only written to disk
for ts in time_steps:
    X_train, y_train = split_df(BP_data_scaled_trimmed[train_indices], HR_data_scaled_trimmed[train_indices], ts)
    X_test, y_test = split_df(BP_data_scaled_trimmed[test_indices], HR_data_scaled_trimmed[test_indices], ts)
    for rl in r_layers:
        for c in LSTM_cells:
            for n in neurons:
                for lr in learning_rates:
                    for e in epochs:
                        for bs in batch_size:
                            # Start from a fresh model for every configuration. Re-using one
                            # model across the epoch/batch-size loops would keep training the
                            # same weights, so the E{e} in the folder name would not match
                            # the number of epochs the saved model has actually seen.
                            model = build_model(rl, c, n, lr, X_train, y_train)
                            hist = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=e, batch_size=bs, verbose=0)
                            acc = int(hist.history['accuracy'][-1]*100)
                            val_acc = int(hist.history['val_accuracy'][-1]*100)
                            directory = "LSTM_RESULTS_TRIMMED_NONAVERAGED/" + f"ACC{acc}_VALACC{val_acc}_RL{rl}_C{c}_N{n}_E{e}_BS{bs}_LR{lr}"
                            # Creates the parent folder when needed and tolerates re-runs that
                            # land on an already existing configuration folder.
                            os.makedirs(directory, exist_ok=True)
                            model.save(directory + "/model")
                            for ind in validation_indices:
                                # The untrimmed series is used here so the whole recording is plotted.
                                X_val, y_val = split(BP_data_scaled, HR_data_scaled, ind, ts)
                                print(X_val.shape)
                                if X_val.size == 0:
                                    # Recording too short for a single window: nothing to predict or plot.
                                    continue
                                y_val_pred = model.predict(X_val)

                                # plot_classified_timeseries picks the recording via the loop variable `ind`.
                                plot_classified_timeseries(BP_data_scaled, HR_data_scaled, y_val_pred)
                                plt.savefig(directory + f"/{ind}.png")
                                plt.close('all')

(130, 32, 2)
(129, 32, 2)
(83, 32, 2)
(85, 32, 2)
(92, 32, 2)
(118, 32, 2)
(74, 32, 2)
(132, 32, 2)
(156, 32, 2)
(91, 32, 2)
(111, 32, 2)
(145, 32, 2)
(146, 32, 2)
(47, 32, 2)
(202, 32, 2)
(127, 32, 2)
(169, 32, 2)
(136, 32, 2)
(130, 32, 2)
(129, 32, 2)
(83, 32, 2)

KeyboardInterrupt: 