# Load libraries, configuration

In [None]:
import os
import json

import tensorflow as tf
import keras
from keras import backend as K
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide matplotlib defaults: figure size of (8, 6) and no axes grid.
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

# Configs

In [None]:
# Participant id and sampling interval; both are interpolated into the
# spreadsheet file names read further down.
user = 'participant10'
interval = '15min'

# Column to predict.
# Options: 'wearing_off' | 'wearing_off_post_meds' | 'wearing_off_lead60'
target_column = 'wearing_off'

# Columns requested from each spreadsheet, in this order:
# base columns, additional timestamp-derived columns, then the target.
columns = [
    'timestamp',
    'heart_rate',
    'steps',
    'stress_score',
    'awake',
    'deep',
    'light',
    'rem',
    'nonrem_total',
    'total',
    'nonrem_percentage',
    'sleep_efficiency',
]

# Include FonLog data (currently disabled)
# columns.extend(['time_from_last_drug_taken'])  # , 'wo_duration'

# Additional data
columns.extend(
    ['timestamp_dayofweek', 'timestamp_hour_sin', 'timestamp_hour_cos']
)

# The target always comes last.
columns.append(target_column)

# Per-participant metadata, keyed by participant id. The loading cell reads
# each entry's 'start_date' and 'end_date_plus_two'.
# A context manager closes the file handle deterministically; the previous
# `json.load(open(...))` form left it open until garbage collection.
with open('./data/participant_dictionary.json') as participant_file:
    participant_dictionary = json.load(participant_file)

# CV splits
# Number of records that make up one day at the configured sampling interval
# (used below to report the split sizes in days).
if interval == '15min':
    record_size_per_day = 96
elif interval == '15s':
    record_size_per_day = 5760
elif interval == '1min':
    record_size_per_day = 1440
else:
    # Fail here with a clear message instead of leaving the name undefined
    # and hitting a NameError several cells later.
    raise ValueError(
        f"Unsupported interval {interval!r}; "
        "expected '15min', '15s' or '1min'"
    )

# Metrics handed to `model.compile(...)` in `compile_and_fit`.
# The `name=` of each metric is the key under which Keras reports it
# (and, prefixed with 'val_', its validation counterpart) in the fit history.
METRICS = [
      # Confusion-matrix counts.
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      # Scores derived from binary predictions.
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      # Area under the ROC curve and under the precision-recall curve.
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]
# Custom balanced-accuracy metrics, currently disabled:
#       BalancedSparseCategoricalAccuracy(),
#       BalancedAccuracy()]

# Training / windowing hyper-parameters.
# NOTE: BATCH_SIZE is reassigned to 8 in the WindowGenerator cell, which runs
# before any dataset is built, so the value set here is not the one used.
BATCH_SIZE = 1
MAX_EPOCHS = 10  # epoch limit passed to model.fit in compile_and_fit
LEARNING_RATE = 1e-3  # Adam learning rate used in compile_and_fit
SHIFT = 4 # 1 = 15 min, 2 = 30 min, 4 = 1 hour
MULTI_STEP_WIDTH = 36 # input 36 = 9 hours, input 96 = 24 hours
# The four settings below are not referenced anywhere in the visible part of
# this notebook; presumably they are experiment switches — TODO confirm.
USE_HOURLY = False
SAVEFIG = False
EXPERIMENT_NAME = 'with wearing-off'
REMOVE_WEARING_OFF_IN_PREVIOUS_STEP = False

# features to normalize
# timestamp_dayofweek, wearing_off were not normalized
normalize_features = [
    'heart_rate',
    'steps',
    'stress_score',
    'awake',
    'deep',
    'light',
    'rem',
    'nonrem_total',
    'total',
    'nonrem_percentage',
    'sleep_efficiency',
    'timestamp_hour_sin',
    'timestamp_hour_cos',
]


def normalize_data(df, mean, std, normalize_features=normalize_features):
    """Return a standardized copy of ``df``.

    Only the columns listed in ``normalize_features`` are rewritten, as
    ``(value - mean) / std``; every other column is carried over unchanged.
    The input frame itself is not modified.

    Args:
        df: DataFrame containing at least the ``normalize_features`` columns.
        mean: value(s) subtracted from the selected columns.
        std: value(s) the centred columns are divided by.
        normalize_features: names of the columns to standardize.

    Returns:
        A new DataFrame with the selected columns standardized.
    """
    result = df.copy()
    centred = result.loc[:, normalize_features] - mean
    result.loc[:, normalize_features] = centred / std
    return result

# Load & process data

In [None]:
# dataset = pd.read_excel(f'./data/4-combined_data_{user}_{interval}.xlsx',
#                               index_col="timestamp",
#                               usecols=columns,
#                               engine='openpyxl')
# # Fill missing data with 0
# dataset.fillna(0, inplace=True)

# # Filter data based on participants' dictionary
# dataset = dataset.loc[
#     (dataset.index >= participant_dictionary[user]['start_date']) &
#     (dataset.index < participant_dictionary[user]['end_date_plus_two'])
# ]

# column_indices = { name: i for i, name in enumerate(dataset.columns) }
# df = dataset

In [None]:
# Read every participant's spreadsheet, keep only the rows inside that
# participant's study period, tag them with the participant number (`pid`),
# and stack everything into a single DataFrame.
participant_frames = []
for participant_number in range(1, 11):
    user = f'participant{participant_number}'
    participant_data = pd.read_excel(
        f'./data/4-combined_data_{user}_{interval}.xlsx',
        index_col="timestamp",
        usecols=columns,
        engine='openpyxl',
    )

    # Fill missing data with 0
    participant_data = participant_data.fillna(0)

    # Filter data based on participants' dictionary:
    # start_date <= timestamp < end_date_plus_two
    study_period = participant_dictionary[user]
    in_study_period = (
        (participant_data.index >= study_period['start_date']) &
        (participant_data.index < study_period['end_date_plus_two'])
    )
    participant_frames.append(
        participant_data.loc[in_study_period].assign(pid=participant_number)
    )

# Concatenate once. Calling pd.concat inside the loop re-copies the
# accumulated rows on every iteration and depended on `None` being dropped.
combined_dataset = pd.concat(participant_frames)

dataset = combined_dataset
column_indices = {name: i for i, name in enumerate(dataset.columns)}
df = dataset

In [None]:
# Persist the combined dataset. The file name follows the configured
# `interval`, like the per-participant inputs, instead of hard-coding "15min".
dataset.to_excel(f"4-combined_data_{interval}.xlsx")

# Split dataset

In [None]:
# Fractions of the rows used for training and validation; whatever remains
# after both (here 20%) becomes the test set.
TRAINING_PERCENTAGE = 0.6
VALIDATION_PERCENTAGE = 0.2

total_rows = len(df)
num_features = len(df.columns)
column_indices = dict(zip(df.columns, range(num_features)))

# Row positions where the training and validation slices end.
training_end_index = int(total_rows * TRAINING_PERCENTAGE)
validation_end_index = int(total_rows * (TRAINING_PERCENTAGE + VALIDATION_PERCENTAGE))

# Positional, order-preserving split (no shuffling); copies keep later edits
# to one split from touching `df` or the other splits.
train_df = df.iloc[:training_end_index].copy()
val_df = df.iloc[training_end_index:validation_end_index].copy()
test_df = df.iloc[validation_end_index:].copy()

# Report each split's size in days' worth of records.
print(f"Training data: {round(len(train_df)/record_size_per_day, 3)} days")
print(f"Validation data: {round(len(val_df)/record_size_per_day, 3)} days")
print(f"Test data: {round(len(test_df)/record_size_per_day, 3)} days")
print(f"Total data: {round(len(df)/record_size_per_day, 3)} days")

# Normalize dataset

In [None]:
# train_mean = train_df.loc[:, normalize_features].mean()
# train_std = train_df.loc[:, normalize_features].std()

# train_df = normalize_data(train_df, train_mean, train_std)
# val_df = normalize_data(val_df, train_mean, train_std)
# test_df = normalize_data(test_df, train_mean, train_std)

# df_std = (df - train_mean) / train_std
# df_std = df_std.melt(var_name='Column', value_name='Normalized')
# plt.figure(figsize=(12, 6))
# ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
# _ = ax.set_xticklabels(df.keys(), rotation=90)

# WindowGenerator

In [None]:
BATCH_SIZE = 8


class WindowGenerator():
  """Builds sliding (inputs, labels) windows over train/val/test frames.

  One window spans ``input_width + shift`` consecutive rows. Its first
  ``input_width`` rows provide the inputs and its last ``label_width`` rows
  provide the labels. Label columns are moved to the end of every frame so
  that the input columns form one contiguous block starting at index 0.

  Args:
    input_width: number of time steps given to the model.
    label_width: number of time steps to predict.
    shift: number of rows added after the inputs to form the full window.
    train_df, val_df, test_df: DataFrames sharing the same columns.
    label_columns: list of column names to predict. With ``None`` every
      column is used both as an input and as a label.

  Datasets are batched with the module-level ``BATCH_SIZE``.
  """

  def __init__(self, input_width, label_width, shift,
               train_df, val_df, test_df,
               label_columns=None):
    # `label_columns=None` is the declared default, but it used to raise a
    # TypeError (`x not in None`, `list + None`). Treat it as "no dedicated
    # label columns" so the frames keep their order.
    label_names = [] if label_columns is None else list(label_columns)

    def labels_last(frame):
      # Reorder columns: non-label columns first, label columns last.
      feature_names = [c for c in frame.columns if c not in label_names]
      return frame.reindex(columns=feature_names + label_names)

    # Store the raw data.
    self.train_df = labels_last(train_df)
    self.val_df = labels_last(val_df)
    self.test_df = labels_last(test_df)

    # Work out the label column indices.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {name: i for i, name in
                                    enumerate(label_columns)}
    self.column_indices = {name: i for i, name in
                           enumerate(self.train_df.columns)}
    # Columns fed to the model: every column that is not a label.
    self.input_columns = {name: i for name, i in self.column_indices.items()
                          if name not in label_names}

    # Work out the window parameters.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift

    self.total_window_size = input_width + shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    return '\n'.join([
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}'])

  def split_window(self, features):
    """Split a batch of full windows into ``(inputs, labels)``.

    Args:
      features: batch of windows indexed as [batch, time, column].

    Returns:
      ``inputs``: the input time steps restricted to the input columns.
      ``labels``: the label time steps, restricted to ``label_columns``
      when those are set, otherwise all columns.
    """
    # Input columns are contiguous (labels were moved last), so one slice
    # from the first to the last input column selects them all.
    input_positions = list(self.input_columns.values())
    inputs = features[:, self.input_slice,
                      input_positions[0]:input_positions[-1] + 1]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
      labels = tf.stack(
          [labels[:, :, self.column_indices[name]] for name in self.label_columns],
          axis=-1)

    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])

    return inputs, labels

  def plot(self, model=None, plot_col='T (degC)', max_subplots=3):
    """Plot inputs, labels and (optionally) predictions for example windows.

    Args:
      model: optional callable; when given, ``model(inputs)`` is plotted
        as predictions.
      plot_col: name of the column to plot. NOTE: the default is not one of
        the columns configured in this notebook, so pass it explicitly.
      max_subplots: maximum number of windows to draw.

    Raises:
      KeyError: if ``plot_col`` is not a column of the training frame.
    """
    inputs, labels = self.example
    plt.figure(figsize=(12, 8))
    plot_col_index = self.column_indices[plot_col]
    # `inputs` only holds the non-label columns, located at indices 0..k-1
    # after the reordering in __init__. None means `plot_col` is a label-only
    # column, for which there is no input series to draw (indexing `inputs`
    # with the full-frame index would be out of range).
    input_col_index = self.input_columns.get(plot_col)
    max_n = min(max_subplots, len(inputs))
    for n in range(max_n):
      # Keep the axes handle: `ax` was previously used without being defined.
      ax = plt.subplot(max_n, 1, n+1)
      plt.ylabel(f'{plot_col} [normed]')
      plt.ylim(-0.1, 1.1)
      ax.set_yticks(
          [0.0, 0.5, 1.0]
      )
      if input_col_index is not None:
        plt.plot(self.input_indices, inputs[n, :, input_col_index],
                 label='Inputs', marker='.', zorder=-10)

      if self.label_columns:
        label_col_index = self.label_columns_indices.get(plot_col, None)
      else:
        label_col_index = plot_col_index

      if label_col_index is None:
        continue

      plt.scatter(self.label_indices, labels[n, :, label_col_index],
                  edgecolors='k', label='Labels', c='#2ca02c', s=64)
      if model is not None:
        predictions = model(inputs)
        plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                    marker='X', edgecolors='k', label='Predictions',
                    c='#ff7f0e', s=64)

      if n == 0:
        plt.legend()

    plt.xlabel('Time [h]')

  def make_dataset(self, data):
    """Turn a frame into a shuffled, batched dataset of (inputs, labels)."""
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=BATCH_SIZE,)

    ds = ds.map(self.split_window)

    return ds

  @property
  def train(self):
    """Dataset built from the training frame (rebuilt on every access)."""
    return self.make_dataset(self.train_df)

  @property
  def val(self):
    """Dataset built from the validation frame (rebuilt on every access)."""
    return self.make_dataset(self.val_df)

  @property
  def test(self):
    """Dataset built from the test frame (rebuilt on every access)."""
    return self.make_dataset(self.test_df)

  @property
  def example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
      # No example batch was found, so get one from the `.train` dataset
      result = next(iter(self.train))
      # And cache it for next time
      self._example = result
    return result

# Compile & Fit

In [None]:
def compile_and_fit(model, window, patience=2):
  """Compile `model` for binary classification and train it on `window`.

  The model is compiled with binary cross-entropy (on probabilities, not
  logits), Adam at `LEARNING_RATE`, and the notebook-level `METRICS`.
  Training runs for at most `MAX_EPOCHS` epochs on `window.train`, validating
  on `window.val`, and stops early once `val_loss` has not improved for
  `patience` epochs, restoring the best weights.

  Returns:
    The history object returned by `model.fit`.
  """
  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
      optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
      metrics=METRICS,
  )

  stop_on_stalled_val_loss = tf.keras.callbacks.EarlyStopping(
      monitor='val_loss',
      mode='min',
      patience=patience,
      restore_best_weights=True,
  )

  return model.fit(
      window.train,
      epochs=MAX_EPOCHS,
      validation_data=window.val,
      callbacks=[stop_on_stalled_val_loss],
  )

# Multi-step

In [None]:
# The convolution kernel spans the whole input window.
CONV_WIDTH = MULTI_STEP_WIDTH
# Number of future steps to predict: SHIFT steps per hour times 24 hours.
OUT_STEPS = SHIFT * 24

# Window: MULTI_STEP_WIDTH input steps followed by OUT_STEPS label steps of
# the 'wearing_off' column.
multi_window = WindowGenerator(
    input_width=MULTI_STEP_WIDTH,
    label_width=OUT_STEPS,
    shift=OUT_STEPS,
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    label_columns=['wearing_off'],
)
# Bare expression so the notebook displays the window summary.
multi_window

In [None]:
# Reset Keras' global state so re-running this cell starts from a fresh model.
K.clear_session()
# Multi-step convolutional classifier: one Conv1D over the whole input window
# followed by a sigmoid Dense layer that emits OUT_STEPS values per window.
multi_conv_model = tf.keras.Sequential([
    # Normalizes the input features inside the model (the manual
    # normalization cell earlier in this notebook is commented out).
    tf.keras.layers.BatchNormalization(),
    # Keep only the last CONV_WIDTH time steps.
    # Shape [batch, time, features] => [batch, CONV_WIDTH, features]
    tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]),
    # The kernel covers all CONV_WIDTH steps, so one time step remains.
    # Shape => [batch, 1, conv_units]
    tf.keras.layers.Conv1D(256, activation='relu', kernel_size=(CONV_WIDTH)),
    # One sigmoid output per predicted step.
    # Shape => [batch, 1,  out_steps*features]
    tf.keras.layers.Dense(OUT_STEPS,
                          activation='sigmoid',
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features]
    # (presumably features == 1 here, since a single label column is used)
    tf.keras.layers.Reshape([OUT_STEPS, -1])


    # Alternative architecture, currently disabled:
    # tf.keras.layers.Conv1D(filters=64,
    #                        kernel_size=(MULTI_STEP_WIDTH,),
    #                        activation='relu'),
    # tf.keras.layers.Dense(units=64, activation='relu'),
    # tf.keras.layers.Dense(units=4, activation='sigmoid', name="output"),
])

# Compile and train; `history` is used by the plotting cell below.
history = compile_and_fit(multi_conv_model, multi_window)

In [None]:
# Loss and metric values on the validation set, then on the test set
# (the test evaluation runs with verbose=0).
print(multi_conv_model.evaluate(multi_window.val))
print(multi_conv_model.evaluate(multi_window.test, verbose=0))

In [None]:
# Print every test-set result next to its metric name, one per line.
# Note that this runs a fresh evaluation of the test set.
for name, value in zip(multi_conv_model.metrics_names, multi_conv_model.evaluate(multi_window.test, verbose=0)):
  print(name, ': ', value)
print()

In [None]:
# Plot training vs. validation curves for each entry in `metrics`.
# Only 'loss' is enabled; the names in the trailing comment are disabled.
metrics = ['loss'] #, 'balanced_accuracy', 'auc', 'prc', 'precision', 'recall']
plt.figure(figsize=(25, 10))
for n, metric in enumerate(metrics):
    # Axis label derived from the metric key, e.g. 'val_loss' -> 'Val loss'.
    name = metric.replace("_"," ").capitalize()
    # Subplots are laid out on a fixed 3x3 grid.
    plt.subplot(3,3,n+1)
    plt.plot(history.epoch, history.history[metric], label='Train')
    plt.plot(history.epoch, history.history['val_'+metric], label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel(name)
    # Optional per-metric y-axis limits, currently disabled:
    # if metric == 'loss':
    #   plt.ylim([0, plt.ylim()[1]])
    # # elif metric == 'auc':
    # #   plt.ylim([0.8,1])
    # else:
    #   plt.ylim([0,1])
    plt.legend()
plt.show()

In [None]:
def data_loader(new_df):
    """Convert `new_df` into a float32 array with a leading axis of size 1.

    Args:
        new_df: array-like data (for example a DataFrame slice).

    Returns:
        A float32 NumPy array shaped (1, *shape of new_df).
    """
    values = np.array(new_df, dtype=np.float32)
    return np.expand_dims(values, axis=0)

In [None]:
# Shape of one model input: the first 36 training rows without the
# 'wearing_off' column, with a leading axis of size 1 added by data_loader.
data_loader(train_df.loc[:, train_df.columns != 'wearing_off'].iloc[0:36]).shape

In [None]:
# Shape of the model's prediction for that same single 36-row input.
multi_conv_model.predict(data_loader(train_df.loc[:, train_df.columns != 'wearing_off'].iloc[0:36])).shape

# Export Model

In [None]:
# Export the trained model with tf.saved_model.save into
# <base_path>/<model_name>/<model_version>.
base_path = "models"
model_version = "4"
model_name = "multi_conv_model"
model_path = os.path.join(base_path, model_name, model_version)
tf.saved_model.save(multi_conv_model, model_path)

In [None]:
# Reload the exported model from disk to check that it can be called.
saved_model = tf.saved_model.load(model_path)

In [None]:
# Call the reloaded model on the first 36 training rows without the
# 'wearing_off' column. The column mask is built from `train_df` itself; it
# was previously built from `test_df.columns`, which only worked because both
# frames happen to share the same columns.
saved_model(data_loader(train_df.loc[:, train_df.columns != 'wearing_off'].iloc[0:36]), training=False)

In [None]:
# Scratch call: feeds the first 24 test rows, with every column, to the
# reloaded model as a nested list and reads index 14 of the last output axis.
# NOTE(review): the model above was trained on 36-step windows that exclude
# 'wearing_off', and its final Reshape is [OUT_STEPS, -1] — confirm that
# 24 rows, all columns and output index 14 are still valid for this model.
saved_model(data_loader(test_df.iloc[0:24, :]).tolist(), training=False)[:,:,14]

In [None]:
# Print one sample input (first 36 training rows without 'wearing_off') as a
# nested list. The column mask is built from `train_df` itself rather than
# from `test_df.columns`, so it cannot drift out of sync with the frame it
# is applied to.
print(data_loader(train_df.loc[:, train_df.columns != 'wearing_off'].iloc[0:36]).tolist())

In [None]:
# Shape of the first 36 test rows with all columns (including 'wearing_off'),
# after data_loader adds the leading axis of size 1.
data_loader(test_df.iloc[0:36, :]).shape