Assignment
# A multimodal sensor dataset for continuous stress detection of nurses in a hospital

## Loading required libraries

In [None]:
from datetime import timedelta, datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.models import Sequential
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.wrappers.scikit_learn import KerasRegressor

pd.set_option('display.float_format', lambda x: '%.3f' % x)
np.random.seed(1234)

TRAIN_TEST_SPLIT = 0.7

## 1. Loading the data for participant 5C

In [None]:
chunksize = 10 ** 5

# Read the CSV in chunks of `chunksize` rows, then concatenate once.
# Concatenating inside the loop re-copies the accumulated frame for every chunk.
with pd.read_csv("5C.csv", chunksize=chunksize, index_col = None, engine = 'python') as reader:
    df = pd.concat(list(reader))

In [None]:
# errors='ignore' keeps this cell re-runnable: once 'id' is gone, a second run is a no-op
# instead of a KeyError.
df = df.drop(columns = 'id', errors = 'ignore')

## 2. Machine Learning Model

Function that splits the data into train/test sets and converts each into a Keras sequence.
It can be reused to train the same model on every participant.

In [None]:
def data_to_keras(df, scaled = True, lookback = 7, batch_size = 32):
    """Split a participant frame in row order and wrap both parts as Keras sequences.

    The frame is indexed by its parsed ``datetime`` column. Every column except
    the last is used as a feature; the last column is the target. The first
    ``TRAIN_TEST_SPLIT`` fraction of rows becomes the training part, the rest
    the test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Participant data containing a ``datetime`` column. An ``id`` column is
        dropped when present; ``df`` itself is not modified.
    scaled : bool, default True
        Min-max scale the feature columns. The scaler is fitted on the training
        rows only and then applied to both parts, so test rows can fall
        outside [0, 1].
    lookback : int, default 7
        Number of consecutive rows in each input window.
    batch_size : int, default 32
        Number of windows per batch in both generators.

    Returns
    -------
    tuple of TimeseriesGenerator
        ``(train, test)``.
    """
    # The notebook drops 'id' from df before calling this function, so the
    # column may or may not still be there; accept both.
    data = df.drop(columns = 'id', errors = 'ignore')
    data['datetime'] = pd.to_datetime(data['datetime'])
    data = data.set_index('datetime')

    # Plain arrays: the generator indexes its inputs by integer position, which
    # is unambiguous for ndarrays but not for a datetime-indexed Series.
    features = data.iloc[:, :-1].to_numpy()
    target = data.iloc[:, -1].to_numpy()

    size = int(TRAIN_TEST_SPLIT * len(data))
    train_x, test_x = features[:size], features[size:]
    train_y, test_y = target[:size], target[size:]

    if scaled:
        # Fit on the training rows only so no test-set statistics leak into training.
        scaler = MinMaxScaler().fit(train_x)
        train_x = scaler.transform(train_x)
        test_x = scaler.transform(test_x)

    train = TimeseriesGenerator(train_x, train_y, length = lookback, batch_size = batch_size)
    test = TimeseriesGenerator(test_x, test_y, length = lookback, batch_size = batch_size)

    return train, test

In [None]:
# Defaults written out: the 7-row lookback must match the models' input_shape below.
train, test = data_to_keras(df, scaled = True, lookback = 7)

In [None]:
def create_model(dropout_rate, units, input_shape=(7, 8)):
    """Build and compile a single-layer LSTM regressor.

    Parameters
    ----------
    dropout_rate : float
        Rate passed to the Dropout layer that follows the LSTM.
    units : int
        Number of LSTM units.
    input_shape : tuple, default (7, 8)
        Shape of one input sample, passed to the LSTM layer. The default is the
        value this notebook uses; the 7 matches the default ``lookback`` of
        ``data_to_keras``.

    Returns
    -------
    Sequential
        Model with one linear output unit, compiled with MSE loss and the Adam
        optimizer.
    """
    model = Sequential()
    model.add(LSTM(units, input_shape=input_shape))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model

# Wrap create_model so scikit-learn can build and fit a fresh model per candidate
model = KerasRegressor(build_fn=create_model, verbose=0)

# Hyperparameter values to search over (3 x 3 = 9 combinations, all visited with n_iter=9)
param_grid = {'dropout_rate': [0.1, 0.2, 0.3], 'units': [32, 64, 128]}

# scikit-learn slices X and y by row index to form the CV folds, which a Keras
# Sequence of batches does not support, so stack the training batches into arrays.
train_batches = [train[i] for i in range(len(train))]
X_train = np.concatenate([batch_x for batch_x, _ in train_batches])
y_train = np.concatenate([batch_y for _, batch_y in train_batches])

# Ordered folds: each validation fold comes after the rows used to fit it
search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=9,
                            cv=TimeSeriesSplit(n_splits=5))

# Candidates are scored on the CV folds only; the held-out test generator is not
# used for tuning, so it stays available for an unbiased final evaluation.
search.fit(X_train, y_train, epochs=10)

  model = KerasRegressor(build_fn=create_model, verbose=0)


ERROR! Session/line number was not unique in database. History logging moved to new session 19


In [None]:
# Best hyperparameter combination, then its mean cross-validated score, one per line
print(search.best_params_, search.best_score_, sep='\n')

In [None]:
# Final network: LSTM(64) -> Dropout(0.2) -> one ReLU output unit
model = Sequential([
    LSTM(64, input_shape=(7, 8)),
    Dropout(0.2),
    Dense(1, activation='relu'),
])

model.summary()

# Adam with learning rate 0.001; MSE is both the loss and the reported metric
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.MeanSquaredError(),
    metrics=['mse'],
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                18688     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 18,753
Trainable params: 18,753
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Stop once val_loss has not improved for 5 consecutive epochs, and roll the model
# back to the weights of its best epoch instead of keeping the last (worse) ones.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto',
                               restore_best_weights=True)

# train the model with early stopping
# NOTE(review): the test generator doubles as the validation set here, so the final
# val_loss is not an independent test estimate — consider a separate validation split.
history = model.fit(train, epochs=20, validation_data=test, callbacks=[early_stopping])

In [None]:
# plot the training and validation loss curves
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.set(title='Training vs. validation loss', xlabel='Epoch', ylabel='Loss (MSE)')
ax.legend()
plt.show()