<a href="https://colab.research.google.com/github/hydradon/clever-challenge/blob/master/part3_LSTM_sliding_windows_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using an LSTM with a sliding window of size 5

I attempted to predict an event's class from the features of the previous 5 events using an LSTM neural network.

However, the accuracy remains very low (about 56% on the held-out split). I have tried tuning the number of layers and the layer sizes, but the network does not seem to capture the time-series pattern.

In [33]:
import pandas as pd

# Initial inspection shows that the timestamp column is Unix epoch time at
# one-second granularity; this helper converts such values to datetimes.
dateparse = lambda timestamp: pd.to_datetime(timestamp, unit='s')

# `date_parser` and `infer_datetime_format` are deprecated in pandas >= 2.0,
# so load the raw epoch values as the index and convert them explicitly.
data = pd.read_csv("https://raw.githubusercontent.com/hydradon/clever-challenge/master/seq/sample.csv",
                   index_col="timestamp")
data.index = dateparse(data.index)

In [34]:
# Features f11, f16, f19 and f22-f30 appear to be constant 0 on every row,
# so they carry no information.
const_col = ["f11", "f16", "f19"]
const_col.extend("f{}".format(num) for num in range(22, 31))

# Drop the constant columns together with event_id, so that event_id is not
# used as a feature in this trial. Missing columns are ignored.
data.drop(columns=const_col + ["event_id"], inplace=True, errors='ignore')

# Keep a copy of the datetime index as a regular column.
data["time_stamp"] = data.index
data.head(5)

Unnamed: 0_level_0,class,f1,f2,f3,f3.1,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f17,f18,f20,f21,time_stamp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-11-21 14:17:52,1,9,16257,172188,10.287316,45344815,0,0.0,1,0.0,0,0,0,9230,0,0.0,0.0,0.0,0.0,0,2016-11-21 14:17:52
2016-12-01 16:23:01,0,1,1,1,0.0,2,1,3.0,2,10.08691,1,0,0,9231,0,0.0,0.0,0.0,0.0,0,2016-12-01 16:23:01
2016-12-02 09:14:02,1,7,1731,10212,7.460379,2000330,84524,157.627693,1,2.579979,2442,172188,172076,9230,0,0.0,0.0,0.0,0.0,0,2016-12-02 09:14:02
2016-12-02 10:18:05,0,1,4,12,1.665603,353,0,0.0,1,0.0,0,0,0,9232,0,0.0,0.0,0.0,0.0,0,2016-12-02 10:18:05
2016-12-02 10:18:39,0,2,4,22,0.919741,6263,4346,684.409091,2,10.341428,21,12,0,9232,0,0.0,0.0,0.0,0.0,0,2016-12-02 10:18:39


In [35]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# NOTE(review): `encoder` is created here but is not used in any visible cell.
encoder = LabelEncoder()

# The 19 feature columns to be scaled.
ft_cols = [
    'f1', 'f2', 'f3', 'f3.1', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10',
    'f12', 'f13', 'f14', 'f15', 'f17', 'f18', 'f20', 'f21',
]

scaler = MinMaxScaler(feature_range=(0, 1))

# Make sure every feature is float32 before scaling.
data[ft_cols] = data[ft_cols].astype('float32')

# Rescale each feature column into [0, 1].
# NOTE(review): the scaler is fit on all rows, i.e. before the train/test
# split performed further down, so test rows influence the scaling.
scaled_features = scaler.fit_transform(data[ft_cols])
data[ft_cols] = scaled_features

In [36]:
# Inspect the scaled frame; the rendered output elides the middle rows.
data

Unnamed: 0_level_0,class,f1,f2,f3,f3.1,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f17,f18,f20,f21,time_stamp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-11-21 14:17:52,1,1.000000,1.000000,1.000000,1.000000,1.000000e+00,0.000000e+00,0.005420,0.006135,0.000000,0.000000,0.000000,0.000000,0.924543,0.000000,0.000000,0.000000,0.0,0.0,0.000000,2016-11-21 14:17:52
2016-12-01 16:23:01,0,0.111111,0.000062,0.000006,0.000000,4.410647e-08,2.660611e-07,0.005534,0.012270,0.014108,0.000062,0.000000,0.000000,0.924644,0.000000,0.000000,0.000000,0.0,0.0,0.000000,2016-12-01 16:23:01
2016-12-02 09:14:02,1,0.777778,0.106477,0.059307,0.725202,4.411375e-02,2.248855e-02,0.011436,0.006135,0.003608,0.151724,0.828237,0.835616,0.924543,0.000000,0.000000,0.000000,0.0,0.0,0.000000,2016-12-02 09:14:02
2016-12-02 10:18:05,0,0.111111,0.000246,0.000070,0.161908,7.784793e-06,0.000000e+00,0.005420,0.006135,0.000000,0.000000,0.000000,0.000000,0.924744,0.000000,0.000000,0.000000,0.0,0.0,0.000000,2016-12-02 10:18:05
2016-12-02 10:18:39,0,0.222222,0.000246,0.000128,0.089405,1.381194e-04,1.156302e-03,0.031541,0.012270,0.014464,0.001305,0.000058,0.000000,0.924744,0.000000,0.000000,0.000000,0.0,0.0,0.000000,2016-12-02 10:18:39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-11-06 14:15:03,0,0.111111,0.000062,0.000035,0.111751,4.829659e-06,2.660611e-05,0.028498,0.012270,1.000000,0.000373,0.011688,0.006303,0.950532,0.083333,0.009090,0.006669,0.0,0.0,0.083333,2018-11-06 14:15:03
2018-11-06 14:16:49,0,0.111111,0.000062,0.000012,0.019441,4.322435e-06,5.321222e-07,0.007309,0.030675,0.331678,0.000124,0.000197,0.000199,0.042043,0.083333,0.015483,0.016645,0.0,0.0,0.083333,2018-11-06 14:16:49
2018-11-06 14:17:33,0,0.111111,0.000185,0.000029,0.107489,1.279088e-06,9.578200e-06,0.042586,0.153374,0.380918,0.000311,0.011448,0.011524,0.932872,0.083333,0.137395,0.146791,0.0,0.0,0.083333,2018-11-06 14:17:33
2018-11-06 14:35:56,0,0.111111,0.000123,0.000017,0.050759,5.733842e-07,2.660611e-07,0.009211,0.036810,0.289239,0.000186,0.005916,0.005876,0.930363,0.083333,0.009664,0.010191,0.0,0.0,0.083333,2018-11-06 14:35:56


In [37]:
# Number of consecutive past events that make up one input sequence.
WINDOW_SIZE = 5

# Feature matrix (every column except the timestamp copy and the label)
# and the label vector.
cur_d = data.drop(columns=["time_stamp", "class"]).values
cur_d_output = data["class"].values

# Build sliding windows of WINDOW_SIZE events:
# the features of events i-WINDOW_SIZE .. i-1 are the input, and the class of
# event i (the event right after the window) is the prediction target.
x = list()
y = list()
for i in range(WINDOW_SIZE, len(cur_d)):
    sub_x = cur_d[i - WINDOW_SIZE : i]
    # Add a leading axis of length 1 so the windows can later be joined
    # along axis 0 into a single 3-D array.
    x.append(sub_x.reshape(1, sub_x.shape[0], sub_x.shape[1]))

    sub_y = cur_d_output[i]
    y.append(sub_y)

In [38]:
import numpy as np

# Join the per-window arrays (each has a leading axis of length 1) along the
# first axis, and turn the label list into an array.
t_x = np.vstack(x)
t_y = np.asarray(y)
(t_x.shape, t_y.shape)

((18412, 5, 19), (18412,))

In [39]:
# Random 80/20 split of the windows into a training and a test set.
# NOTE(review): consecutive windows overlap, so a random split places windows
# that share events on both sides of the split.
np.random.seed(0)
orders = np.random.permutation(np.arange(len(t_x)))

# Index of the first test sample within the shuffled order.
n_train = int(0.8 * t_x.shape[0])
train_idx, test_idx = orders[:n_train], orders[n_train:]

t_x_train, t_x_test = t_x[train_idx], t_x[test_idx]
t_y_train, t_y_test = t_y[train_idx], t_y[test_idx]

In [40]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout
from keras.metrics import binary_accuracy
from keras.optimizers import Adam

# Seed NumPy's global random generator.
# NOTE(review): this alone may not make the Keras weight initialisation
# reproducible -- confirm whether the backend needs its own seed.
np.random.seed(7)

model = Sequential()

# Recurrent layer. Input per sample: time steps * number of features.
model.add(LSTM(128,
               input_shape=(t_x.shape[1], t_x.shape[2]),
               activation='relu'))

# Fully connected layer followed by dropout.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))

# Output layer: one sigmoid unit for binary classification (0 or 1).
model.add(Dense(1, activation='sigmoid'))

opt = Adam(learning_rate=3e-4)

# binary_crossentropy matches the binary (0, 1) label.
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# summary() prints the table itself and returns None; wrapping it in print()
# produced a stray "None" line after the table.
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 128)               75776     
_________________________________________________________________
dense_20 (Dense)             (None, 128)               16512     
_________________________________________________________________
dropout_11 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 1)                 129       
Total params: 92,417
Trainable params: 92,417
Non-trainable params: 0
_________________________________________________________________
None


In [41]:
from keras.callbacks import ModelCheckpoint
import os

# Weight checkpointing is currently disabled: no ModelCheckpoint callback is
# created and none is passed to fit().

# Train the model.
# NOTE(review): the validation data is the same test split that is used for
# the final evaluation at the end of the notebook.
history = model.fit(
    x=t_x_train,
    y=t_y_train,
    batch_size=512,
    epochs=100,
    verbose=2,
    validation_data=(t_x_test, t_y_test),
    shuffle=False,
)

Epoch 1/100
29/29 - 1s - loss: 0.6888 - accuracy: 0.5487 - val_loss: 0.6882 - val_accuracy: 0.5479
Epoch 2/100
29/29 - 0s - loss: 0.6883 - accuracy: 0.5486 - val_loss: 0.6880 - val_accuracy: 0.5479
Epoch 3/100
29/29 - 0s - loss: 0.6881 - accuracy: 0.5486 - val_loss: 0.6879 - val_accuracy: 0.5479
Epoch 4/100
29/29 - 0s - loss: 0.6879 - accuracy: 0.5486 - val_loss: 0.6877 - val_accuracy: 0.5479
Epoch 5/100
29/29 - 0s - loss: 0.6876 - accuracy: 0.5485 - val_loss: 0.6876 - val_accuracy: 0.5479
Epoch 6/100
29/29 - 0s - loss: 0.6874 - accuracy: 0.5486 - val_loss: 0.6874 - val_accuracy: 0.5471
Epoch 7/100
29/29 - 0s - loss: 0.6873 - accuracy: 0.5493 - val_loss: 0.6872 - val_accuracy: 0.5463
Epoch 8/100
29/29 - 0s - loss: 0.6868 - accuracy: 0.5496 - val_loss: 0.6870 - val_accuracy: 0.5441
Epoch 9/100
29/29 - 0s - loss: 0.6865 - accuracy: 0.5489 - val_loss: 0.6867 - val_accuracy: 0.5466
Epoch 10/100
29/29 - 0s - loss: 0.6864 - accuracy: 0.5495 - val_loss: 0.6864 - val_accuracy: 0.5479
Epoch 11/

In [42]:
# Evaluate on the test split; the first returned value (the loss) is discarded.
_, accuracy = model.evaluate(x=t_x_test, y=t_y_test)

# Report the accuracy as a percentage.
accuracy_pct = accuracy * 100
print('Accuracy: %.2f %%' % accuracy_pct)

Accuracy: 55.96 %
