In [1]:
## Importing Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Dropout 
from tensorflow.keras import metrics, regularizers
from tensorflow.keras.layers import BatchNormalization




In [3]:
## importing the data and preview
df_train = pd.read_csv('train.csv')
#df_test = pd.read_csv('test.csv')
df_y= pd.read_csv('train_labels.csv')
df_train.head()

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,sensor_08,sensor_09,sensor_10,sensor_11,sensor_12
0,0,47,0,-0.196291,0.112395,1.0,0.329204,-1.00466,-0.131638,-0.127505,0.368702,-0.1,-0.963873,-0.985069,0.531893,4.751492
1,0,47,1,-0.44745,0.134454,1.0,-0.658407,0.162495,0.340314,-0.209472,-0.867176,0.2,-0.301301,0.082733,-0.231481,0.45439
2,0,47,2,0.326893,-0.694328,1.0,0.330088,0.473678,1.280479,-0.094718,0.535878,1.4,1.002168,0.449221,-0.58642,-4.736147
3,0,47,3,0.523184,0.75105,1.0,0.976991,-0.563287,-0.720269,0.79326,0.951145,-0.3,-0.995665,-0.43429,1.34465,0.429241
4,0,47,4,0.272025,1.07458,1.0,-0.136283,0.398579,0.044877,0.560109,-0.541985,-0.9,1.055636,0.812631,0.123457,-0.223359


In [4]:
## Data description
df_train.describe()

Unnamed: 0,sequence,subject,step,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,sensor_08,sensor_09,sensor_10,sensor_11,sensor_12
count,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0,1558080.0
mean,12983.5,331.6331,29.5,0.0004365526,-0.001034982,-0.2178045,-0.002156555,-0.001828903,-0.001651785,-0.0004122917,-2.620665e-05,-0.0001298393,0.001365584,0.0003315801,-0.003733291,-0.01172605
std,7496.318,195.8257,17.31811,2.658684,4.4042,2.298002,3.934184,1.683685,1.590818,3.345143,3.243428,4.501534,2.592913,1.917333,4.532568,39.11767
min,0.0,0.0,0.0,-375.0634,-434.5977,-31.65948,-408.3761,-23.62601,-74.9828,-470.5046,-407.0115,-536.1,-270.3468,-43.41271,-427.0586,-612.5494
25%,6491.75,161.75,14.75,-0.5,-0.4831933,-0.6461531,-0.4929204,-0.4729928,-0.4786836,-0.492714,-0.5022901,-0.5,-0.5151734,-0.4787939,-0.4835391,-0.5805627
50%,12983.5,335.0,29.5,-0.00309119,0.003151261,0.0,0.0,-0.001589577,0.002991773,0.0009107468,-0.002290076,0.0,-0.001445087,-0.001655822,0.00308642,0.0
75%,19475.25,501.0,44.25,0.484544,0.4926471,0.3338469,0.4893805,0.4701565,0.5056096,0.492714,0.4847328,0.5,0.5086705,0.4780386,0.4938272,0.5703325
max,25967.0,671.0,59.0,335.8246,449.5914,1.666667,436.6504,24.87286,77.91548,442.5009,331.2542,630.1,367.9812,41.86559,448.0206,630.5111


In [5]:
##  Checking for null values
df_train.isnull().sum()

sequence     0
subject      0
step         0
sensor_00    0
sensor_01    0
sensor_02    0
sensor_03    0
sensor_04    0
sensor_05    0
sensor_06    0
sensor_07    0
sensor_08    0
sensor_09    0
sensor_10    0
sensor_11    0
sensor_12    0
dtype: int64

In [6]:
df_y.shape

(25968, 2)

In [7]:
df_y.isnull().sum()

sequence    0
state       0
dtype: int64

In [8]:
df_y.tail()

Unnamed: 0,sequence,state
25963,25963,1
25964,25964,0
25965,25965,1
25966,25966,1
25967,25967,0


In [9]:
# train test split 20%
c = round(len(df_train.sequence.unique())*0.8)
df_Xtrain = df_train[df_train['sequence'] <= c]
df_Xtest = df_train[df_train['sequence'] > c]
df_ytrain = df_y[df_y.sequence <= (df_Xtrain.iloc[-1,0])]
df_ytest = df_y[df_y.sequence > (df_Xtrain.iloc[-1,0])]

In [10]:
# Scaling
scaler = StandardScaler()
df_Xtrain.iloc[:,3:] = scaler.fit_transform(df_Xtrain.iloc[:,3:])
df_Xtest.iloc[:,3:] = scaler.fit_transform(df_Xtest.iloc[:,3:])

In [11]:
## Preparing the data for making 3D arrays for LSTM

X_train=[]
for i in df_Xtrain.sequence.unique():
    X_train.append(df_Xtrain[df_Xtrain.sequence == i].iloc[:,3:])

X_test=[]
for i in df_Xtest.sequence.unique():
    X_test.append(df_Xtest[df_Xtest.sequence == i].iloc[:,3:])

target=[]
for i in df_test.sequence.unique():
    target.append(df_test[df_test.sequence == i].iloc[:,3:]) 


X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(df_ytrain.state)
Y_test = np.array(df_ytest.state)
target = np.array(target)

NameError: name 'df_test' is not defined

In [15]:
## Data shape
X_train.shape

(20775, 60, 13)

In [26]:
# model defining

model = Sequential()
model.add(Bidirectional(LSTM(units=256, return_sequences=True), input_shape=[X_train.shape[1],X_train.shape[2]]))
model.add(Bidirectional(LSTM(units=128, return_sequences=True)))
model.add(Bidirectional(LSTM(units=64, return_sequences=False)))
#model.add(Dropout(0.2))
#model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation= 'sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_15 (Bidirect  (None, 60, 512)           552960    
 ional)                                                          
                                                                 
 bidirectional_16 (Bidirect  (None, 60, 256)           656384    
 ional)                                                          
                                                                 
 bidirectional_17 (Bidirect  (None, 128)               164352    
 ional)                                                          
                                                                 
 dense_5 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1373825 (5.24 MB)
Trainable params: 1373825 (5.24 MB)
Non-trainable params: 0 (0.00 Byte)
________________

In [28]:
## training the model
training_start = time.perf_counter()
model.fit(X_train,Y_train, epochs=15,batch_size=256, validation_split=0.1, verbose=1)
training_end = time.perf_counter()

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [29]:
## model evaluation

test_loss, test_accuracy = model.evaluate(X_test, Y_test)
test_pred = model.predict(X_test)
test_pred = np.round(test_pred)
precision = precision_score(test_pred, Y_test)
f_score = f1_score(test_pred, Y_test)
recall = recall_score(test_pred, Y_test)
accuracy = accuracy_score(test_pred, Y_test)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f_score)
print("Accuracy:", accuracy)

Precision: 0.8728189220628151
Recall: 0.8858717040535222
F1-score: 0.879296875
Accuracy: 0.8809936452917388
