### Import Libraries

In [365]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Read in Dataset

In [366]:
data = pd.read_csv('fbdh1.csv')

In [367]:
data.head()

Unnamed: 0,Flow,D1,D2,P1,P2,dD1,dD2,dP1,dP2
0,-16.69,99.49,-145.71,48.04,-15.9,81.17,12.26,36.71,2.21
1,-16.23,172.78,-118.66,79.84,-11.94,80.09,18.49,35.48,2.91
2,-15.54,242.66,-81.28,109.77,-7.75,75.74,24.65,33.03,3.26
3,-15.17,316.42,-55.5,140.84,-5.3,74.07,28.43,31.89,3.37
4,-14.51,392.4,-31.67,171.92,-1.24,73.96,29.7,31.32,3.68


In [368]:
def label_fix(label):
    if label < -7.5:
        return 0
    elif label > 7.5:
        return 1
    else:
        return 2

df['Class'] = df['Flow'].apply(label_fix)

In [369]:
df.head()

Unnamed: 0,Flow,D1,D2,P1,P2,dD1,dD2,dP1,dP2,Class
0,-16.69,99.49,-145.71,48.04,-15.9,81.17,12.26,36.71,2.21,0
1,-16.23,172.78,-118.66,79.84,-11.94,80.09,18.49,35.48,2.91,0
2,-15.54,242.66,-81.28,109.77,-7.75,75.74,24.65,33.03,3.26,0
3,-15.17,316.42,-55.5,140.84,-5.3,74.07,28.43,31.89,3.37,0
4,-14.51,392.4,-31.67,171.92,-1.24,73.96,29.7,31.32,3.68,0


### Create Feature Matrix / Gather Training and Testing Data

In [370]:
X = df.drop('Class', axis = 1)
X.drop('Flow', axis = 1, inplace = True)

scaler = StandardScaler()
scaler.fit(X)
scaled_features = scaler.transform(X)
X = pd.DataFrame(scaled_features, columns = X.columns[:])

y = df['Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [371]:
# Cuts data into length evenly divisible by the window sizes

def windowSize(data):
    
    window = 15
    
    data.reset_index(drop = True, inplace = True)
    new_data = data
    
    for i in range(len(new_data) - window, len(new_data)):
        
        if i % window == 0:
            new_data = data.truncate(after = i - 1)
    
    return new_data

In [372]:
# Creates Classifiers for each window based on mode of that window

def classWindow(data):
    
    Class = []
    
    for i in range(0, len(data), 15):
        
        Class.append(int(data[i:i+1].mode()))
        
    return pd.DataFrame(Class)

In [373]:
trainY = classWindow(windowSize(y_train))

In [374]:
trainX = windowSize(X_train).values.reshape(int(len(windowSize(X_train))/15), 15, 8)

In [375]:
trainX.shape

(310, 15, 8)

### Develop LSTM Model

In [376]:
time_steps = trainX.shape[1]
features = trainX.shape[2]

LSTM = keras.Sequential()
LSTM.add(keras.layers.LSTM(175, input_shape = (time_steps, features)))
LSTM.add(keras.layers.Dropout(0.3))
LSTM.add(keras.layers.Dense(50, activation = tf.nn.relu))
LSTM.add(keras.layers.Dropout(0.4))
LSTM.add(keras.layers.Dense(3, activation = tf.nn.softmax))
LSTM.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

LSTM.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 175)               128800    
_________________________________________________________________
dropout_16 (Dropout)         (None, 175)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 50)                8800      
_________________________________________________________________
dropout_17 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_17 (Dense)             (None, 3)                 153       
Total params: 137,753
Trainable params: 137,753
Non-trainable params: 0
_________________________________________________________________


In [377]:
LSTM.fit(trainX, trainY, epochs = 50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1a43029320>

### Evaluate Model on Testing Split

In [378]:
testX = windowSize(X_test).values.reshape(int(len(windowSize(X_test)) / 15), 15, 8)

In [379]:
testY = classWindow(windowSize(y_test))

In [380]:
testX.shape

(133, 15, 8)

In [381]:
testY.shape

(133, 1)

In [382]:
predictions = LSTM.predict(testX)

In [383]:
len(predictions)

133

In [384]:
final_pred = []
for score in range(0, len(predictions)):
    final_pred.append(np.argmax(predictions[score]))
    
print(classification_report(testY, final_pred))

              precision    recall  f1-score   support

           0       0.81      0.71      0.76        62
           1       0.60      0.79      0.68        43
           2       0.45      0.36      0.40        28

   micro avg       0.66      0.66      0.66       133
   macro avg       0.62      0.62      0.61       133
weighted avg       0.67      0.66      0.66       133



### Test on New Dataset

In [19]:
# Read in dataframe
test = pd.read_csv('test_data_2.csv')
test['Class'] = test['Flow'].apply(label_fix)

# Create the Feature Matrix and Scale Features
X_1 = test.drop('Class', axis = 1)
X_1.drop('Flow', axis = 1, inplace = True)

scaler = StandardScaler()
scaler.fit(X_1)
scaled_features = scaler.transform(X_1)
X_1 = pd.DataFrame(scaled_features, columns = X_1.columns[:])

# Create the classification matrix
y_1 = test['Class']

X_LSTM = X_1.values.reshape(5019, 1, 8)

In [20]:
test_predictions = LSTM.predict(X_LSTM)

final_pred_test = []
for score in range(0, len(test_predictions)):
    final_pred_test.append(np.argmax(test_predictions[score]))
    
print(classification_report(y_1, final_pred_test))

              precision    recall  f1-score   support

           0       0.87      0.90      0.88      2145
           1       0.87      0.89      0.88      2074
           2       0.73      0.64      0.68       800

   micro avg       0.85      0.85      0.85      5019
   macro avg       0.82      0.81      0.81      5019
weighted avg       0.85      0.85      0.85      5019

