### Import Libraries

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

### Read in Dataset

In [3]:
df = pd.read_csv('fbdh1.csv')

In [4]:
df.head()

Unnamed: 0,Flow,D1,D2,P1,P2,dD1,dD2,dP1,dP2
0,-16.69,99.49,-145.71,48.04,-15.9,81.17,12.26,36.71,2.21
1,-16.23,172.78,-118.66,79.84,-11.94,80.09,18.49,35.48,2.91
2,-15.54,242.66,-81.28,109.77,-7.75,75.74,24.65,33.03,3.26
3,-15.17,316.42,-55.5,140.84,-5.3,74.07,28.43,31.89,3.37
4,-14.51,392.4,-31.67,171.92,-1.24,73.96,29.7,31.32,3.68


In [5]:
def label_fix(label):
    if label < -7.5:
        return 0
    elif label > 7.5:
        return 1
    else:
        return 2

df['Class'] = df['Flow'].apply(label_fix)

In [6]:
df.head()

Unnamed: 0,Flow,D1,D2,P1,P2,dD1,dD2,dP1,dP2,Class
0,-16.69,99.49,-145.71,48.04,-15.9,81.17,12.26,36.71,2.21,0
1,-16.23,172.78,-118.66,79.84,-11.94,80.09,18.49,35.48,2.91,0
2,-15.54,242.66,-81.28,109.77,-7.75,75.74,24.65,33.03,3.26,0
3,-15.17,316.42,-55.5,140.84,-5.3,74.07,28.43,31.89,3.37,0
4,-14.51,392.4,-31.67,171.92,-1.24,73.96,29.7,31.32,3.68,0


In [7]:
# Create Function to Gather Data into Overlapping Windows
# Cuts data into size compatible with window length and step size
# Gathers data into results output
# Takes the mode of the classes for each window and saves in classes output

def windowData(df, windowSize, stepSize):
    
    df.reset_index(drop = True, inplace = True)
    new_data = df
    
    for i in range(len(new_data) - windowSize, len(new_data)):
        
        if (i - windowSize) % stepSize  == 0:
            new_data = df.truncate(after = i - 1)
    
    windf = new_data
    
    length = len(windf['D1'])
    
    results = np.zeros([int((length - windowSize) / stepSize), windowSize, 8])
    classes = np.zeros([int((length - windowSize) / stepSize), windowSize, 1])
    
    for j in range(0, results.shape[0]):
        
        for i in range(0, windowSize):
            
            results[j][i] = [windf['D1'][i + (j * stepSize)], 
                                 windf['D2'][i + (j * stepSize)], 
                                 windf['P1'][i + (j * stepSize)], 
                                 windf['P2'][i + (j * stepSize)], 
                                 windf['dD1'][i + (j * stepSize)], 
                                 windf['dD2'][i + (j * stepSize)], 
                                 windf['dP1'][i + (j * stepSize)], 
                                 windf['dP2'][i + (j * stepSize)]]
            
            classes[j][i] = windf['Class'][i + (j * stepSize)]
        
    return results, classes

In [9]:
results, classes = windowData(df, 10, 2)

### Create Feature Matrix / Gather Training and Testing Data

In [24]:
df.head()

Unnamed: 0,Flow,D1,D2,P1,P2,dD1,dD2,dP1,dP2,Class
0,-16.69,99.49,-145.71,48.04,-15.9,81.17,12.26,36.71,2.21,0
1,-16.23,172.78,-118.66,79.84,-11.94,80.09,18.49,35.48,2.91,0
2,-15.54,242.66,-81.28,109.77,-7.75,75.74,24.65,33.03,3.26,0
3,-15.17,316.42,-55.5,140.84,-5.3,74.07,28.43,31.89,3.37,0
4,-14.51,392.4,-31.67,171.92,-1.24,73.96,29.7,31.32,3.68,0


In [15]:
scaleData = df.drop('Flow', axis = 1)
scaleData.drop('Class', axis = 1, inplace = True)

scaler = StandardScaler()
scaler.fit(scaleData)
scaled_features = scaler.transform(scaleData)

X = pd.DataFrame(scaled_features, columns = df.columns[1:9])
X['Class'] = df['Class']

y = df['Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [16]:
trainX, trainY = windowData(X_train, 5, 1)
testX, testY = windowData(X_test, 5, 1)

In [17]:
trainX.shape

(4658, 5, 8)

In [18]:
trainY.shape

(4658, 5, 1)

In [54]:
trainY

array([[0.],
       [1.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]])

### Develop LSTM Model

In [19]:
time_steps = trainX.shape[1]
features = trainX.shape[2]

LSTM = keras.Sequential()
LSTM.add(keras.layers.LSTM(175, input_shape = (time_steps, features)))
LSTM.add(keras.layers.Dropout(0.3))
LSTM.add(keras.layers.Dense(50, activation = tf.nn.relu))
LSTM.add(keras.layers.Dropout(0.4))
LSTM.add(keras.layers.Dense(3, activation = tf.nn.softmax))
LSTM.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

LSTM.summary()

W0916 15:39:16.841998 4555544000 deprecation.py:506] From /Users/mikefurr/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 175)               128800    
_________________________________________________________________
dropout (Dropout)            (None, 175)               0         
_________________________________________________________________
dense (Dense)                (None, 50)                8800      
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 153       
Total params: 137,753
Trainable params: 137,753
Non-trainable params: 0
_________________________________________________________________


In [20]:
LSTM.fit(trainX, trainY, epochs = 50)

W0916 15:39:20.181264 4555544000 deprecation.py:323] From /Users/mikefurr/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/50


InvalidArgumentError: Incompatible shapes: [32] vs. [32,5]
	 [[{{node metrics/acc/Equal}}]]

### Evaluate Model on Testing Split

In [57]:
testX.shape

(1994, 5, 8)

In [58]:
testY.shape

(1994, 1)

In [59]:
predictions = LSTM.predict(testX)

In [61]:
final_pred = []
for score in range(0, len(predictions)):
    final_pred.append(np.argmax(predictions[score]))
    
print(classification_report(testY, final_pred))

              precision    recall  f1-score   support

         0.0       0.86      0.85      0.85      1129
         1.0       0.78      0.83      0.81       768
         2.0       0.29      0.15      0.20        97

   micro avg       0.81      0.81      0.81      1994
   macro avg       0.64      0.61      0.62      1994
weighted avg       0.80      0.81      0.80      1994



### Test on New Dataset

In [19]:
# Read in dataframe
test = pd.read_csv('test_data_2.csv')
test['Class'] = test['Flow'].apply(label_fix)

# Create the Feature Matrix and Scale Features
X_1 = test.drop('Class', axis = 1)
X_1.drop('Flow', axis = 1, inplace = True)

scaler = StandardScaler()
scaler.fit(X_1)
scaled_features = scaler.transform(X_1)
X_1 = pd.DataFrame(scaled_features, columns = X_1.columns[:])

# Create the classification matrix
y_1 = test['Class']

X_LSTM = X_1.values.reshape(5019, 1, 8)

In [20]:
test_predictions = LSTM.predict(X_LSTM)

final_pred_test = []
for score in range(0, len(test_predictions)):
    final_pred_test.append(np.argmax(test_predictions[score]))
    
print(classification_report(y_1, final_pred_test))

              precision    recall  f1-score   support

           0       0.87      0.90      0.88      2145
           1       0.87      0.89      0.88      2074
           2       0.73      0.64      0.68       800

   micro avg       0.85      0.85      0.85      5019
   macro avg       0.82      0.81      0.81      5019
weighted avg       0.85      0.85      0.85      5019

