### Import Libraries

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Read in Dataset and Add Class Label

In [3]:
df = pd.read_csv('fbdh1.csv')

def label_fix(label):
    if label < -7.5:
        return 0
    elif label > 7.5:
        return 1
    else:
        return 2

df['Class'] = df['Flow'].apply(label_fix)

### Create Feature Matrix / Gather Training and Testing Data

In [4]:
# Create the Feature Matrix and Scale Features
X = df.drop('Class', axis = 1)
X.drop('Flow', axis = 1, inplace = True)

scaler = StandardScaler()
scaler.fit(X)
scaled_features = scaler.transform(X)
X = pd.DataFrame(scaled_features, columns = X.columns[:])

# Create the classification matrix
y = df['Class']

# Perform train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [5]:
X_train.head()

Unnamed: 0,D1,D2,P1,P2,dD1,dD2,dP1,dP2
444,1.178579,-0.223814,1.192568,-0.283248,0.95724,0.136326,0.916936,0.280479
6593,-1.569557,-0.162236,-1.524327,-0.240768,-0.095965,-0.015353,-0.112203,0.010169
4568,1.512751,0.171241,1.676542,0.905516,-0.986103,-0.696264,-0.943607,-0.842346
3604,-0.638754,0.231635,-0.537954,0.388277,-1.471377,-0.593027,-1.500482,-0.792762
954,1.068579,0.245603,1.131168,0.541318,-0.334582,0.244668,-0.308558,0.402038


In [25]:
4664/53

88.0

In [27]:
# Reshape data for LSTM input
X_train_LSTM = X_train.values.reshape(88, 53, 8)

In [28]:
X_train_LSTM.shape

(88, 53, 8)

In [29]:
y_train_LSTM = y_train.values.reshape(88, 1)

ValueError: cannot reshape array of size 4664 into shape (88,1)

In [16]:
y_train_LSTM.shape

(4664, 1)

### Develop LSTM Model

In [11]:
LSTM = keras.Sequential()
LSTM.add(keras.layers.LSTM(175, input_shape = (1, 8)))
LSTM.add(keras.layers.Dropout(0.3))
LSTM.add(keras.layers.Dense(50, activation = tf.nn.relu))
LSTM.add(keras.layers.Dropout(0.4))
LSTM.add(keras.layers.Dense(3, activation = tf.nn.softmax))
LSTM.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

LSTM.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 175)               128800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 175)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 50)                8800      
_________________________________________________________________
dropout_3 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 153       
Total params: 137,753
Trainable params: 137,753
Non-trainable params: 0
_________________________________________________________________


In [17]:
LSTM.fit(X_train_LSTM, y_train_LSTM, epochs = 50)

W0912 11:13:07.054593 4673078720 deprecation.py:323] From /Users/mikefurr/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1a36c7d5c0>

### Evaluate Model on Testing Split

In [18]:
X_test_LSTM = X_test.values.reshape(2000, 1, 8)

predictions = LSTM.predict(X_test_LSTM)

final_pred = []
for score in range(0, len(predictions)):
    final_pred.append(np.argmax(predictions[score]))
    
print(classification_report(y_test, final_pred))

              precision    recall  f1-score   support

           0       0.95      0.92      0.93       837
           1       0.94      0.93      0.94       801
           2       0.73      0.80      0.76       362

   micro avg       0.90      0.90      0.90      2000
   macro avg       0.87      0.88      0.88      2000
weighted avg       0.91      0.90      0.90      2000



### Test on New Dataset

In [19]:
# Read in dataframe
test = pd.read_csv('test_data_2.csv')
test['Class'] = test['Flow'].apply(label_fix)

# Create the Feature Matrix and Scale Features
X_1 = test.drop('Class', axis = 1)
X_1.drop('Flow', axis = 1, inplace = True)

scaler = StandardScaler()
scaler.fit(X_1)
scaled_features = scaler.transform(X_1)
X_1 = pd.DataFrame(scaled_features, columns = X_1.columns[:])

# Create the classification matrix
y_1 = test['Class']

X_LSTM = X_1.values.reshape(5019, 1, 8)

In [20]:
test_predictions = LSTM.predict(X_LSTM)

final_pred_test = []
for score in range(0, len(test_predictions)):
    final_pred_test.append(np.argmax(test_predictions[score]))
    
print(classification_report(y_1, final_pred_test))

              precision    recall  f1-score   support

           0       0.87      0.90      0.88      2145
           1       0.87      0.89      0.88      2074
           2       0.73      0.64      0.68       800

   micro avg       0.85      0.85      0.85      5019
   macro avg       0.82      0.81      0.81      5019
weighted avg       0.85      0.85      0.85      5019

