<a href="https://colab.research.google.com/github/Haakam21/unbump-api/blob/master/model/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Authenticate the Colab user, then hand the application-default credentials
# to gspread so later cells can open spreadsheets through `spreadsheets`.
import gspread
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()

default_credentials = GoogleCredentials.get_application_default()
spreadsheets = gspread.authorize(default_credentials)

In [None]:
# Read every cell of the first worksheet of the 'Pothole Data' spreadsheet.
# The first returned row is used as the column names; the rest are records.
sheet = spreadsheets.open('Pothole Data').sheet1
all_values = sheet.get_all_values()
columns = all_values[0]
rows = all_values[1:]

import pandas as pd

# Columns used as model inputs ('spd' is currently left out) and the column
# used as the label.
features = [
    'acc_y',
    'acc_z',
    'acc_dy',
    'acc_dz',
]  # , 'spd']
target = 'pothole'

df = pd.DataFrame(rows, columns=columns)
print(df)

In [None]:
# Convert the selected feature columns to float64 and the label column to
# int64 NumPy arrays.
data = df[features].astype('float64').to_numpy()
targets = df[target].astype('int64').to_numpy()

# Windowing / batching settings shared by the training and validation
# generators; `train_split` is the row index separating the two.
window_size = 10
train_split = 8000
batch_size = 256

from tensorflow.keras import preprocessing

# Options that are identical for both generators.
generator_options = dict(
    length=window_size,
    shuffle=False,
    batch_size=batch_size,
)

train_data_gen = preprocessing.sequence.TimeseriesGenerator(
    data,
    targets,
    start_index=0,
    end_index=train_split,
    **generator_options,
)
val_data_gen = preprocessing.sequence.TimeseriesGenerator(
    data,
    targets,
    start_index=train_split,
    end_index=None,
    **generator_options,
)


In [None]:
from tensorflow.keras import models, layers

# Each input sample is one window: `window_size` timesteps, one value per
# feature column at every timestep.
lstm_input_shape = (window_size, len(features))

# Three stacked LSTM layers (64 -> 32 -> 16 units) followed by a single
# sigmoid unit. The first two LSTMs return full sequences so the next LSTM
# receives one vector per timestep.
model = models.Sequential()
model.add(layers.LSTM(64, return_sequences=True, input_shape=lstm_input_shape))
model.add(layers.LSTM(32, return_sequences=True))
model.add(layers.LSTM(16))
model.add(layers.Dense(1, activation='sigmoid'))

import tensorflow.keras.backend as K

def f1_loss(y_true, y_pred):
    """Return a differentiable F1-based loss, ``1 - mean(F1)``.

    True-positive, false-positive and false-negative totals are summed along
    axis 0 from the *unrounded* predictions, so the result stays
    differentiable with respect to ``y_pred``.

    Args:
        y_true: Ground-truth labels (presumably 0/1 -- confirm against the
            data fed to ``model.fit``).
        y_pred: Model outputs; the products below treat them as scores where
            larger means "more positive".

    Returns:
        A scalar tensor equal to ``1 - K.mean(f1)``.
    """
    # No module-level `import tensorflow as tf` is visible in this file, so
    # bind `tf` here; without it the `tf.where` call below raises NameError.
    import tensorflow as tf

    # Cast the inputs before multiplying so integer labels cannot trigger a
    # dtype mismatch in the products.
    y_true = K.cast(y_true, 'float')
    y_pred = K.cast(y_pred, 'float')

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # K.epsilon() keeps every denominator strictly positive.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    soft_f1 = 2 * p * r / (p + r + K.epsilon())
    # Replace any NaN entries with zero before averaging.
    soft_f1 = tf.where(tf.math.is_nan(soft_f1), tf.zeros_like(soft_f1), soft_f1)
    return 1 - K.mean(soft_f1)

def f1(y_true, y_pred):
    """Return the mean F1 score computed from rounded predictions.

    Predictions are rounded with ``K.round`` first, then true-positive,
    false-positive and false-negative totals are summed along axis 0.

    Args:
        y_true: Ground-truth labels (presumably 0/1 -- confirm against the
            data fed to ``model.fit``).
        y_pred: Model outputs; rounded to the nearest integer before counting.

    Returns:
        A scalar tensor equal to ``K.mean(f1)``.
    """
    # No module-level `import tensorflow as tf` is visible in this file, so
    # bind `tf` here; without it the `tf.where` call below raises NameError.
    import tensorflow as tf

    # Cast the inputs before multiplying so integer labels cannot trigger a
    # dtype mismatch in the products.
    y_true = K.cast(y_true, 'float')
    y_pred = K.round(K.cast(y_pred, 'float'))

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # K.epsilon() keeps every denominator strictly positive.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    # Named `score` rather than `f1` so the function's own name is not
    # shadowed inside its body.
    score = 2 * p * r / (p + r + K.epsilon())
    # Replace any NaN entries with zero before averaging.
    score = tf.where(tf.math.is_nan(score), tf.zeros_like(score), score)
    return K.mean(score)

import tensorflow.keras.metrics as metrics

# Optimise the F1-based loss with Adam and track the rounded F1 score,
# accuracy and AUC alongside it.
tracked_metrics = [f1, 'accuracy', metrics.AUC()]
model.compile(
    optimizer='adam',
    loss=f1_loss,
    metrics=tracked_metrics,
)

model.summary()

# Fit on the training generator, evaluating on the validation generator
# after every epoch; `train.history` holds the per-epoch values.
epochs = 50
train = model.fit(
    train_data_gen,
    epochs=epochs,
    validation_data=val_data_gen,
)

In [None]:
# One row per epoch, one column per tracked quantity.
history = pd.DataFrame(train.history)

from matplotlib import pyplot as plt

# (history column / panel title, legend location) for each cell of the 2x2
# grid, in drawing order. The validation column is the same name prefixed
# with 'val_'.
panels = [
    ('loss', 'upper right'),
    ('f1', 'lower right'),
    ('accuracy', 'lower right'),
    ('auc', 'upper right'),
]

plt.figure(figsize=(8, 8))

for slot, (metric_name, legend_loc) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    plt.plot(history[metric_name], label='train')
    plt.plot(history['val_' + metric_name], label='val')
    plt.xlabel('epoch')
    plt.legend(loc=legend_loc)
    plt.title(metric_name)

plt.tight_layout()
plt.show()

In [None]:
# Split the arrays at `train_split` so that only the training portion is
# oversampled; the validation portion is used as-is.
train_data = data[:train_split]
train_targets = targets[:train_split]

val_data = data[train_split:]
val_targets = targets[train_split:]

from imblearn.over_sampling import SMOTE
# NOW TRYING THIS WITH OVERSAMPLING THE DATA
# `sampling_strategy` / `fit_resample` are the current imbalanced-learn
# spellings of the former `ratio` / `fit_sample`. A value of 1.0 requests a
# minority/majority ratio of 1 after resampling.
# NOTE(review): SMOTE synthesises individual rows, so windows built from the
# resampled array may mix real and synthetic timesteps -- confirm this is
# the intended experiment.
sm = SMOTE(random_state=12, sampling_strategy=1.0)
SMOTE_data, SMOTE_targets = sm.fit_resample(train_data, train_targets)

train_data_SMOTE = preprocessing.sequence.TimeseriesGenerator(SMOTE_data, SMOTE_targets, window_size, start_index=0, end_index=None, shuffle=False, batch_size=batch_size)
val_data_SMOTE = preprocessing.sequence.TimeseriesGenerator(val_data, val_targets, window_size, start_index=0, end_index=None, shuffle=False, batch_size=batch_size)

# Report the class balance of the resampled training labels. Labels below
# 0.5 count as negative, everything else as positive. The count reads
# `SMOTE_targets`, the array produced by the resampling call above.
countPotholeNeg = sum(1 for label in SMOTE_targets if label < 0.5)
countPotholePos = len(SMOTE_targets) - countPotholeNeg
print('number of negative potholes is', countPotholeNeg)
print('number of positive potholes is', countPotholePos)
print('Percentage of potholes data that are have a pothole', (countPotholePos)/(countPotholeNeg + countPotholePos) * 100)

# `model` is the same object defined earlier in the file, so this fit starts
# from whatever weights it currently holds rather than from a fresh model.
epochs = 50
train = model.fit(train_data_SMOTE, validation_data=val_data_SMOTE, epochs=epochs)

In [None]:
# One row per epoch, one column per tracked quantity.
history = pd.DataFrame(train.history)

print(history)

from matplotlib import pyplot as plt

# (title, train column, validation column, legend location) for each cell of
# the 2x2 grid, in drawing order. A string selects a column by name; an
# integer selects it by position in the history columns (the AUC pair is
# looked up by position: columns 3 and 7).
panels = [
    ('loss', 'loss', 'val_loss', 'upper right'),
    ('f1', 'f1', 'val_f1', 'lower right'),
    ('accuracy', 'accuracy', 'val_accuracy', 'lower right'),
    ('auc', 3, 7, 'upper right'),
]

plt.figure(figsize=(8, 8))

for slot, (title, train_col, val_col, legend_loc) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    for selector, series_label in ((train_col, 'train'), (val_col, 'val')):
        # Resolve positional selectors to the actual column name.
        column = selector if isinstance(selector, str) else history.columns[selector]
        plt.plot(history[column], label=series_label)
    plt.xlabel('epoch')
    plt.legend(loc=legend_loc)
    plt.title(title)

plt.tight_layout()
plt.show()