#### Load and Resample Dataset

The code below loads the data from Google Drive and resamples the data with 30 Hz.

In [None]:
NORMAL_DATA_ID = "13A6usZ2Agu2aCqUg2Ye6cDUied5x51gq"
CRASH_DATA_ID = "1Os9E_WN8BaGKXA30kUrLn08dsnbM529h"

In [None]:
from urllib.request import urlopen
import numpy as np
import pandas as pd

load_data = lambda x: pd.read_csv(
    urlopen(f'https://drive.google.com/uc?export=download&id={x}')
).assign(
    timestamp=lambda x: pd.TimedeltaIndex(x['timestamp'], unit='ms')
).set_index(
    'timestamp'
).resample(
    '33ms'
).interpolate('linear').values

NORMAL_DATA = load_data(NORMAL_DATA_ID)
CRASH_DATA = load_data(CRASH_DATA_ID)

In [None]:
import math
HZ = 30
print(NORMAL_DATA.shape)
print(CRASH_DATA.shape)

print(math.floor(NORMAL_DATA.shape[0]/HZ-1))
print(math.floor(CRASH_DATA.shape[0]/HZ-1))

(18398, 6)
(18725, 6)
612
623


#### Split Data

Here, we will use 90% of the whole data as the training data and remainings as the test data.

In [None]:
import numpy as np
import random

random.seed(1004)

HZ = 30
X, y = [], []
X_train, y_train = [], []
X_test, y_test = [], []

for l, d, (secs_train, secs_test) in zip([0, 1], [NORMAL_DATA, CRASH_DATA], [(500, 600), (500, 600)]): # Label: 0 - Normal, 1 - Crash
    count = 0
    for i in np.arange(0, secs_test, 0.33):
        s, e = int(i * HZ), int((i + 1) * HZ)
        # print(s, e)
        X.append(d[s:e, :])
        count += 1
    y.append(np.repeat(l, count))

X, y = np.asarray(X), np.concatenate(y, axis=0)

train_idxs = []
test_idxs = []

train_idx = random.sample(range(y.shape[0]), (int) (y.shape[0] * 9 / 10))
train_idx = sorted(train_idx)

test_idx = list(set(range(y.shape[0])) - set(train_idx))
test_idx = sorted(test_idx)

X_train, X_test = X, X[test_idx]
y_train, y_test = y, y[test_idx]

print(f'X_train: {X_train.shape}')
print(f'X_test: {X_test.shape}')
print(f'y_train: {y_train.shape}')
print(f'y_test: {y_test.shape}')

X_train: (3638, 30, 6)
X_test: (364, 30, 6)
y_train: (3638,)
y_test: (364,)


#### Model Definition
Define the model, using tensorflow keras.

About the model structure, since the model extract features from data, it contains convolution layers.

---
```
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 batch_normalization_3 (Batc  (None, 30, 6)            24
 hNormalization)

 conv1d_3 (Conv1D)           (None, 30, 32)            1568

 max_pooling1d_3 (MaxPooling  (None, 15, 32)           0
 1D)

 flatten_3 (Flatten)         (None, 480)               0

 dense_6 (Dense)             (None, 64)                30784

 dropout_3 (Dropout)         (None, 64)                0

 dense_7 (Dense)             (None, 1)                 65

=================================================================
Total params: 32,441
Trainable params: 32,429
Non-trainable params: 12
_________________________________________________________________
```

In [None]:
from tensorflow import keras

model = keras.models.Sequential([
      keras.layers.InputLayer(input_shape=(30, 6)),
      keras.layers.BatchNormalization(),
      keras.layers.Conv1D(
          filters=32, kernel_size=8, padding = "same",
          activation = keras.activations.relu,
          kernel_initializer=keras.initializers.HeNormal(seed=1004),
      ),
      keras.layers.MaxPooling1D(
          pool_size=2
      ),
      keras.layers.Flatten(),
      keras.layers.Dense(
          units = 64,
          activation = keras.activations.relu,
          kernel_initializer = keras.initializers.HeNormal(seed=1004)
      ),
      keras.layers.Dropout(
          rate = .5,
      ),
      keras.layers.Dense(
          units = 1,
          activation=keras.activations.sigmoid,
          kernel_initializer=keras.initializers.GlorotUniform(seed=1004)
      )
  ])

#### Model Training
Now compile and train the defined model.

As our model classifies the crash, we will use a binary crossentropy  as the loss function.

In [None]:
from tensorflow import keras

model.compile(
    loss=keras.losses.BinaryCrossentropy (),
    optimizer=keras.optimizers.Adam(),
    metrics=[
            keras.metrics.BinaryAccuracy(),
    ]
)

model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=64,
    epochs=2,
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f0f112352a0>

#### Evaluation
Now using the test data, evaluate our model.

In [None]:
from sklearn.metrics import accuracy_score

y_pred = np.argmax(model.predict(X_test), axis=1)
y_pred = np.round(model.predict(X_test))
acc = accuracy_score(y_test, y_pred)

print(f'Accuracy: {acc * 100:.2f} %.')

Accuracy: 99.73 %.


#### Deployment
Now with the tensorflow, deploy our model as .tf model.

In [None]:
import tensorflow as tf

tf.saved_model.save(model, "saved_model_keras_dir")

# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model("saved_model_keras_dir") # path to the SavedModel directory
tflite_model = converter.convert()

# Save the model.
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)



#### K-Fold Validation (K=10)

Here, we will use data for first {x} jumps as training data and remainings as test data.

In [None]:
import numpy as np
import random

random.seed(1004)

HZ = 30
X, y = [], []
X_train, y_train = [], []
X_test, y_test = [], []

for l, d, (secs_train, secs_test) in zip([0, 1], [NORMAL_DATA, CRASH_DATA], [(500, 600), (500, 600)]): # Label: 0 - Normal, 1 - Crash
    count = 0
    for i in np.arange(0, secs_test, 0.33):
        s, e = int(i * HZ), int((i + 1) * HZ)
        # print(s, e)
        X.append(d[s:e, :])
        count += 1
    y.append(np.repeat(l, count))

X, y = np.asarray(X), np.concatenate(y, axis=0)

train_idxs = []
test_idxs = []

for fold in range(10):
    mother_set = set(range(y.shape[0]))
    for test_idx in test_idxs:
      mother_set -= set(test_idx)

    test_idx = random.sample(list(mother_set), (int) (y.shape[0] / 10))
    test_idx = sorted(test_idx)

    train_idx = list(set(range(y.shape[0])) - set(test_idx))
    train_idx = sorted(train_idx)

    train_idxs.append(train_idx)
    test_idxs.append(test_idx)

In [None]:
from tensorflow import keras
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accs = []
precisions = []
recalls = []
f1s = []

for fold_num, (train_idx, test_idx) in enumerate(zip(train_idxs, test_idxs)):
  X_train, X_test = X[train_idx], X[test_idx]
  y_train, y_test = y[train_idx], y[test_idx]

  print(f'X_train: {X_train.shape}')
  print(f'X_test: {X_test.shape}')
  print(f'y_train: {y_train.shape}')
  print(f'y_test: {y_test.shape}')

  model = keras.models.Sequential([
      keras.layers.InputLayer(input_shape=(30, 6)),
      keras.layers.BatchNormalization(),
      keras.layers.Conv1D(
          filters=32, kernel_size=8, padding = "same",
          activation = keras.activations.relu,
          kernel_initializer=keras.initializers.HeNormal(seed=1004),
      ),
      keras.layers.MaxPooling1D(
          pool_size=2
      ),
      keras.layers.Flatten(),
      keras.layers.Dense(
          units = 64,
          activation = keras.activations.relu,
          kernel_initializer = keras.initializers.HeNormal(seed=1004)
      ),
      keras.layers.Dropout(
          rate = .5,
      ),
      keras.layers.Dense(
          units = 1,
          activation=keras.activations.sigmoid,
          kernel_initializer=keras.initializers.GlorotUniform(seed=1004)
      )
  ])

  model.compile(
    loss=keras.losses.BinaryCrossentropy (),
    optimizer=keras.optimizers.Adam(),
    metrics=[
            keras.metrics.BinaryAccuracy(),
    ]
  )

  model.fit(
      x=X_train,
      y=y_train,
      validation_data=(X_test, y_test),
      batch_size=64,
      epochs=2,
  )

  y_pred = np.argmax(model.predict(X_test), axis=1)
  y_pred = np.round(model.predict(X_test))
  acc, precision, recall, f1 = accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred)

  print(f'[{fold_num}] Accuracy: {acc * 100:.2f} %.')
  print(f'[{fold_num}] Precision: {precision * 100:.2f} %.')
  print(f'[{fold_num}] Recall: {recall * 100:.2f} %.')
  print(f'[{fold_num}] F1: {f1 * 100:.2f} %.')

  accs.append(acc)
  precisions.append(precision)
  recalls.append(recall)
  f1s.append(f1)

X_train: (3275, 30, 6)
X_test: (363, 30, 6)
y_train: (3275,)
y_test: (363,)
Epoch 1/2
Epoch 2/2
[0] Accuracy: 99.45 %.
[0] Precision: 100.00 %.
[0] Recall: 98.91 %.
[0] F1: 99.45 %.
X_train: (3275, 30, 6)
X_test: (363, 30, 6)
y_train: (3275,)
y_test: (363,)
Epoch 1/2
Epoch 2/2
[1] Accuracy: 99.72 %.
[1] Precision: 100.00 %.
[1] Recall: 99.50 %.
[1] F1: 99.75 %.
X_train: (3275, 30, 6)
X_test: (363, 30, 6)
y_train: (3275,)
y_test: (363,)
Epoch 1/2
Epoch 2/2
[2] Accuracy: 99.72 %.
[2] Precision: 100.00 %.
[2] Recall: 99.45 %.
[2] F1: 99.72 %.
X_train: (3275, 30, 6)
X_test: (363, 30, 6)
y_train: (3275,)
y_test: (363,)
Epoch 1/2
Epoch 2/2
[3] Accuracy: 99.72 %.
[3] Precision: 100.00 %.
[3] Recall: 99.38 %.
[3] F1: 99.69 %.
X_train: (3275, 30, 6)
X_test: (363, 30, 6)
y_train: (3275,)
y_test: (363,)
Epoch 1/2
Epoch 2/2
[4] Accuracy: 100.00 %.
[4] Precision: 100.00 %.
[4] Recall: 100.00 %.
[4] F1: 100.00 %.
X_train: (3275, 30, 6)
X_test: (363, 30, 6)
y_train: (3275,)
y_test: (363,)
Epoch 1/2
E

#### Evaluation
Now with K-Fold validation data, evaluate our model.

In [None]:
tot_acc, tot_precision, tot_recall, tot_f1 = 0.0, 0.0, 0.0, 0.0
cnt = 0

print("Accuracy | Precision | Recall | F1")

for fold_num, (acc, precision, reacll, f1) in enumerate(zip(accs, precisions, recalls, f1s)):
  cnt += 1
  tot_acc += acc
  tot_precision += precision
  tot_recall += recall
  tot_f1 += f1

  print(f"[{fold_num}] {acc * 100:.2f} %, {precision * 100:.2f} %, {recall * 100:.2f} %, {f1 * 100:.2f} %")

tot_acc /= cnt
tot_precision /= cnt
tot_recall /= cnt
tot_f1 /= cnt

print(f"[Total] {tot_acc * 100:.2f} %, {tot_precision * 100:.2f} %, {tot_recall * 100:.2f} %, {tot_f1 * 100:.2f} %")

Accuracy | Precision | Recall | F1
[0] 99.45 %, 100.00 %, 100.00 %, 99.45 %
[1] 99.72 %, 100.00 %, 100.00 %, 99.75 %
[2] 99.72 %, 100.00 %, 100.00 %, 99.72 %
[3] 99.72 %, 100.00 %, 100.00 %, 99.69 %
[4] 100.00 %, 100.00 %, 100.00 %, 100.00 %
[5] 98.90 %, 100.00 %, 100.00 %, 98.86 %
[6] 98.62 %, 100.00 %, 100.00 %, 98.66 %
[7] 100.00 %, 100.00 %, 100.00 %, 100.00 %
[8] 99.72 %, 100.00 %, 100.00 %, 99.73 %
[9] 100.00 %, 100.00 %, 100.00 %, 100.00 %
[Total] 99.59 %, 100.00 %, 100.00 %, 99.59 %
