See https://blog.goodaudience.com/introduction-to-1d-convolutional-neural-networks-in-keras-for-time-sequences-3a7ff801a2cf for more information about 1D convolutions and time-series classification.

In [None]:
# Install packages
!pip install samplerate keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting samplerate
  Downloading samplerate-0.1.0-py2.py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.9 MB/s 
[?25hCollecting keras-tuner
  Downloading keras_tuner-1.1.2-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 55.1 MB/s 
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, samplerate, keras-tuner
Successfully installed keras-tuner-1.1.2 kt-legacy-1.0.4 samplerate-0.1.0


In [1]:
# Check if tensorflow finds the runtime GPU
import tensorflow as tf


print(tf.__version__)

# gpu_device_name() is compared against the first GPU's device string;
# any other value is reported as "not found".
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  print('GPU device not found')

2.8.2
Found GPU at: /device:GPU:0


In [None]:
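# Clone the repository
# NOTE: the clone command itself is missing from this cell; the recorded output shows it
# creates 'fc_crash-detection', the directory that dataset_path points into. TODO: restore it.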

Cloning into 'fc_crash-detection'...
remote: Enumerating objects: 18779, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (26/26), done.[K
remote: Total 18779 (delta 13), reused 9 (delta 4), pack-reused 18749[K
Receiving objects: 100% (18779/18779), 2.00 GiB | 22.35 MiB/s, done.
Resolving deltas: 100% (835/835), done.
Checking out files: 100% (16051/16051), done.


In [None]:
# Mount Google Drive
# The mount point is /content/drive; results_path (set in the params cell) lives
# under /content/drive/MyDrive, so this cell must run before the tuner is created.
from google.colab import drive


drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Set params
# Folder whose 'false' and 'true' subfolders are loaded by the data-loading cell
dataset_path = '/content/fc_crash-detection/crash-dataset-csv/infobox'
# Passed to the tuner as its `directory`
results_path = '/content/drive/MyDrive'
model_name = 'infobox_xy_1200_hyperparameters_1' # Tuner `project_name`; stored under results_path
# Fraction of all samples held out by train_test_split (test_size)
test_split = 0.05
# Fraction of the training split used for validation during tuner.search
validation_split = 0.1
# Passed to the Hyperband tuner as max_epochs
max_epoch = 1000
# EarlyStopping settings (monitoring val_accuracy): minimum improvement and epochs to wait
min_delta = 0
patience = 100

In [None]:
# Define some functions
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import samplerate
import glob
import os


def preprocess_infobox(entry):
  """Extract the x/y accelerometer window of one recording.

  The hardware revision is inferred from the number of rows in the recording:

  * more than 3100 rows (rev 1): per the original notes these are sampled at
    400Hz; rows 900..2099 are returned unchanged so the sample resembles a
    microbox one.
  * otherwise (rev 2, 3): per the original notes these are sampled at 1000Hz;
    a window of up to 3000 rows is taken (starting at row 59 when the
    recording has more than 3050 rows, else at row 0) and resampled to 400Hz.

  Only columns 4 and 5 are kept (described in the original notes as the
  filtered x and y accelerometers).

  Args:
    entry: object supporting len() and exposing a 2-D `.values` array
      (get_dataset passes a pandas DataFrame read from a CSV file).

  Returns:
    2-D array with two columns holding the selected (and, for rev 2/3,
    resampled) samples.
  """
  n_rows = len(entry)

  # rev 1: long recordings need no resampling, just cut the window
  if n_rows > 3100:
    return entry.values[900:2100, 4:6]

  # rev 2,3: choose where the 3000-row window starts
  # (original note: 3000 length recordings have cleaner sensor data)
  first_row = 59 if n_rows > 3050 else 0
  window = entry.values[first_row:first_row + 3000, 4:6]

  # Resample 1000Hz to 400Hz -> ratio = out_freq / in_freq
  resample_ratio = 0.4
  converter_type = 'sinc_best'  # or 'sinc_fastest', ...
  return samplerate.resample(window, resample_ratio, converter_type)


def get_dataset(path):
  """Load and preprocess every CSV recording found directly inside `path`.

  Each file is read with pandas (first row used as header), passed through
  preprocess_infobox and stacked into one array.

  Args:
    path: directory containing the `*.csv` recordings.

  Returns:
    float32 numpy array with one entry per CSV file, ordered by file path.
  """
  # glob returns matches in arbitrary (filesystem-dependent) order; sort them so
  # the row order of the dataset is the same on every run.
  files_list = sorted(glob.glob(os.path.join(path, "*.csv")))

  # Load every csv inside path as a numpy matrix, preprocess and create a list
  entry_list = [
      preprocess_infobox(pd.read_csv(filename, index_col=None, header=0))
      for filename in tqdm(files_list)
  ]

  # Convert list to a numpy array. Cast to float32 to avoid errors on fit
  return np.asarray(entry_list).astype('float32')

In [None]:
# Load data
import numpy as np


def _report_shapes(title, data, labels):
  """Print a title followed by the shapes of a data array and its labels."""
  print(title)
  print(data.shape)
  print(labels.shape)


ds_false = get_dataset(dataset_path + '/false')
ds_true = get_dataset(dataset_path + '/true')

# One-hot labels over two classes: column 0 is set for false-class samples
ds_false_labels = np.zeros((len(ds_false), 2))
ds_false_labels[:, 0] = 1
_report_shapes("False-class data:", ds_false, ds_false_labels)

# ... and column 1 is set for true-class samples
ds_true_labels = np.zeros((len(ds_true), 2))
ds_true_labels[:, 1] = 1
_report_shapes("\nTrue-class data:", ds_true, ds_true_labels)

# Stack both classes (false first, then true) along the sample axis
ds_all = np.concatenate([ds_false, ds_true], axis=0)
ds_all_labels = np.concatenate([ds_false_labels, ds_true_labels], axis=0)
_report_shapes("\nAll data:", ds_all, ds_all_labels)

# The per-class arrays are no longer needed once concatenated
del ds_false, ds_true, ds_false_labels, ds_true_labels

  0%|          | 0/3045 [00:00<?, ?it/s]

  0%|          | 0/3045 [00:00<?, ?it/s]

False-class data:
(3045, 1200, 2)
(3045, 2)

True-class data:
(3045, 1200, 2)
(3045, 2)

All data:
(6090, 1200, 2)
(6090, 2)


In [None]:
# Create train and test splits
from sklearn.model_selection import train_test_split


# Seed the shuffle so the same samples land in the test split on every run; the
# tuner project is reloaded across sessions, so trials must be scored on the same data.
# Stratify on the class index (argmax of the one-hot labels) to keep the
# false/true ratio equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    ds_all,
    ds_all_labels,
    test_size=test_split,
    random_state=42,
    stratify=ds_all_labels.argmax(axis=1),
)

print("Train split:")
print(X_train.shape)
print(y_train.shape)

print("\nTest split:")
print(X_test.shape)
print(y_test.shape)

Train split:
(5785, 1200, 2)
(5785, 2)

Test split:
(305, 1200, 2)
(305, 2)


In [None]:
# Create the model
import tensorflow as tf
from tensorflow.keras.layers import Activation,Dense,Dropout,Conv1D,Flatten,MaxPooling1D,GlobalAveragePooling1D


def model_builder(hp):
  """Build and compile the tunable 1D-convolutional classifier.

  Reads the notebook globals X_train (for the per-sample input shape) and
  y_train (for the number of output units).

  Args:
    hp: keras-tuner hyperparameters object; the searched values are
      'conv1d1', 'conv1d1_size', 'maxpooling', 'conv1d2' and 'conv1d2_size'.

  Returns:
    A compiled tf.keras.Sequential model.
  """
  # Per-sample input shape: everything after the batch axis of X_train
  input_shape = (X_train.shape[1], X_train.shape[2])
  n_outputs = y_train.shape[1]

  # Search space (registered in the same order as the layers that use it)
  conv1_filters = hp.Int('conv1d1', min_value=100, max_value=120, step=10)
  conv1_kernel = hp.Int('conv1d1_size', min_value=10, max_value=20, step=10)
  pool_size = hp.Int('maxpooling', min_value=3, max_value=20, step=1)
  conv2_filters = hp.Int('conv1d2', min_value=50, max_value=100, step=10)
  conv2_kernel = hp.Int('conv1d2_size', min_value=5, max_value=10, step=5)

  # Hypermodel definition
  model = tf.keras.Sequential([
      Conv1D(conv1_filters, conv1_kernel, activation='relu', input_shape=input_shape),  # original inline note: 120, 20
      MaxPooling1D(pool_size),
      Dropout(0.5),
      Conv1D(conv2_filters, conv2_kernel, activation='relu'),  # original inline note: 100, 10
      GlobalAveragePooling1D(),
      Dropout(0.5),
      Dense(n_outputs, activation='softmax'),
  ])

  # Adam with a staircase exponential decay of the learning rate
  learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
      0.001,
      decay_steps=100000,
      decay_rate=0.96,
      staircase=True)
  optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

  model.compile(optimizer=optimizer,
                loss=tf.keras.losses.BinaryCrossentropy(),
                metrics=['accuracy'])

  return model

In [None]:
# Define or reload tuner
import keras_tuner as kt


# Hyperband tuner maximising validation accuracy. Its state is kept in
# results_path/model_name, so an existing project there is reloaded.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=max_epoch,
    factor=3,
    directory=results_path,
    project_name=model_name,
)

INFO:tensorflow:Reloading Oracle from existing project /content/drive/MyDrive/infobox_xy_1200_hyperparameters_1/oracle.json
INFO:tensorflow:Reloading Tuner from /content/drive/MyDrive/infobox_xy_1200_hyperparameters_1/tuner0.json


In [None]:
# Search hyperparameters
import tensorflow as tf


# Stop a trial once val_accuracy has not improved by min_delta for `patience` epochs
early = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=min_delta, patience=patience, verbose=1, mode='auto')

# Use the configured max_epoch instead of a hard-coded 1000 so this call stays
# in step with the max_epochs value given to the tuner.
tuner.search(X_train, y_train, epochs=max_epoch, validation_split=validation_split, callbacks=[early])

print('The hyperparameter search is complete')

Trial 1312 Complete [00h 00m 11s]
val_accuracy: 0.8583765029907227

Best val_accuracy So Far: 0.9412780404090881
Total elapsed time: 00h 54m 32s
INFO:tensorflow:Oracle triggered exit
The hyperparameter search is complete


In [None]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# (label shown in the report, hyperparameter name used in model_builder)
report_fields = (
    ('conv1d1', 'conv1d1'),
    ('conv1d1_size', 'conv1d1_size'),
    ('max_pooling', 'maxpooling'),
    ('conv1d2', 'conv1d2'),
    ('conv1d2_size', 'conv1d2_size'),
)

# The header ends with its own newline so a blank line separates it from the values
report_lines = ['Best hyperparameters:\n']
report_lines += [f'{label}: {best_hps.get(hp_name)}' for label, hp_name in report_fields]
print('\n'.join(report_lines))

Best hyperparameters:

conv1d1: 110
conv1d1_size: 20
max_pooling: 16
conv1d2: 100
conv1d2_size: 10
