<a href="https://colab.research.google.com/github/NachoDave/Automated-Vehicle-Anomalous-Driving-Behaviour/blob/main/CAMs_Autoencoder_project_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Autoencoder project

## What do we need to do?


1.   Get and format the data
2.   Split the data into training, test and validation sets - initially will just add the data one measure at a time, but may need to combine them somehow
3. Define the model
4. Crea



### Import Libraries and connect to drive

In [9]:
import os
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split

from google.colab import drive

drive.mount('/content/drive')
dataset_dir = '/content/drive/MyDrive/datasets/car_hacking'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load the data

In [10]:
# Load standard dataset
rpm_df = pd.read_csv(f"{dataset_dir}/RPM_dataset.csv", header=None)
rpm_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1478191000.0,0316,8,05,22,68,09,22,20,00,75,R
1,1478191000.0,018f,8,fe,3b,00,00,00,3c,00,00,R
2,1478191000.0,0260,8,19,22,22,30,ff,8f,6e,3f,R
3,1478191000.0,02a0,8,60,00,83,1d,96,02,bd,00,R
4,1478191000.0,0329,8,dc,b8,7e,14,11,20,00,14,R


### Format the data

In [12]:
def format_CAN_bus_data(df:pd.DataFrame) -> pd.DataFrame:
  # Rows with only two bytes have nans
  df.replace(to_replace = np.nan, value = '00', inplace=True) # replace the NaNs
  df[5].replace(to_replace = 'R', value = '00', inplace=True) # replace the R Values in the 5 column for the 2 byte channels

  # Get the bus Ids and convert to numbers then to one hot encoding
  bus_id = df[1].apply(int,base=16)
  bus_id_one_hot = pd.get_dummies(bus_id).astype(int)

  # Get the payload and convert to int
  payload = df.iloc[:,3:11].applymap(int,base=16)

  # Concat the bus_id and payload to give the final data frame
  return pd.concat([bus_id_one_hot, payload], axis=1)

rpm_df = format_CAN_bus_data(rpm_df)
rpm_df.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[5].replace(to_replace = 'R', value = '00', inplace=True) # replace the R Values in the 5 column for the 2 byte channels
  payload = df.iloc[:,3:11].applymap(int,base=16)


Unnamed: 0,2,160,161,304,305,320,339,399,497,608,...,1520,1680,3,4,5,6,7,8,9,10
0,0,0,0,0,0,0,0,0,0,0,...,0,0,5,34,104,9,34,32,0,117
1,0,0,0,0,0,0,0,1,0,0,...,0,0,254,59,0,0,0,60,0,0
2,0,0,0,0,0,0,0,0,0,1,...,0,0,25,34,34,48,255,143,110,63
3,0,0,0,0,0,0,0,0,0,0,...,0,0,96,0,131,29,150,2,189,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,220,184,126,20,17,32,0,20


### Get data ready for loading

In [13]:
X = rpm_df.values.astype("float32")   # your dataframe of 34 features
X_train, X_temp = train_test_split(X, test_size=0.3, random_state=42)
X_val, X_test = train_test_split(X_temp, test_size=0.5, random_state=42)

In [18]:
X_train.shape

(3235191, 34)

### Create the autoencoder

In [17]:
input_dim = X_train.shape[1]   # should be 34
encoding_dim = 16              # latent size (tune this)

input_layer = layers.Input(shape=(input_dim,)) # this is the
encoder = layers.Dense(64, activation="relu")(input_layer)
encoder = layers.Dense(32, activation="relu")(encoder)
latent = layers.Dense(encoding_dim, activation="relu")(encoder)

decoder = layers.Dense(32, activation="relu")(latent)
decoder = layers.Dense(64, activation="relu")(decoder)
output_layer = layers.Dense(input_dim, activation="sigmoid")(decoder)

autoencoder = models.Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer="adam", loss="mse")
autoencoder.summary()

In [1]:
X_train[0:100]

NameError: name 'X_train' is not defined

### Train the model

In [19]:
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

history = autoencoder.fit(
    X_train[0:100], X_train[0:1000],
    epochs=5,
    batch_size=128,
    shuffle=True,
    validation_data=(X_val[0:1000], X_val[0:1000]),
    callbacks=[callback]
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 24s/step - loss: 2485.3850 - val_loss: 2219.8584
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 40s/step - loss: 2483.9094 - val_loss: 2218.8113
Epoch 3/50


KeyboardInterrupt: 