In [1]:
from trajectorylib.ml.model import CNN
from trajectorylib.ml.data_process import SlidingWindowDataProcessor
from trajectorylib.ml.trainer import SlidingWindowTrainer
from sklearn.metrics import accuracy_score

In [2]:
import os
import random
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

## Loading Data

In [3]:
data_folder = '/home/trijya/Dataset/DrivingData/'

In [4]:
import os
import random

data_folder = '/home/trijya/Dataset/DrivingData/'

# Get a list of all CSV files in the data folder with full paths
csv_files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.csv')]

# Select 80% of the files randomly
training_set = random.sample(csv_files, int(len(csv_files) * 0.8))

print("Training set:")
print(training_set)

print("Number of files in training set:", len(training_set))
print("Total number of CSV files:", len(csv_files))

Training set:
['/home/trijya/Dataset/DrivingData/2020-06-19-11-44-25_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-25-10-03-01_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-06-19-11-57-48_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-25-15-25-40_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-19-13-58-00_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-25-19-22-30_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-26-18-54-32_2T3Y1RFV8KC014025_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-22-09-36-00_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-17-10-00-46_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-08-12-55-56_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/Dri

In [5]:
# Get the remaining files for test set
test_set = [f for f in csv_files if f not in training_set]
print("Test set:")
print(test_set)

Test set:
['/home/trijya/Dataset/DrivingData/2020-07-19-14-31-15_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-27-07-20-11_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-18-13-18-27_2T3Y1RFV8KC014025_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-10-11-04-30_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-08-14-55-56_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-15-10-41-40_2T3Y1RFV8KC014025_CAN_Messages_20Hz.csv', '/home/trijya/Dataset/DrivingData/2020-07-19-14-01-00_2T3MWRFVXLW056972_CAN_Messages_20Hz.csv']


## Preparing data for the model

In [6]:
window_size = 6
dataprocessor = SlidingWindowDataProcessor(training_set, window_size=window_size)
dataprocessor.process_file_list()

In [7]:
# Check for GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Create an instance of the CNN model
model = CNN(window_size=window_size, dropout_rate=0.2, n_features=dataprocessor.all_X[0].shape[1], n_filters=32, n_fc_unit = 64)

Window Size: 6
Number of input feature: 2


In [9]:
model

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(1, 1), stride=(1, 1), padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=160, out_features=64, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

## Training the model

In [10]:
result_folder = "acc_cnn_training"

In [11]:
trainer = SlidingWindowTrainer(result_folder=result_folder, 
                               model = model, 
                               device = device,
                               data_processor=dataprocessor)

In [12]:
len(trainer.data_processor.all_X)

302265

In [13]:
trainer.data_processor.all_X[0].shape

(6, 2)

In [14]:
# Define Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
#Define Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, mode = 'min', factor=0.1, patience=10, min_lr=1e-6)

In [15]:
trainer.train(criterion=criterion, optimizer=optimizer, scheduler=scheduler, num_epochs=40)

Shape of X before reshaping: (302265, 6, 2)
Shape of Y before reshaping: (302265,)
Current learning rate: 0.001000
Epoch [1/40], Loss: 0.3539, Accuracy: 0.8720, Val Loss: 0.3404, Val Accuracy: 0.8695
Current learning rate: 0.001000
Epoch [2/40], Loss: 0.3368, Accuracy: 0.8724, Val Loss: 0.3334, Val Accuracy: 0.8695
Current learning rate: 0.001000
Epoch [3/40], Loss: 0.3324, Accuracy: 0.8725, Val Loss: 0.3278, Val Accuracy: 0.8695
Current learning rate: 0.001000
Epoch [4/40], Loss: 0.3301, Accuracy: 0.8725, Val Loss: 0.3302, Val Accuracy: 0.8695
Current learning rate: 0.001000
Epoch [5/40], Loss: 0.3287, Accuracy: 0.8725, Val Loss: 0.3283, Val Accuracy: 0.8695
Current learning rate: 0.001000
Epoch [6/40], Loss: 0.3274, Accuracy: 0.8726, Val Loss: 0.3273, Val Accuracy: 0.8700
Current learning rate: 0.001000
Epoch [7/40], Loss: 0.3269, Accuracy: 0.8727, Val Loss: 0.3269, Val Accuracy: 0.8696
Current learning rate: 0.001000
Epoch [8/40], Loss: 0.3264, Accuracy: 0.8729, Val Loss: 0.3291, Va

## Making inferences on unseen datasets

In [16]:
testdata_processor = SlidingWindowDataProcessor(test_set, window_size=window_size)
testdata_processor.process_file_list()
trainer.predict(testdata_processor)

Shape of X before reshaping: (76494, 6, 2)
Shape of Y before reshaping: (76494,)


  self.model.load_state_dict(torch.load(self.torch_model_path))


Shape of X before reshaping: (76494, 6, 2)
Shape of Y before reshaping: (76494,)
Test Loss: 0.3798, Accuracy: 0.6993


0.6992966768635449

## Making inferences on unseen datasets by loading ONNX

In [17]:
trainer.predict_onnx(testdata_processor)

Shape of X before reshaping: (76494, 6, 2)
Shape of Y before reshaping: (76494,)
Test Loss: 0.3798, Accuracy: 0.6993


0.6992966768635449