# Driver Aggression Neural Network (DANN)

Driver Aggression Neural Network is assigning an aggression value to a sorted set of sensory data. Driving is simulated in BeamNG v0.27 using their BeamNGpy open-source library.

In [151]:
import pandas as pd

parquet_file_path = 'imu_data_2023_04_30_19_56_06.parquet'
data = pd.read_parquet(parquet_file_path)

## Modify training data

- Group together data recorded from the same sensor
- Take around 100-1000 recorded data without the aggression values
- Make aggression values the label of the dataset
- Create a lot of training data by chunking the sorted (by timestamp) records.

In [152]:
import numpy as np

# Assuming 'data' is your Scheme
# data = pd.DataFrame(columns=[
#     'imuId',
#     'vehicleAggression',
#     'time',
#     'pos',
#     'dirX',
#     'dirY',
#     'dirZ',
#     'angVel',
#     'angAccel',
#     'mass',
#     'accRaw',
#     'accSmooth'
# ])

# Function to split the data into chunks
def split_into_chunks(data, chunk_size):
    return [data[i:i+chunk_size] for i in range(0, len(data), chunk_size)]

# Group the data by 'imuId' and sort within each group by 'time'
grouped_data = data.groupby('imuId').apply(lambda x: x.sort_values('time')).reset_index(drop=True)

# Set the desired chunk size (number of records per chunk)
chunk_size = 400

# Split the data into chunks and assign the 'vehicleAggression' value as the label
training_data = []
for imu_id, group in grouped_data.groupby('imuId'):
    chunks = split_into_chunks(group, chunk_size)
    for chunk in chunks:
        if len(chunk) >= chunk_size:
            label = chunk['vehicleAggression'].iloc[0]
            first_timestamp = chunk['time'].iloc[0]
            adjusted_time = chunk['time'] - first_timestamp
            
            # Separate list columns into individual columns
            pos_df = pd.DataFrame(chunk['pos'].tolist(), columns=['posX', 'posY', 'posZ'], index=chunk.index)
            dir_x_df = pd.DataFrame(chunk['dirX'].tolist(), columns=['dirXX', 'dirXY', 'dirXZ'], index=chunk.index)
            dir_y_df = pd.DataFrame(chunk['dirY'].tolist(), columns=['dirYX', 'dirYY', 'dirYZ'], index=chunk.index)
            dir_z_df = pd.DataFrame(chunk['dirZ'].tolist(), columns=['dirZX', 'dirZY', 'dirZZ'], index=chunk.index)
            acc_raw_df = pd.DataFrame(chunk['accRaw'].tolist(), columns=['accRawX', 'accRawY', 'accRawZ'], index=chunk.index)
            acc_smooth_df = pd.DataFrame(chunk['accSmooth'].tolist(), columns=['accSmoothX', 'accSmoothY', 'accSmoothZ'], index=chunk.index)
            ang_vel_df = pd.DataFrame(chunk['angVel'].tolist(), columns=['angVelX', 'angVelY', 'angVelZ'], index=chunk.index)
            ang_accel_df = pd.DataFrame(chunk['angAccel'].tolist(), columns=['angAccelX', 'angAccelY', 'angAccelZ'], index=chunk.index)
            
            expanded_chunk = pd.concat([chunk, pos_df, dir_x_df, dir_y_df, dir_z_df, acc_raw_df, acc_smooth_df, ang_vel_df, ang_accel_df], axis=1)
            
            updated_chunk = (
                expanded_chunk.assign(time=adjusted_time)
                .drop(['imuId', 'vehicleAggression', 'pos', 'dirX', 'dirY', 'dirZ', 'angVel', 'angAccel', 'accRaw', 'accSmooth'], axis=1)
            )
            
            training_data.append({'data': updated_chunk, 'label': label})

# Convert the list of dictionaries to a DataFrame
training_data_df = pd.DataFrame(training_data)

# Example of a single training set
# Set display options
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", 100)  # Set the maximum column width to 100 characters
pd.set_option("display.expand_frame_repr", False)
print(training_data_df.loc[0, 'data'])
pd.reset_option("all")


          time      mass        posX        posY        posZ     dirXX     dirXY     dirXZ     dirYX     dirYY     dirYZ     dirZX     dirZY     dirZZ    accRawX    accRawY    accRawZ  accSmoothX  accSmoothY  accSmoothZ   angVelX   angVelY   angVelZ  angAccelX  angAccelY  angAccelZ
0     0.000000  2.466599 -717.071518  100.832048  120.028431 -0.708909 -0.705289 -0.003978  0.004391  0.001227 -0.999990  0.705287 -0.708919  0.002227   2.168954  -1.299789   2.416748    2.232556   -1.351844    2.351287 -0.013904  0.025935 -0.000358  -3.398869   4.775077   4.165862
1     0.010250  2.466599 -717.082836  100.820940  120.028434 -0.708909 -0.705289 -0.003978  0.004391  0.001227 -0.999990  0.705287 -0.708919  0.002227   4.330267  -0.970009   1.874721    3.910087   -1.046492    1.970179 -0.002422  0.001659  0.000139   1.322726  -2.077105   0.535162
2     0.020500  2.466599 -717.093703  100.809312  120.028065 -0.708860 -0.705341 -0.003545  0.003733  0.001275 -0.999992  0.705340 -0.708867  0.001729 

## Translating the training data to learn

In [153]:
from sklearn.model_selection import train_test_split

# Get the data and labels from the training_data_df
X = np.stack(training_data_df['data'].apply(lambda x: x.to_numpy()).to_numpy())
y = training_data_df['label'].to_numpy()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (140, 400, 26)
X_test shape: (35, 400, 26)
y_train shape: (140,)
y_test shape: (35,)


## Learn with PyTorch

- Create a TensorDataset
- Create a DataLoader, which shuffles the data
- Create a simple neural net (torch.nn.Sequential) which uses CUDA while training
- Train the neural net with the data provided
- Evaluate the net with the test data

In [154]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# # Pad sequences to the same length
# X_train_padded = pad_sequences(X_train, dtype='float32', padding='post')
# y_train_padded = pad_sequences(y_train, dtype='float32', padding='post')
# X_test_padded = pad_sequences(X_test, dtype='float32', padding='post')
# y_test_padded = pad_sequences(y_test, dtype='float32', padding='post')

# Create tensors from the padded data
X_train_tensor = torch.tensor(X_train).permute(0, 2, 1)
y_train_tensor = torch.tensor(y_train)
X_test_tensor = torch.tensor(X_test).permute(0, 2, 1)
y_test_tensor = torch.tensor(y_test)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [155]:
import torch.nn as nn

# Define the CNN architecture
class CNNRegressor(nn.Module):
    def __init__(self, input_channels, num_filters, kernel_size, pool_size, hidden_units, dropout_rate, device):
        super(CNNRegressor, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv1d(input_channels, num_filters, kernel_size),
            nn.BatchNorm1d(num_filters),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.conv2 = nn.Sequential(
            nn.Conv1d(num_filters, num_filters * 2, kernel_size),
            nn.BatchNorm1d(num_filters * 2),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.conv3 = nn.Sequential(
            nn.Conv1d(num_filters * 2, num_filters * 4, kernel_size),
            nn.BatchNorm1d(num_filters * 4),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.flatten = nn.Flatten()

        # Calculate the output size of the third max-pooling layer
        conv1_out_size = (100 - kernel_size + 1) // pool_size
        conv2_out_size = (conv1_out_size - kernel_size + 1) // pool_size
        conv3_out_size = (conv2_out_size - kernel_size + 1) // pool_size

        self.fc1 = nn.Sequential(
            nn.Linear(num_filters * 4 * conv3_out_size, hidden_units),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        ).to(device)

        self.fc2 = nn.Linear(hidden_units, 1).to(device)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [156]:
class CNNRegressor(nn.Module):
    def __init__(self, input_channels, num_filters, kernel_size, pool_size, hidden_units, dropout_rate, device):
        super(CNNRegressor, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv1d(input_channels, num_filters, kernel_size),
            nn.BatchNorm1d(num_filters),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.conv2 = nn.Sequential(
            nn.Conv1d(num_filters, num_filters * 2, kernel_size),
            nn.BatchNorm1d(num_filters * 2),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.conv3 = nn.Sequential(
            nn.Conv1d(num_filters * 2, num_filters * 4, kernel_size),
            nn.BatchNorm1d(num_filters * 4),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.conv4 = nn.Sequential(
            nn.Conv1d(num_filters * 4, num_filters * 8, kernel_size),
            nn.BatchNorm1d(num_filters * 8),
            nn.ReLU(),
            nn.MaxPool1d(pool_size)
        ).to(device)

        self.flatten = nn.Flatten()

        conv1_out_size = (chunk_size - kernel_size + 1) // pool_size
        conv2_out_size = (conv1_out_size - kernel_size + 1) // pool_size
        conv3_out_size = (conv2_out_size - kernel_size + 1) // pool_size
        conv4_out_size = (conv3_out_size - kernel_size + 1) // pool_size

        self.fc1 = nn.Sequential(
            nn.Linear(num_filters * 8 * conv4_out_size, hidden_units),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        ).to(device)

        self.fc2 = nn.Sequential(
            nn.Linear(hidden_units, hidden_units // 2),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        ).to(device)

        self.fc3 = nn.Linear(hidden_units // 2, 1).to(device)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [164]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define hyperparameters
input_channels = 26
num_filters = 1024
kernel_size = 6
pool_size = 4
hidden_units = 1024
dropout_rate = 0.5

# Create the model
net = CNNRegressor(input_channels, num_filters, kernel_size, pool_size, hidden_units, dropout_rate, device)

RuntimeError: Trying to create tensor with negative dimension -8192: [1024, -8192]

In [162]:
import torch.optim as optim

criterion = nn.HuberLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
num_epochs = 100

net.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.float().to(device), labels.float().to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs} Loss: {running_loss/len(train_loader)}")

Epoch 1/100 Loss: 8.876630847156047
Epoch 2/100 Loss: 0.13997825533151625
Epoch 3/100 Loss: 0.09791632741689682
Epoch 4/100 Loss: 0.1837238609790802
Epoch 5/100 Loss: 0.07509234063327312
Epoch 6/100 Loss: 0.048456411063671115
Epoch 7/100 Loss: 0.03360163159668446
Epoch 8/100 Loss: 0.02060399539768696
Epoch 9/100 Loss: 0.02114388681948185
Epoch 10/100 Loss: 0.020459798350930213
Epoch 11/100 Loss: 0.01474019791930914
Epoch 12/100 Loss: 0.01777251102030277
Epoch 13/100 Loss: 0.02031529750674963
Epoch 14/100 Loss: 0.011894077993929386
Epoch 15/100 Loss: 0.014547750353813171
Epoch 16/100 Loss: 0.012259533442556859
Epoch 17/100 Loss: 0.010106644686311483
Epoch 18/100 Loss: 0.012176467385143042
Epoch 19/100 Loss: 0.012861981429159642
Epoch 20/100 Loss: 0.00950536085292697
Epoch 21/100 Loss: 0.009201431274414062
Epoch 22/100 Loss: 0.010095319896936416
Epoch 23/100 Loss: 0.008876287471503019
Epoch 24/100 Loss: 0.007912615220993758
Epoch 25/100 Loss: 0.011527815088629722
Epoch 26/100 Loss: 0.011

In [163]:
net.eval()
with torch.no_grad():
    total_difference = 0.0
    num_samples = 0
    test_loss = 0.0
    for inputs, labels in test_loader:
        inputs, labels = inputs.float().to(device), labels.float().to(device)
        outputs = net(inputs)
        
        # Calculate the absolute difference between the predicted and real labels
        difference = torch.abs(outputs.squeeze() - labels)
        
        # Update the total difference and the number of samples
        total_difference += difference.sum().item()
        num_samples += len(labels)
        
        loss = criterion(outputs.squeeze(), labels)
        test_loss += loss.item()

    print(f"Test Loss: {test_loss/len(test_loader)}")
    
    # Calculate the average absolute difference
    average_difference = total_difference / num_samples
    print(f"Average Absolute Difference: {average_difference}")

Test Loss: 0.008245083037763834
Average Absolute Difference: 0.10397903919219971
