1. Colab Settings - comment | do not run | configure with your settings if GOOGLE COLAB is used

In [62]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
cd "/content/drive/Othercomputers/My Laptop/DrivingBehaviour"

/content/drive/Othercomputers/My Laptop/DrivingBehaviour


2. Import dataset

In [1]:
import pandas as pd
df_train = pd.read_csv("train_motion_data.csv")
df_test = pd.read_csv("Test.csv")

In [2]:
df_train.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [3]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3644 entries, 0 to 3643
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   AccX       3644 non-null   float64
 1   AccY       3644 non-null   float64
 2   AccZ       3644 non-null   float64
 3   GyroX      3644 non-null   float64
 4   GyroY      3644 non-null   float64
 5   GyroZ      3644 non-null   float64
 6   Class      3644 non-null   object 
 7   Timestamp  3644 non-null   int64  
dtypes: float64(6), int64(1), object(1)
memory usage: 227.9+ KB


In [4]:
# checking for the missing values
print(df_train.isna().sum())
print(df_train.duplicated().sum())

AccX         0
AccY         0
AccZ         0
GyroX        0
GyroY        0
GyroZ        0
Class        0
Timestamp    0
dtype: int64
0


3. Preprocess the dataset

In [5]:
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import TensorDataset
import numpy as np

# Extract the acceleration and gyroscope data and labels from the dataframe

train_data = df_train[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']].values

acc_data = df_train[['AccX', 'AccY', 'AccZ']].values

gyr_data = df_train[['GyroX', 'GyroY', 'GyroZ']].values

acc_train_data = []
gyr_train_data = []
for i, data in enumerate(train_data):
    acceleration = np.sqrt(np.square(data[0]) + np.square(data[1]) + np.square(data[2]))
    angular_velocity = np.sqrt(np.square(data[3]) + np.square(data[4]) + np.square(data[5]))
    acc_train_data.append(np.concatenate((acc_data[i], [acceleration])))
    gyr_train_data.append(np.concatenate((gyr_data[i], [angular_velocity])))

acc_data = np.asarray(acc_train_data)
gyr_data = np.asarray(gyr_train_data)
acc_data = torch.from_numpy(acc_data).to(torch.float)
gyr_data = torch.from_numpy(gyr_data).to(torch.float)

In [6]:
acc_data

tensor([[ 0.0000,  0.0000,  0.0000,  0.0000],
        [-1.6249, -1.0825, -0.2042,  1.9631],
        [-0.5947, -0.1224,  0.2205,  0.6459],
        ...,
        [-0.2228,  0.7473, -0.8874,  1.1814],
        [-0.3494,  0.0673,  0.3944,  0.5312],
        [-0.4024,  0.4062, -0.4230,  0.7113]])

In [7]:
# Create the input data
train_data = torch.stack([acc_data, gyr_data], dim=1)
train_data = train_data.unsqueeze(1)
train_data.to(torch.double)

labelEncoder = LabelEncoder()
df_train["Class"] = labelEncoder.fit_transform(df_train["Class"])
train_labels = df_train['Class'].values

In [8]:
train_data

tensor([[[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
          [ 5.9407e-02, -1.7471e-01,  1.0194e-01,  2.1082e-01]]],


        [[[-1.6249e+00, -1.0825e+00, -2.0418e-01,  1.9631e+00],
          [-2.8558e-02,  5.1313e-02,  1.3554e-01,  1.4771e-01]]],


        [[[-5.9466e-01, -1.2241e-01,  2.2050e-01,  6.4593e-01],
          [-1.9395e-02, -2.9322e-02,  8.7888e-02,  9.4659e-02]]],


        ...,


        [[[-2.2284e-01,  7.4730e-01, -8.8743e-01,  1.1814e+00],
          [ 3.6117e-01, -4.0684e-01,  5.4291e-02,  5.4673e-01]]],


        [[[-3.4942e-01,  6.7261e-02,  3.9437e-01,  5.3117e-01],
          [-1.3241e-01,  2.0159e-02, -4.9633e-03,  1.3402e-01]]],


        [[[-4.0243e-01,  4.0622e-01, -4.2301e-01,  7.1126e-01],
          [-5.3603e-02, -6.7195e-03,  1.1454e-03,  5.4035e-02]]]])

Convert the data and labels to tensors

In [9]:
# Convert the data and labels to tensors
train_labels = torch.from_numpy(train_labels).to(torch.float)

In [10]:
train_labels

tensor([1., 1., 1.,  ..., 2., 2., 2.])

4. Define the dataset and the dataloader

In [34]:
from torch.utils.data import DataLoader

# Define the number of epoches and batch size
BATCH_SIZE = 32
NO_EPOCH = 5


# Define a PyTorch dataset and dataloader for the training set
train_dataset = TensorDataset(train_data, train_labels)

train_dataloader  = DataLoader (
            train_dataset,
            shuffle=True,
            batch_size  = BATCH_SIZE
)

# calculate steps per epoch for training and validation set
trainSteps = len(train_dataloader.dataset) // BATCH_SIZE
valSteps = len(train_dataloader.dataset) // BATCH_SIZE


5. Define the CNN arhitecture + training loop

In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import flatten

# Define the CNN architecture
class CNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=input_size, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(4096, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, num_classes)

        # Define the dropout layer
        self.dropout = nn.Dropout(p=0.15)

    def forward(self, x):

        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.relu(self.conv3(x))
        x = nn.functional.relu(self.conv4(x))
        x = nn.functional.relu(self.conv5(x))


        # Flatten the data for the fully connected layers
        x = flatten(x, 1)

        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = nn.functional.relu(self.fc2(x))
        x = self.dropout(x)
        x = nn.functional.relu(self.fc3(x))
        x = self.dropout(x)
        x = nn.functional.relu(self.fc4(x))

        return x

# Instantiate the CNN
model = CNN(input_size=1, num_classes=3)


# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

last_train_acc = 0

# Train the model
for epoch in range(NO_EPOCH):

    totalTrainLoss = 0
    trainCorrect = 0

    # Set the model to training mode
    model.train()

    for i, data in enumerate(train_dataloader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        labels = labels.to(torch.long)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        totalTrainLoss += loss

        predicted_class = outputs.argmax(1)
        trainCorrect += (predicted_class == labels).sum().item()

    print(f'Epoch {epoch+1}/10: loss = {totalTrainLoss / trainSteps}')

    # Print the accuracy
    acc = trainCorrect / len(train_dataset)
    print(f'Train accuracy: {acc:.4f}')

PATH = './DrivingBehaviourModel.pth'
torch.save(model.state_dict(), PATH)

Epoch 1/10: loss = 1.107056736946106
Train accuracy: 0.3699
Epoch 2/10: loss = 1.0900765657424927
Train accuracy: 0.4163
Epoch 3/10: loss = 1.07150399684906
Train accuracy: 0.4232
Epoch 4/10: loss = 1.0661251544952393
Train accuracy: 0.4311
Epoch 5/10: loss = 1.0637160539627075
Train accuracy: 0.4306


6. Start processing the test dataset

Preprocess the test dataset

In [15]:
df_test.head()

Unnamed: 0,ID,AccX,AccY,AccZ,GyroX,GyroY,GyroZ
0,1,-2.426553,5.780482,-1.139655,0.009392,-0.01237,-0.008247
1,2,0.083292,0.060946,0.25138,-0.011988,-0.008094,-0.000305
2,3,-0.190249,0.052715,-0.022909,0.001451,0.000458,0.056505
3,4,-0.038543,-0.162965,0.083716,0.029551,0.025504,0.153633
4,5,-0.137203,0.328793,0.092976,0.064981,-0.025809,0.244652


In [16]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3475 entries, 0 to 3474
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      3475 non-null   int64  
 1   AccX    3475 non-null   float64
 2   AccY    3475 non-null   float64
 3   AccZ    3475 non-null   float64
 4   GyroX   3475 non-null   float64
 5   GyroY   3475 non-null   float64
 6   GyroZ   3475 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 190.2 KB


In [18]:
# checking for the missing values
print(df_test.isna().sum())
print(df_test.duplicated().sum())

ID       0
AccX     0
AccY     0
AccZ     0
GyroX    0
GyroY    0
GyroZ    0
dtype: int64
0


In [24]:
test_df_data = df_test[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']].values

test_acc_data = df_train[['AccX', 'AccY', 'AccZ']].values

test_gyr_data = df_train[['GyroX', 'GyroY', 'GyroZ']].values

acc_test_data = []
gyr_test_data = []
for i, data in enumerate(test_df_data):
    acceleration = np.sqrt(np.square(data[0]) + np.square(data[1]) + np.square(data[2]))
    angular_velocity = np.sqrt(np.square(data[3]) + np.square(data[4]) + np.square(data[5]))
    acc_test_data.append(np.concatenate((test_acc_data[i], [acceleration])))
    gyr_test_data.append(np.concatenate((test_gyr_data[i], [angular_velocity])))

test_acc_data = np.asarray(acc_test_data)
test_gyr_data = np.asarray(gyr_test_data)
test_acc_data = torch.from_numpy(test_acc_data).to(torch.float)
test_gyr_data = torch.from_numpy(test_gyr_data).to(torch.float)

# Create the input data
test_data = torch.stack([test_acc_data, test_gyr_data], dim=1)
test_data = test_data.unsqueeze(1)
test_data.to(torch.float)

test_labels = torch.zeros(len(test_data)).to(torch.float)

# Define a PyTorch dataset and dataloader for the training set
test_dataset = TensorDataset(test_data, test_labels)

test_dataloader  = DataLoader (
            test_dataset,                                  # The training samples.
            batch_size  = BATCH_SIZE
)


7. Load the model and predict data

In [37]:

myModel = CNN(input_size=1, num_classes=3)
PATH = './DrivingBehaviourModel.pth'
myModel.load_state_dict(torch.load(PATH))

# Set the model to evaluation mode
myModel.eval()

labels = np.array([])

with torch.no_grad():
    for data in test_dataloader:
        inputs, _ = data
        # Forward pass
        outputs = myModel(inputs)
        output_probs = F.softmax(outputs, dim=1)
        _, predicted = torch.max(output_probs, 1)
        labels = np.append(labels, predicted.to(torch.long).numpy(), axis=0)


result = []
for label in labels:
    if label == 2:
        result.append("SLOW")
    elif label == 1:
        result.append("NORMAL")
    elif label == 0:
        result.append("AGGRESSIVE")


result_df = pd.DataFrame(data=result, columns=['Class'], index=range(1, 3476))
result_df.to_csv('submision.csv', index=True)
