In [99]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [100]:
device = 'cpu'

In [101]:
train_df = pd.read_csv(r"D:\Datasets\Kaggle\Driving _behaviour_dataset\train_motion_data.csv")
test_df = pd.read_csv(r"D:\Datasets\Kaggle\Driving _behaviour_dataset\test_motion_data.csv")
og_train_df = train_df.copy()
og_test_df = test_df.copy()

train_df = train_df.dropna()
train_df = train_df.drop(columns= 'Timestamp')
test_df = test_df.dropna()
test_df = test_df.drop(columns= 'Timestamp')

class_mapping = {
    'SLOW' : 0,
    'NORMAL' : 1,
    'AGGRESSIVE' : 2
}

train_df['Class'] = train_df['Class'].replace(class_mapping)
test_df['Class'] = test_df['Class'].replace(class_mapping)


  train_df['Class'] = train_df['Class'].replace(class_mapping)
  test_df['Class'] = test_df['Class'].replace(class_mapping)


In [102]:
X = train_df.drop(columns='Class')
Y = train_df['Class']

X_train, X_val, Y_train, Y_val = train_test_split(X,Y, train_size=0.8, stratify=Y, random_state=42)

scaler = StandardScaler()
def add_rolling_feats(df, window = 50):
    columns = ['AccX','AccY', 'AccZ','GyroX', 'GyroY', 'GyroZ']
    for col in columns:
        df[f"{col}_mean"] = df[col].rolling(window).mean()
        df[f"{col}_std"] = df[col].rolling(window).std()
    
    df = df.dropna().reset_index(drop=True)
    return df

In [103]:
train_full = pd.concat([X_train, Y_train], axis=1)
val_full = pd.concat([X_val,Y_val], axis= 1)

train_full = add_rolling_feats(train_full, window=50)
val_full = add_rolling_feats(val_full, window=50)
test_df = add_rolling_feats(test_df, window=50)

X_train = train_full.drop(columns='Class')
Y_train = train_full['Class']
X_val = val_full.drop(columns='Class')
Y_val = val_full['Class']
X_test = test_df.drop(columns='Class')
Y_test = test_df['Class']

In [104]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)


In [105]:
class DriveDataset(Dataset):
    def __init__(self, X, Y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.Y = torch.tensor(Y, dtype=torch.long)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        return self.X[index], self.Y[index]
    
train_dataset = DriveDataset(X_train, Y_train)
val_dataset = DriveDataset(X_val, Y_val)
test_dataset = DriveDataset(X_test, Y_test)

In [106]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size= 64)
test_dataloader = DataLoader(test_dataset, batch_size= 64)

In [107]:
class DriveModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(18, 64)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)

        self.layer2 = nn.Linear(64,32)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        self.output = nn.Linear(32, 3)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu1(x)
        x = self.dropout1(x)

        x = self.layer2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        output = self.output(x)
        return output

model = DriveModel().to(device)
print(model)
    

DriveModel(
  (layer1): Linear(in_features=18, out_features=64, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.3, inplace=False)
  (layer2): Linear(in_features=64, out_features=32, bias=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.3, inplace=False)
  (output): Linear(in_features=32, out_features=3, bias=True)
)


In [108]:
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr = 1e-3)

In [109]:
epochs = 50

for epoch in range(epochs):
    model.train()
    train_loss = 0
    train_correct = 0
    for input, labels in train_dataloader:
        input, labels = input.to(device), labels.to(device)

        optimizer.zero_grad()
        output = model(input)
        batch_loss = criterion(output, labels)
        batch_loss.backward()
        optimizer.step()

        train_loss += batch_loss.item()
        _, predicted = torch.max(output,1)
        correct = (predicted == labels).sum().item()
        train_correct  += correct

    with torch.no_grad():
        model.eval()
        val_loss = 0
        val_correct = 0
        for input, labels in val_dataloader:
            input, labels = input.to(device), labels.to(device)
            output = model(input)
            batch_loss = criterion(output, labels)
            val_loss += batch_loss.item()
            _, predicted = torch.max(output,1)
            correct = (predicted == labels).sum().item()
            val_correct  += correct
    
    avg_train_loss = train_loss/len(train_dataloader)
    avg_val_loss = val_loss/len(val_dataloader)
    train_acc = train_correct/len(train_dataset)
    val_acc = val_correct/len(val_dataset)
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"  Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")
    print("-" * 30)


Epoch [1/50]
  Train Loss: 1.0991 | Train Acc: 0.3632
  Val Loss: 1.0951 | Val Acc: 0.3647
------------------------------
Epoch [2/50]
  Train Loss: 1.0953 | Train Acc: 0.3740
  Val Loss: 1.0951 | Val Acc: 0.3647
------------------------------
Epoch [3/50]
  Train Loss: 1.0897 | Train Acc: 0.3849
  Val Loss: 1.0949 | Val Acc: 0.3676
------------------------------
Epoch [4/50]
  Train Loss: 1.0873 | Train Acc: 0.4068
  Val Loss: 1.0924 | Val Acc: 0.3941
------------------------------
Epoch [5/50]
  Train Loss: 1.0812 | Train Acc: 0.4047
  Val Loss: 1.0898 | Val Acc: 0.3824
------------------------------
Epoch [6/50]
  Train Loss: 1.0776 | Train Acc: 0.4124
  Val Loss: 1.0864 | Val Acc: 0.3897
------------------------------
Epoch [7/50]
  Train Loss: 1.0657 | Train Acc: 0.4166
  Val Loss: 1.0838 | Val Acc: 0.4029
------------------------------
Epoch [8/50]
  Train Loss: 1.0650 | Train Acc: 0.4257
  Val Loss: 1.0821 | Val Acc: 0.4000
------------------------------
Epoch [9/50]
  Train Los

Seems like the data is hard to handle with for a beginner like me. Fed the output to Claude and it said LSTMs are the way to respect sequential data....which I shall learn eventually but not now. 