In [9]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

import server.cloudprocessing.dataprocessing as dp
import server.cloudprocessing.rnn as rnn
import server.cloudprocessing.util as util
import config as config
import surfacemodel as sf

### Data preparation
Extract the current data from the database and prepare it for processing.

In [10]:
# Load the data frame through the project's dataprocessing module (imported as dp).
df = dp.get_dataframe()

  return pd.read_json(raw_data)


In [11]:
# Run the project's pre-processing step; `df` is rebound because later cells read it.
df = dp.pre_processing(df)
# Leave the preview as the cell's last expression so the notebook renders it
# as a rich table instead of a plain-text print.
df.head()

Asphalt Data points:  10089
Pavement Data points:  3485
Gravel Data points:  0
Grass Data points:  20511
Crash Data points:  6381
                     time  trip_id  vibration  latitude  longitude  \
0 2023-11-16 17:20:55.454        1          1       0.0        0.0   
1 2023-11-16 17:21:00.607        1          1       0.0        0.0   
2 2023-11-16 17:21:00.659        1          1       0.0        0.0   
3 2023-11-16 17:21:00.711        1          1       0.0        0.0   
4 2023-11-16 17:21:00.763        1          1       0.0        0.0   

   acceleration_x  acceleration_y  acceleration_z  gyroscope_x  gyroscope_y  \
0             261              -5              -5          -15           63   
1             259               0              -8          -16           66   
2             259               0              -9          -15           66   
3             258               0              -8          -16           67   
4             259               0              -9     

In [7]:
# Build X and Y from the pre-processed frame; both are passed to gen_dataloader below.
# NOTE(review): the saved output of this cell is a TypeError about 'timestamp' being
# applied to a float, and its execution count (7) is lower than the preceding
# cells' (9-11) - re-run from a fresh kernel to confirm this call succeeds.
X, Y = sf.data_preparation(df)

TypeError: descriptor 'timestamp' for 'pandas._libs.tslibs.timestamps._Timestamp' objects doesn't apply to a 'float' object

### Training the Model

In [5]:
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler


def gen_dataloader(x, y, test_size=0.2, random_state=0):
    """Split the samples, standardise the features and wrap both splits in DataLoaders.

    The scaler is fitted on the training split only, so the held-out rows do not
    influence the mean/variance used for scaling (no test-set leakage). The split
    itself depends only on the number of samples and ``random_state``.

    Args:
        x: 2-D array-like of features, one row per sample.
        y: Array-like of class indices, one per row of ``x``; they are cast to
            integers and one-hot encoded against ``len(config.classes)``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(train_loader, test_loader)``. Each loader yields
        ``(features, one_hot_label)`` float32 pairs and uses the DataLoader
        defaults (batch size 1, no shuffling).
    """
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=test_size, random_state=random_state)

    # TODO: choose scaler (MinMaxScaler was the alternative under consideration)
    scaler = StandardScaler()
    scaler.fit(train_x)  # statistics come from the training rows only
    num_classes = len(config.classes)

    def _to_dataset(features, labels):
        # Scale with the train-fitted scaler and convert to float32 tensors.
        features = torch.tensor(scaler.transform(features), dtype=torch.float32)
        labels = F.one_hot(torch.tensor(labels).long(), num_classes).to(torch.float32)
        return TensorDataset(features, labels)

    train_loader = DataLoader(_to_dataset(train_x, train_y))
    test_loader = DataLoader(_to_dataset(test_x, test_y))

    return train_loader, test_loader


# Create the train/test loaders from the prepared features X and labels Y.
train_loader, test_loader = gen_dataloader(X, Y)  # with default values

In [6]:
from torch import optim


def train_model(model, train_loader, num_epochs=config.num_training_epochs, lr=config.learning_rate,
                momentum=config.momentum):
    """Train ``model`` with Adam and cross-entropy loss, one optimiser step per batch.

    Each row of a batch is cut into consecutive slices of
    ``config.n_training_cols`` values. The slices are fed to the model one at a
    time while the hidden state is carried along, and the output of the last
    step is scored against the batch target.

    Args:
        model: Recurrent model exposing ``initHidden()`` and callable as
            ``model(step_input, hidden) -> (output, hidden)``.
        train_loader: Iterable of ``(training_input, target)`` batches.
            NOTE(review): only the final step's output is compared with
            ``target``, so this presumably relies on a batch size of 1 -
            confirm against the loader configuration.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to Adam.
        momentum: Not used by Adam; kept so existing callers (and a switch back
            to SGD) keep working.

    Returns:
        ``(model, criterion)`` - the trained model and the loss function used.

    Raises:
        ValueError: If a batch contains no time steps, so the model was never
            invoked and there is no output to score.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    step = config.n_training_cols

    print("Training STARTED")

    for epoch in range(num_epochs):
        model.train()  # set the model in training mode
        total_train_loss = 0  # summed (not averaged) loss over the epoch

        for training_input, target in train_loader:
            hidden = model.initHidden()  # fresh hidden state for every batch
            output = None

            for data_row in training_input:
                # Walk the flattened row one time step (``step`` columns) at a time.
                for start in range(0, len(data_row), step):
                    model_input = data_row[None, start:start + step].float()
                    output, hidden = model(model_input, hidden)

            if output is None:
                raise ValueError("training batch produced no time steps (empty input row)")

            # A single zero_grad is enough: the optimiser holds all model parameters.
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        if (epoch + 1) % 10 == 0:
            print("Epoch", epoch + 1, "Training Loss:", total_train_loss)

    print("Training FINISHED")

    return model, criterion


# Instantiate the project's RNN and train it with the defaults taken from config.
# NOTE(review): the second constructor argument is config.n_hidden_layers - confirm
# whether rnn.RNN expects a hidden size or a layer count in that position.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt, so no
# completed training run is recorded in this notebook.
model = rnn.RNN(config.n_training_cols, config.n_hidden_layers, len(config.classes))
model, criterion = train_model(model=model, train_loader=train_loader)


Training STARTED


KeyboardInterrupt: 

## Testing

In [None]:
def print_training_accuracy(model, train_loader, criterion, classes):
    """Print the training loss plus per-class and overall accuracy.

    The loss and the per-class correct/total counts are obtained from
    ``util.compute_accuracy_rnn``; the loss is divided by the number of samples
    in ``train_loader.dataset`` before printing. Classes without any samples
    are reported as N/A.
    """
    loss_value, hits, totals = util.compute_accuracy_rnn(
        model=model, loader=train_loader, criterion=criterion)

    # average training loss
    mean_loss = loss_value / len(train_loader.dataset)
    print('Training Loss: {:.6f}\n'.format(mean_loss))

    for idx in range(len(classes)):
        label = classes[idx]
        if not totals[idx] > 0:
            # nothing to divide by for this class
            print('Training Accuracy of %5s: N/A ' % (label))
            continue
        percentage = 100.0 * hits[idx] / totals[idx]
        print('Training Accuracy of %5s: %2d%% (%2d/%2d)' % (
            label, percentage, np.sum(hits[idx]), np.sum(totals[idx])))

    hit_sum = np.sum(hits)
    sample_sum = np.sum(totals)
    print('Training Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * hit_sum / sample_sum, hit_sum, sample_sum))


# Report loss and accuracy on the training loader, using the criterion returned by train_model.
print_training_accuracy(model=model, train_loader=train_loader, criterion=criterion, classes=config.classes)

In [None]:
def print_testing_accuracy(model, test_loader, classes, criterion):
    """Print the test loss plus per-class and overall accuracy.

    The loss and the per-class correct/total counts are obtained from
    ``util.compute_accuracy_rnn``; the loss is divided by the number of samples
    in ``test_loader.dataset`` before printing. Classes without any test
    samples are reported as N/A.
    """
    loss_value, hits, totals = util.compute_accuracy_rnn(
        model=model, loader=test_loader, criterion=criterion)

    # average test loss
    mean_loss = loss_value / len(test_loader.dataset)
    print('Test Loss: {:.6f}\n'.format(mean_loss))

    for idx in range(len(classes)):
        label = classes[idx]
        if not totals[idx] > 0:
            # nothing to divide by for this class
            print('Test Accuracy of %5s: N/A (no testing examples)' % (label))
            continue
        percentage = 100.0 * hits[idx] / totals[idx]
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            label, percentage, np.sum(hits[idx]), np.sum(totals[idx])))

    hit_sum = np.sum(hits)
    sample_sum = np.sum(totals)
    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * hit_sum / sample_sum, hit_sum, sample_sum))


# Report loss and accuracy on the held-out test loader, using the criterion returned by train_model.
print_testing_accuracy(model=model, test_loader=test_loader, criterion=criterion, classes=config.classes)