In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import backend.config as config
import dataprocessing
import surfacemodelclass as sf
import util

### Data preparation
Extract the current data from the database and prepare it for processing.

In [None]:
# Class labels from the project config: len(CLASSES) is passed to sf.RNN below and
# the entries are printed as class names in the accuracy reports.
CLASSES = config.classes

In [None]:
# Placeholder date (not referenced by the cells visible in this notebook).
FAKE_DATE = pd.to_datetime("1.1.2000")

# Training hyper-parameters used as defaults by train_model.
LR = 0.01
MOMENTUM = 0.9
NUM_EPOCHS = 10

# Arguments handed to sf.RNN when the model is constructed.
HIDDEN_LAYERS = 128
BATCH_SIZE = 20

# When True, data_preparation pads every per-second sample to a fixed row count.
NORMALIZE_SIZE = False

# Recording windows (year, month, day, hour, minute) used to label the terrain
# of each measurement; both ends of every window are inclusive.
pavement_start = pd.Timestamp(2023, 11, 6, 18, 34)
pavement_end = pd.Timestamp(2023, 11, 6, 18, 54)
asphalt_start_1 = pd.Timestamp(2023, 11, 6, 19, 10)
asphalt_end_1 = pd.Timestamp(2023, 11, 6, 19, 16)
asphalt_start_2 = pd.Timestamp(2023, 11, 6, 19, 31)
asphalt_end_2 = pd.Timestamp(2023, 11, 6, 19, 50)


def _pad_rows(rows, target_rows):
    """Append all-NaN rows to the 2-D array ``rows`` until it has ``target_rows`` rows.

    Arrays that already have at least ``target_rows`` rows are returned unchanged.
    """
    n_missing = target_rows - len(rows)
    if n_missing <= 0:
        return rows
    padding = np.full((n_missing, rows.shape[1]), np.nan)
    return np.vstack([rows, padding])


def data_preparation(data, target_rows=20):
    """Turn raw JSON measurements into per-second feature arrays and terrain labels.

    Rows whose ``time`` lies inside one of the module-level recording windows are
    labelled with the matching terrain id from ``config.map_to_int``; rows that
    end up without a terrain label are dropped. The remaining rows are grouped by
    ``(trip_id, second)`` and every group becomes one sample.

    Args:
        data: anything ``pd.read_json`` accepts. The frame must provide the
            columns ``time``, ``trip_id``, ``crash``, ``latitude`` and
            ``longitude``.
        target_rows: number of rows each sample is padded to (with NaN rows)
            when the module-level ``NORMALIZE_SIZE`` flag is set. Samples that
            are already longer are left untouched.

    Returns:
        ``(x, y)`` where ``x`` is a list of 2-D numpy arrays (one row per
        measurement, one column per remaining feature, ``time`` converted to a
        POSIX timestamp) and ``y`` is the list of the groups' minimum terrain
        label.
    """
    df = pd.read_json(data)

    df['time'] = pd.to_datetime(df['time'], format='mixed')

    # Make sure the label column exists even if no row falls into any window;
    # otherwise the dropna below would raise a KeyError.
    if 'terrain' not in df.columns:
        df['terrain'] = np.nan

    # The windows do not overlap, so the assignment order is irrelevant.
    labelled_windows = (
        (pavement_start, pavement_end, 'pavement'),
        (asphalt_start_1, asphalt_end_1, 'asphalt'),
        (asphalt_start_2, asphalt_end_2, 'asphalt'),
    )
    for window_start, window_end, surface in labelled_windows:
        in_window = df['time'].between(window_start, window_end)  # inclusive bounds
        df.loc[in_window, 'terrain'] = config.map_to_int(surface)

    df = df.dropna(subset=['terrain']).copy()

    df['time_second'] = df['time'].dt.floor('s')
    df['time'] = df['time'].map(pd.Timestamp.timestamp)

    non_feature_columns = ['terrain', 'trip_id', 'crash', 'time_second', 'latitude', 'longitude']

    x = []
    y = []
    grouped = df.groupby(['trip_id', 'time_second'])
    for count, (_, table) in enumerate(grouped, start=1):
        if count % 100 == 0:
            print("N trip seconds: " + str(count))

        train_input = table.drop(columns=non_feature_columns).to_numpy()

        if NORMALIZE_SIZE:
            # The padded array has to be kept: numpy never extends arrays in place.
            train_input = _pad_rows(train_input, target_rows)

        x.append(train_input)
        y.append(table.terrain.min())

    return x, y


# Fetch the raw recordings from the database, then build the per-second samples.
raw_data = dataprocessing.get_data_db()
X, Y = data_preparation(data=raw_data)

### Training the Model

In [None]:
def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into a single list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [None]:
def to_loader(x, y):
    """Pair every sample of ``x`` with its label from ``y`` as a list of tuples.

    The result is truncated to the shorter of the two inputs.
    """
    return [(sample, label) for sample, label in zip(x, y)]

In [None]:
def gen_dataloader(x, y, test_size=0.2, random_state=0):
    """Split the samples into train/test loaders and fit a scaler on the training rows.

    Args:
        x: list of per-sample 2-D arrays (rows of feature values).
        y: list of labels, aligned with ``x``.
        test_size: forwarded to ``train_test_split``.
        random_state: forwarded to ``train_test_split``.

    Returns:
        ``(train_loader, test_loader, scaler)`` where both loaders are lists of
        ``(sample, label)`` tuples and ``scaler`` is a ``MinMaxScaler`` fitted
        on the rows of the training samples.
    """
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=test_size, random_state=random_state)

    # Fit on the training split only: fitting on all rows would leak the value
    # range of the held-out test samples into the preprocessing.
    scaler = MinMaxScaler()  # TODO: choose scaler
    scaler.fit(flatten(train_x))

    return to_loader(train_x, train_y), to_loader(test_x, test_y), scaler


# Keep the default test_size and random_state.
train_loader, test_loader, scaler = gen_dataloader(x=X, y=Y)

In [None]:
def train_model(model, device, train_loader, scaler, num_epochs=NUM_EPOCHS, lr=LR, momentum=MOMENTUM):
    """Train ``model`` one sequence at a time with SGD and an NLL loss.

    Every sample of ``train_loader`` is fed to the model row by row; the output
    produced for the last row is compared against the sample's target.

    Args:
        model: recurrent classifier exposing ``initHidden()`` and callable as
            ``model(row, hidden) -> (output, hidden)``.
            NOTE(review): the row shape/dtype expected by the model is not
            visible here; rows are passed as 1-D float32 tensors - confirm.
        device: torch device the model and all tensors are moved to.
        train_loader: iterable of ``(sequence, target)`` pairs, where
            ``sequence`` is a 2-D array of feature rows and ``target`` a single
            class index.
        scaler: fitted scaler whose ``transform`` is applied to every sequence,
            or ``None`` to feed the raw values.
        num_epochs: number of passes over ``train_loader``.
        lr: SGD learning rate.
        momentum: SGD momentum.

    Returns:
        ``(model, criterion, optimizer)``.
    """
    criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    print("Training STARTED")
    model.to(device)

    for e in range(num_epochs):
        model.train()  # set the model in training mode
        total_train_loss = 0.0

        for training_input, target in train_loader:
            if len(training_input) == 0:
                continue  # nothing to feed through the network

            if scaler is not None:
                training_input = scaler.transform(training_input)
            # float32 matches the default parameter dtype (float64 is not supported on MPS).
            sequence = torch.as_tensor(np.asarray(training_input, dtype=np.float32), device=device)

            hidden = model.initHidden()
            if isinstance(hidden, torch.Tensor):
                hidden = hidden.to(device)
            optimizer.zero_grad()

            output = None
            for data_row in sequence:
                output, hidden = model(data_row, hidden)

            # NLLLoss needs integer class indices; the target takes the batch
            # shape of the output (everything except the trailing class axis).
            target_tensor = torch.tensor(int(target), dtype=torch.long, device=device)
            target_tensor = target_tensor.reshape(output.shape[:-1])

            loss = criterion(output, target_tensor)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        if (e + 1) % 10 == 0 or e == 0:
            print("Epoch", e, "Training Loss:", total_train_loss)

    print("Training FINISHED")

    return model, criterion, optimizer


# Use Apple's MPS backend when it is available (local Mac testing), otherwise the CPU.
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
# NOTE(review): BATCH_SIZE is passed as the first sf.RNN argument - confirm this
# matches what the model expects for its first parameter.
model = sf.RNN(BATCH_SIZE, HIDDEN_LAYERS, len(CLASSES))
model, criterion, optimizer = train_model(model, device, train_loader, scaler)


## Testing

In [None]:
def print_training_accuracy(model, train_loader, criterion, classes,
                            device=torch.device("mps" if torch.backends.mps.is_available() else "cpu")
                            ):
    """Print the average training loss plus per-class and overall accuracy.

    Args:
        model: model handed to ``util.compute_accuracy``.
        train_loader: training samples; either an object with a ``dataset``
            attribute (e.g. a torch ``DataLoader``) or a plain sized collection
            of samples.
        criterion: loss function handed to ``util.compute_accuracy``.
        classes: class names, indexed by class id.
        device: torch device handed to ``util.compute_accuracy``.
    """
    training_loss, class_correct, class_total = util.compute_accuracy(model, train_loader, device, criterion)

    # average training loss; plain lists have no ``.dataset`` attribute
    n_samples = len(getattr(train_loader, "dataset", train_loader))
    training_loss = training_loss / n_samples if n_samples else float("nan")
    print('Training Loss: {:.6f}\n'.format(training_loss))

    # Report only classes that exist in both the label list and the statistics.
    for i in range(min(len(classes), len(class_total))):
        if class_total[i] > 0:
            print('Training Accuracy of %5s: %2d%% (%2d/%2d)' % (
                classes[i], 100.0 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            print('Training Accuracy of %5s: N/A ' % (classes[i]))

    n_correct = np.sum(class_correct)
    n_total = np.sum(class_total)
    if n_total > 0:
        print('\nTraining Accuracy (Overall): %2d%% (%2d/%2d)' % (
            100. * n_correct / n_total, n_correct, n_total))
    else:
        print('\nTraining Accuracy (Overall): N/A')


# Report the accuracy on the training split.
print_training_accuracy(model=model, train_loader=train_loader, criterion=criterion, classes=CLASSES,
                        device=device)

In [None]:
def print_testing_accuracy(model, test_loader, device, criterion, classes):
    """Print the average test loss plus per-class and overall accuracy.

    Args:
        model: model handed to ``util.compute_accuracy``.
        test_loader: test samples; either an object with a ``dataset``
            attribute (e.g. a torch ``DataLoader``) or a plain sized collection
            of samples.
        device: torch device handed to ``util.compute_accuracy``.
        criterion: loss function handed to ``util.compute_accuracy``.
        classes: class names, indexed by class id.
    """
    test_loss, class_correct, class_total = util.compute_accuracy(model, test_loader, device, criterion)

    # average test loss; plain lists have no ``.dataset`` attribute
    n_samples = len(getattr(test_loader, "dataset", test_loader))
    test_loss = test_loss / n_samples if n_samples else float("nan")
    print('Test Loss: {:.6f}\n'.format(test_loss))

    # Report only classes that exist in both the label list and the statistics.
    for i in range(min(len(classes), len(class_total))):
        if class_total[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                classes[i], 100.0 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

    n_correct = np.sum(class_correct)
    n_total = np.sum(class_total)
    if n_total > 0:
        print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
            100. * n_correct / n_total, n_correct, n_total))
    else:
        print('\nTest Accuracy (Overall): N/A')


# Keyword arguments keep the call aligned with the signature
# (model, test_loader, device, criterion, classes); the positional call passed
# the criterion as device, the class list as criterion and the device as classes.
print_testing_accuracy(model=model, test_loader=test_loader, device=device, criterion=criterion,
                       classes=CLASSES)