In [1]:
# Dataset

from torch.utils.data import Dataset
import pandas as pd
import json


class TrajectoryDataset(Dataset):
    def __init__(self, data_path):
        # [10 numericals] # x / y / type / code / weather / plane_model / airport_origin / airport_dest / month / day_week / x' / y'
        self.samples = list()
        self.ground_truths = list()
        print("Reading LSTM data..")
        
        with open(data_path, "r") as f:
            data = json.load(f)
        
        for i,row in enumerate(data):
          self.samples.append(torch.tensor(row["sample"]))
          self.ground_truths.append(torch.tensor(row["labels"], dtype=torch.float32))

        # with open(data_path, "r", encoding="utf-8") as f:
            # csv_reader = csv.reader(f)
            # header = next(csv_reader)
            # for line in tqdm(csv_reader):
            #     # TODO make sure line is a list
            #     self.samples.append(line[:-2]) #TODO check everything apart from last element
            #     # TODO include ground truth separately
            #     self.ground_truths.append(line[-1])
                
    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx], self.ground_truths[idx]


ModuleNotFoundError: No module named 'torch'

In [2]:
# Loss

import numpy as np
from frechet import frechet_distance

trajectory_p = np.array([[80.0644976552576, 50.6552672944963],
                          [71.4585771784186, 63.2156178820878],
                          [19.9234400875866, 12.8415436018258]])

trajectory_q = np.array([[5.88378887623549, 11.4293440245092],
                          [84.2895035166293, 67.4984930083156],
                          [90.9000392071903, 36.4088270813227],
                          [34.2789062298834, 0.568102905526757],
                          [43.9584670122713, 75.5553565453738],
                          [24.4398877490312, 30.7297872845083],
                          [35.2576361969113, 39.8860249202698],
                          [62.438058713451, 44.4697478786111],
                          [38.4228205773979, 66.4192265830934]])

print(type(trajectory_p))
print(type(trajectory_q))
print(type(trajectory_p[0][0]))
print(type(trajectory_q[0][0]))
score = frechet_distance(trajectory_p, trajectory_q)
print(score)

ModuleNotFoundError: No module named 'frechet'

In [153]:
# Interpolation

def interpolate(inp, fi):
    i, f = int(fi // 1), fi % 1  # Split floating-point index into whole & fractional parts.
    j = i+1 if f > 0 else i  # Avoid index error.
    return (1-f) * inp[i] + f * inp[j]

# inp = [1, 3, 5, 7, 9]
# new_len = 10

# delta = (len(inp)-1) / (new_len-1)
# outp = [interpolate(inp, i*delta) for i in range(new_len)]

In [154]:
# Model

import torch
import torch.nn as nn


class LSTMModel(nn.Module):
    def __init__(self, hidden_dim, input_size):
        super(LSTMModel, self).__init__()
      
        self.hidden_dim = hidden_dim

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=self.hidden_dim, num_layers=1)
        self.fc = nn.Linear(in_features=self.hidden_dim, out_features=2)
        # [SeqLength, Batch, hidden_size]

    def forward(self, x):
        out,_ = self.lstm(x)
        out_fc = self.fc(out)
        # predictions = out[:, :, -1]
        # TODO add interpolation
        # predictions_interpolated = predictions
        return out_fc#predictions_interpolated


In [160]:
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from statistics import mean


# dataset
data_path = "/Users/filipzarnecki/AiGames/data/prepared_data/data.json"#"/Users/filipzarnecki/AiGames/data/prepared_data/mock.csv"
trajectory_dataset = TrajectoryDataset(data_path)

train_len = round(len(trajectory_dataset)*0.9)
# print(train_len)
val_len = len(trajectory_dataset) - train_len
train, val = random_split(trajectory_dataset, [train_len, val_len], generator=torch.Generator().manual_seed(2022))


# data loaders
batch_size = 1
n_iters = 3000
num_epochs = n_iters / (len(trajectory_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = DataLoader(dataset=train, 
                          batch_size=batch_size, 
                          shuffle=True)

val_loader = DataLoader(dataset=val, 
                        batch_size=batch_size, 
                        shuffle=False)


# vaiables
input_size = 7
hidden_dim = 50

model = LSTMModel(hidden_dim, input_size)


# check
# print(model)
# print(len(list(model.parameters())))
# for i in range(len(list(model.parameters()))):
#     print(list(model.parameters())[i].size())


# loss
# criterion = frechet_distance

# optimizer
learning_rate = 0.1

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  #sgd


# training
# Number of steps to unroll
# seq_dim = 28  

iter = 0
for epoch in range(num_epochs):
    for i, (input_vectors, labels) in enumerate(train_loader):
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        # outputs.size() --> 100, 10
        # print(input_vectors.shape)
        outputs = model(input_vectors)

        # Calculate Loss: softmax --> cross entropy loss
        # print(outputs.shape)
        # print(labels.shape)
        # outputs = torch.squeeze(outputs).numpy().astype(dtype="float64")
        # labels = torch.squeeze(labels).numpy().astype(dtype="float64")
        # print(outputs.shape)
        # print(labels.shape)
        # loss = frechet_distance(outputs, labels)

        # print(outputs.type())
        # print(labels.type())
        outputs = torch.nn.functional.interpolate(torch.unsqueeze(outputs, 1), size=labels.shape[1:])#
        # print(outputs.type())
        # print(labels.type())

        loss = torch.cdist(outputs, labels)
        loss = torch.sum(loss, (1,2,3))

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 100 == 0:
            # Calculate Accuracy         
            frechet_distances = []
            # Iterate through test dataset
            for input_vectors, labels in val_loader:
                # Forward pass only to get logits/output
                outputs = model(input_vectors)

                outputs = torch.nn.functional.interpolate(torch.unsqueeze(outputs, 1), size=labels.shape[1:])
                # print(outputs.type())
                # print(labels.type())

                loss = torch.cdist(outputs, labels)
                loss = torch.sum(loss, (1,2,3))

            # Print Loss
            print('Iteration: {}. Loss: {}.'.format(iter, loss.item()))

Reading LSTM data..
Iteration: 100. Loss: 9898.75390625.
Iteration: 200. Loss: 9899.8486328125.
Iteration: 300. Loss: 9898.8115234375.
Iteration: 400. Loss: 9898.880859375.
Iteration: 500. Loss: 9899.3544921875.
Iteration: 600. Loss: 9898.6611328125.
Iteration: 700. Loss: 9898.6640625.
Iteration: 800. Loss: 9899.9638671875.
Iteration: 900. Loss: 9899.17578125.
Iteration: 1000. Loss: 9899.244140625.
Iteration: 1100. Loss: 9898.732421875.
Iteration: 1200. Loss: 9899.228515625.
Iteration: 1300. Loss: 9898.970703125.
Iteration: 1400. Loss: 9898.6484375.
Iteration: 1500. Loss: 9898.8125.
Iteration: 1600. Loss: 9900.548828125.
Iteration: 1700. Loss: 9899.15234375.
Iteration: 1800. Loss: 9898.654296875.
Iteration: 1900. Loss: 9898.6875.
Iteration: 2000. Loss: 9898.640625.
Iteration: 2100. Loss: 9898.73828125.
Iteration: 2200. Loss: 9898.69921875.
Iteration: 2300. Loss: 9898.68359375.
Iteration: 2400. Loss: 9899.5791015625.
Iteration: 2500. Loss: 9898.6533203125.
Iteration: 2600. Loss: 9898.64