# LSTM model

In [1]:
import torch.nn as nn

class LSTMClusterPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMClusterPredictor, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=3, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        out = self.fc(lstm_out[:, -1, :])
        return out


# Dataset

In [2]:
import pandas as pd
import torch

class SequenceDataset(torch.utils.data.Dataset):
    def __init__(self, group_name):
        df = pd.read_csv('data/clusters.csv', converters={'centroid': eval})
        df = df[df['group'] == group_name].sort_values('start_date')
        df['latitude'] = df['centroid'].apply(lambda x: x[0])
        df['longitude'] = df['centroid'].apply(lambda x: x[1])
        df = df.drop(columns=['group', 'start_date', 'centroid'])
        self.data = df
        self.sequence_length = 3
    
    def __len__(self):
        return len(self.data) - self.sequence_length - 1

    def __getitem__(self, idx):
        sequence = self.data.iloc[idx : idx + self.sequence_length].values
        target = self.data.iloc[idx + self.sequence_length + 1].values
        return torch.tensor(sequence, dtype=torch.float32), torch.tensor(target, dtype=torch.float32)

ds = SequenceDataset('infrastructure')
print(ds[3])
print(len(ds))


(tensor([[ 8.0000, 43.7226, 10.3892],
        [ 5.0000, 43.7228, 10.3924],
        [ 6.0000, 43.7202, 10.4003]]), tensor([ 7.0000, 43.7221, 10.3961]))
20


# Train

In [6]:
import torch
from torch.utils.data import DataLoader

ds = SequenceDataset('infrastructure')
dl = torch.utils.data.DataLoader(ds, batch_size=1, shuffle=False)

model = LSTMClusterPredictor(3, 300, 3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
loss_fn = nn.MSELoss()

model = model.to("cuda")

for epoch in range(500):
    for serie, pred in dl:
        serie = serie.to("cuda")
        pred = pred.to("cuda")

        out = model(serie)
        loss = loss_fn(out, pred)
        acc = ((torch.abs(out - pred) / (torch.abs(pred) + 1e-8)) < 0.05).float().mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch} Loss: {loss.item()} Acc: {acc}")

Epoch 0 Loss: 661.5756225585938 Acc: 0.0
Epoch 1 Loss: 591.831298828125 Acc: 0.0
Epoch 2 Loss: 448.4278259277344 Acc: 0.0
Epoch 3 Loss: 384.174072265625 Acc: 0.0
Epoch 4 Loss: 355.1712341308594 Acc: 0.0
Epoch 5 Loss: 334.2536315917969 Acc: 0.0
Epoch 6 Loss: 316.5251159667969 Acc: 0.0
Epoch 7 Loss: 300.69171142578125 Acc: 0.0
Epoch 8 Loss: 286.20697021484375 Acc: 0.0
Epoch 9 Loss: 272.767822265625 Acc: 0.0
Epoch 10 Loss: 260.18231201171875 Acc: 0.0
Epoch 11 Loss: 248.319580078125 Acc: 0.0
Epoch 12 Loss: 237.08526611328125 Acc: 0.0
Epoch 13 Loss: 226.40855407714844 Acc: 0.0
Epoch 14 Loss: 216.23448181152344 Acc: 0.0
Epoch 15 Loss: 206.5191650390625 Acc: 0.0
Epoch 16 Loss: 197.2263641357422 Acc: 0.0
Epoch 17 Loss: 188.3263397216797 Acc: 0.0
Epoch 18 Loss: 179.79464721679688 Acc: 0.0
Epoch 19 Loss: 171.61117553710938 Acc: 0.3333333432674408
Epoch 20 Loss: 163.75819396972656 Acc: 0.3333333432674408
Epoch 21 Loss: 156.219970703125 Acc: 0.3333333432674408
Epoch 22 Loss: 148.9822998046875 Acc: