In [3]:
import numpy as np
import pandas as pd
import plotly.express as px

In [4]:
data = np.load('../data/metr_la_new.npz')

In [5]:
data['edges']

array([[  0,  37],
       [  0,  54],
       [  0, 116],
       ...,
       [206, 155],
       [206, 159],
       [206, 163]], dtype=int32)

In [6]:
dataset = data['targets']
dataset_size = len(dataset)

In [7]:
for i in range(0, len(dataset)):
    for j in range(0, len(dataset[i])):
        if np.isnan(dataset[i][j]):
            dataset[i][j] = dataset[max(i - 1, 0)][j]

In [8]:
import torch 
import random
from torch.utils.data import TensorDataset, DataLoader


In [22]:
device = "cpu" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [23]:
train_size = int(0.6 * dataset_size)
test_size =  int(0.2 * dataset_size)
vertice = 0
dataset_for_vertice = dataset[:, vertice]

In [24]:
coef = 20
pred_cnt = 12

In [25]:
samples_train = [i for i in random.sample(range(coef, dataset_size - pred_cnt), train_size)]

X_train = [dataset_for_vertice[i - coef: i] for i in samples_train]
X_train = torch.tensor(X_train).to(device)

y_train = [dataset_for_vertice[i:i + pred_cnt] for i in samples_train]
y_train = torch.tensor(y_train).to(device)

samples_test = [i for i in random.sample(range(coef, dataset_size - pred_cnt), test_size)]
X_test = [dataset_for_vertice[i - coef: i] for i in samples_test]
X_test = torch.tensor(X_test).to(device)

y_test = [dataset_for_vertice[i:i + pred_cnt] for i in samples_test]
y_test = torch.tensor(y_test).to(device)

In [26]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
print(y_train)
print(y_test)


torch.Size([20563, 20]) torch.Size([20563, 12])
torch.Size([6854, 20]) torch.Size([6854, 12])
tensor([[62.8571, 63.7500, 68.3333,  ..., 66.0000, 54.5000, 60.7778],
        [63.5000, 66.7500, 66.0000,  ..., 65.3333, 65.3333, 60.7500],
        [67.1250, 65.7778, 67.2500,  ..., 67.1111, 69.0000, 67.4444],
        ...,
        [22.6667, 18.5556, 19.0000,  ..., 66.2222, 66.4444, 68.2500],
        [23.2500, 21.8889, 19.0000,  ..., 16.5000, 18.2222, 16.1250],
        [60.0000, 60.1250, 63.1250,  ..., 56.8889, 63.2500, 59.5556]])
tensor([[67.1250, 64.8750, 66.8889,  ..., 64.3750, 64.5556, 62.2500],
        [65.3333, 65.3333, 65.3333,  ..., 65.3333, 65.3333, 65.3333],
        [67.5000, 67.5000, 67.5000,  ..., 67.5000, 67.5000, 67.5000],
        ...,
        [65.6250, 66.3333, 64.6250,  ..., 66.1250, 66.6667, 67.2500],
        [61.8889, 65.2500, 56.8889,  ..., 61.5000, 64.0000, 63.1250],
        [62.5000, 65.4444, 64.3750,  ..., 63.7778, 62.6250, 62.5556]])


In [27]:
train_data_set = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_data_set,
                        shuffle = True,
                        batch_size = 8)

test_data_set = TensorDataset(X_test, y_test)
test_dataloader = DataLoader(test_data_set,
                        shuffle = True,
                        batch_size = 1)

In [28]:
from torch import nn

model_1 = nn.Linear(coef, 1)
model_1.to(device)

model_1 = torch.compile(model_1)


In [107]:
optimizer = torch.optim.Adam(params = model_1.parameters(), lr = 1e-7, weight_decay=1e-5)
loss_fn = nn.L1Loss()

In [108]:
from tqdm import tqdm

In [109]:
torch.set_float32_matmul_precision('high')

In [110]:
def make_a_guess(model, X):
    ans = []
    updated_X = X
    for k in range(pred_cnt):
        ans.append(model(updated_X).to(device))
        updated_X = updated_X[:, :-1]
        updated_X = torch.cat([updated_X, ans[-1]], dim = 1)
    ans = torch.stack(ans, dim = 0).to(device)
    ans = torch.permute(ans, (1, 0, 2)).squeeze(dim = 2)
    return ans

In [111]:
epochs = 100


losses = []

for i in tqdm(range(epochs)):
    for x, y in train_dataloader:
        y_pred = make_a_guess(model = model_1, X=x)
        loss = loss_fn(y_pred, y).to(device)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


100%|██████████| 100/100 [04:50<00:00,  2.90s/it]


In [104]:
px.line(y = np.array(losses))

In [112]:
np.array(losses).mean()

3.8064255419727058

In [113]:
model_1.eval()
test_losses_using_model = []
with torch.inference_mode():
    for x, y in test_dataloader:
        y_pred = make_a_guess(model = model_1, X=x)
        loss = loss_fn(y_pred, y).to(device)
        test_losses_using_model.append(loss.item())

mean_loss_using_model = np.array(test_losses_using_model).mean()
mean_loss_using_model

3.7863373579636

In [47]:
type(dataset_for_vertice)

numpy.ndarray

In [58]:
average_val = torch.tensor(dataset_for_vertice.mean())
test_losses_using_average = []

for x, y in test_dataloader:
    y_pred = torch.stack([average_val] * pred_cnt, dim = 0).unsqueeze(dim = 0).to(device)
    loss = loss_fn(y_pred, y).to(device)
    test_losses_using_average.append(loss.item())


mean_loss_using_average = np.array(test_losses_using_average).mean()
mean_loss_using_average

6.6828109166922705

In [66]:
test_losses_using_last = []

for x, y in test_dataloader:
    last_val = x[:, -1]
    y_pred = torch.stack([last_val] * pred_cnt, dim = 0).unsqueeze(dim = 0).squeeze(dim = 2).to(device)
    loss = loss_fn(y_pred, y).to(device)
    test_losses_using_last.append(loss.item())


mean_loss_using_last = np.array(test_losses_using_last).mean()
mean_loss_using_last

3.431972785834098

In [91]:
from random import randint

In [92]:
test_losses_using_week_before = []
week_length = 12 * 24 * 7

for i in range(test_size):
    x = randint(week_length, dataset_for_vertice.shape[0] - 1)
    y = torch.tensor(dataset_for_vertice[x]).to(device)
    y_pred = torch.tensor(dataset_for_vertice[x - week_length]).to(device)
    loss = loss_fn(y_pred, y).to(device)
    test_losses_using_week_before.append(loss.item())

mean_loss_using_week_before = np.array(test_losses_using_week_before).mean()
mean_loss_using_week_before

4.732816902374365

In [114]:
from pandas import DataFrame as df

In [117]:
print("Average error for next hour guesses")
print(f"mean_loss_using_average: {mean_loss_using_average}")
print(f"mean_loss_using_last: {mean_loss_using_last}")
print(f"mean_loss_using_week_before :{mean_loss_using_week_before}")
print(f"mean_loss_using_model: {mean_loss_using_model}")


Average error for next hour guesses
mean_loss_using_average: 6.6828109166922705
mean_loss_using_last: 3.431972785834098
mean_loss_using_week_before :4.732816902374365
mean_loss_using_model: 3.7863373579636


In [128]:
comparison = df({"mean_loss_using_average": [mean_loss_using_average.item()],
                 "mean_loss_using_last": [mean_loss_using_last.item()],
                 "mean_loss_using_week_before": [mean_loss_using_week_before.item()],
                 "mean_loss_using_model": [mean_loss_using_model.item()]})
comparison.T

Unnamed: 0,0
mean_loss_using_average,6.682811
mean_loss_using_last,3.431973
mean_loss_using_week_before,4.732817
mean_loss_using_model,3.786337
