In [8]:
%load_ext autoreload
%autoreload 2

In [9]:
!pip install transformers

Defaulting to user installation because normal site-packages is not writeable


In [1]:
x_len = 50
y_len = 5

In [2]:
import torch
from transformers import TimeSeriesTransformerModel, TimeSeriesTransformerForPrediction, TimeSeriesTransformerConfig

In [3]:
configuration = TimeSeriesTransformerConfig(context_length = x_len//2, prediction_length=y_len, input_size=3, num_time_features=1)

In [5]:
model = TimeSeriesTransformerForPrediction(configuration)

In [50]:
print(model.config)

TimeSeriesTransformerConfig {
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "attention_dropout": 0.1,
  "cardinality": [
    0
  ],
  "context_length": 5,
  "d_model": 64,
  "decoder_attention_heads": 2,
  "decoder_ffn_dim": 32,
  "decoder_layerdrop": 0.1,
  "decoder_layers": 2,
  "distribution_output": "student_t",
  "dropout": 0.1,
  "embedding_dimension": [
    0
  ],
  "encoder_attention_heads": 2,
  "encoder_ffn_dim": 32,
  "encoder_layerdrop": 0.1,
  "encoder_layers": 2,
  "feature_size": 10,
  "init_std": 0.02,
  "input_size": 1,
  "is_encoder_decoder": true,
  "lags_sequence": [
    1,
    2,
    3,
    4,
    5,
    6,
    7
  ],
  "loss": "nll",
  "model_type": "time_series_transformer",
  "num_dynamic_real_features": 0,
  "num_parallel_samples": 100,
  "num_static_categorical_features": 0,
  "num_static_real_features": 0,
  "num_time_features": 1,
  "prediction_length": 5,
  "scaling": "mean",
  "transformers_version": "4.35.0",
  "use_cache": true
}



In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd

from dataset import TimeSeriesDataset
from models import LinearRegression

train_ds = TimeSeriesDataset('sample_data.csv', ['u', 'v', 'w'], x_len=x_len, y_len=y_len, train=True)
test_ds =  TimeSeriesDataset('sample_data.csv', ['u', 'v', 'w'], x_len=x_len, y_len=y_len, train=False)

train_loader = DataLoader(train_ds, batch_size = 64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size = 64, shuffle=True)


In [8]:
print(len(train_loader))

6148


In [7]:
for x, y in train_loader:
    batch_x = x
    batch_y = y
    break


In [8]:
print(batch_x.shape)

torch.Size([64, 50, 3])


In [9]:
print(batch_y.shape)

torch.Size([64, 5, 3])


In [10]:
past_time_features = torch.zeros((batch_x.shape[0], batch_x.shape[1], 1))
for i in range(past_time_features.shape[1]):
    past_time_features[:, i, 0] = i
print(past_time_features)

tensor([[[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        ...,

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]]])


In [11]:
print(past_time_features.shape)

torch.Size([64, 50, 1])


In [12]:
past_observed_mask = torch.ones_like(batch_x)

In [13]:
future_time_features = torch.zeros((batch_y.shape[0], batch_y.shape[1], 1))
for i in range(future_time_features.shape[1]):
    future_time_features[:, i, 0] = x_len + i
print(future_time_features)

tensor([[[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],
         [51.],
         [52.],
         [53.],
         [54.]],

        [[50.],


In [46]:
configuration = TimeSeriesTransformerConfig(prediction_length=y_len, input_size=3, num_time_features=1, output_hidden_states=True)

In [47]:
model = TimeSeriesTransformerForPrediction(configuration)

In [48]:
outputs = model.forward(past_values=batch_x, past_time_features=past_time_features, past_observed_mask=past_observed_mask, future_values=batch_y, future_time_features=future_time_features)

In [37]:
print(outputs.keys())

odict_keys(['loss', 'params', 'decoder_hidden_states', 'encoder_last_hidden_state', 'encoder_hidden_states', 'loc', 'scale', 'static_features'])


In [49]:
len(outputs.decoder_hidden_states)

4

In [50]:
print(outputs.decoder_hidden_states[-1].shape)

torch.Size([64, 5, 64])


In [23]:
print(outputs.encoder_last_hidden_state.shape)

torch.Size([64, 5, 64])


In [24]:
outputs = model.generate(past_values=batch_x, past_time_features=past_time_features, past_observed_mask=past_observed_mask, future_time_features=future_time_features)

In [29]:
print(outputs.sequences.mean(dim=1))

tensor([[[-1.2620e-01, -7.8750e-02, -3.7091e-02],
         [ 5.1275e-02,  1.5506e-01, -2.4697e-02],
         [-2.8426e-01, -7.7742e-02, -1.0211e-03],
         [-3.1630e-02,  1.9068e-01,  3.1609e-02],
         [-3.1701e-02,  3.5638e-02,  2.2462e-02]],

        [[-8.1774e-02, -3.8398e-01, -5.1182e-02],
         [-5.3227e-03,  7.1341e-01,  5.5498e-03],
         [-1.8639e-01,  9.6515e-01,  2.4757e-03],
         [-5.4802e-02,  7.1514e-01,  2.1431e-02],
         [-3.2799e-01,  2.3209e-01,  2.7596e-02]],

        [[-2.0681e-02,  3.6318e-02,  3.8466e-03],
         [-9.0670e-02,  9.9933e-02, -2.8738e-04],
         [ 3.4861e-02,  6.3668e-02,  2.4545e-03],
         [-8.4854e-02,  2.5873e-02, -1.3427e-03],
         [-1.9003e-02,  4.9199e-02,  6.4034e-03]],

        [[-1.4310e-01,  7.2943e-02, -1.3158e-01],
         [-1.0458e+00,  1.3994e-01,  2.4679e-03],
         [ 1.0294e+00,  2.1864e-01, -9.2971e-03],
         [-9.7609e-01,  6.5727e-02, -1.7412e-02],
         [-5.7752e-01,  7.7125e-02, -8.1046e

In [13]:
for epoch in range(30):
    avg_loss = 0
    num_batch = 0
    for batch_x, batch_y in train_loader:
        past_time_features = torch.zeros((batch_x.shape[0], batch_x.shape[1], 1))
        for i in range(past_time_features.shape[1]):
            past_time_features[:, i, 0] = i
        # print(past_time_features)

        past_observed_mask = torch.ones_like(batch_x)

        future_time_features = torch.zeros((batch_y.shape[0], batch_y.shape[1], 1))
        for i in range(future_time_features.shape[1]):
            future_time_features[:, i, 0] = x_len + i
        # print(future_time_features)

        outputs = model(past_values=batch_x, past_time_features=past_time_features, past_observed_mask=past_observed_mask, future_values=batch_y, future_time_features=future_time_features)
        loss = outputs.loss
        print(loss)
        loss.backward()

        avg_loss += loss
        num_batch += 1
    print("Epoch", epoch)
    print("Loss", avg_loss/num_batch)

    
    

tensor(3.8177, grad_fn=<DivBackward0>)
tensor(2.8533, grad_fn=<DivBackward0>)
tensor(3.7381, grad_fn=<DivBackward0>)
tensor(3.5779, grad_fn=<DivBackward0>)
tensor(3.4822, grad_fn=<DivBackward0>)
tensor(3.3144, grad_fn=<DivBackward0>)
tensor(3.5969, grad_fn=<DivBackward0>)
tensor(3.9496, grad_fn=<DivBackward0>)
tensor(3.5486, grad_fn=<DivBackward0>)
tensor(3.6138, grad_fn=<DivBackward0>)
tensor(3.1827, grad_fn=<DivBackward0>)
tensor(3.0984, grad_fn=<DivBackward0>)
tensor(3.3480, grad_fn=<DivBackward0>)
tensor(3.7535, grad_fn=<DivBackward0>)
tensor(3.2892, grad_fn=<DivBackward0>)
tensor(3.8992, grad_fn=<DivBackward0>)
tensor(4.1113, grad_fn=<DivBackward0>)
tensor(3.8435, grad_fn=<DivBackward0>)
tensor(3.7581, grad_fn=<DivBackward0>)
tensor(3.6424, grad_fn=<DivBackward0>)
tensor(3.8677, grad_fn=<DivBackward0>)
tensor(3.7115, grad_fn=<DivBackward0>)
tensor(3.9467, grad_fn=<DivBackward0>)
tensor(3.4640, grad_fn=<DivBackward0>)
tensor(3.3617, grad_fn=<DivBackward0>)
tensor(3.5547, grad_fn=<D

: 

In [18]:
print(past_time_features)

tensor([[[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        ...,

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]],

        [[ 0.],
         [ 1.],
         [ 2.],
         ...,
         [47.],
         [48.],
         [49.]]])


In [58]:
outputs = model(past_values=batch_x, past_time_features = past_time_features, past_observed_mask=past_observed_mask, future_values=batch_y,  future_time_features = future_time_features)

Details
12
5
torch.Size([64, 5, 3])
torch.Size([64, 5, 1])
3
torch.Size([64, 3])
torch.Size([64, 3])
Static feat: torch.Size([64, 6])
torch.Size([64, 10, 6])
torch.Size([64, 10, 1])
torch.Size([64, 10, 7])
torch.Size([64, 10, 21])
torch.Size([64, 10, 1])


In [62]:
print(outputs.last_hidden_state.shape)

torch.Size([64, 5, 64])


In [63]:
import torch.nn as nn
linear_layer = nn.Linear(64, 3)

In [65]:
linear_outputs = (linear_layer(outputs.last_hidden_state))

In [66]:
print(linear_outputs.shape)

torch.Size([64, 5, 3])


In [68]:
print(batch_y.shape)

torch.Size([64, 5, 3])


In [71]:
import losses


In [73]:
loss = (losses.MSE_loss(linear_outputs, batch_y))

In [74]:
loss.backward()

In [3]:
import torch.nn as nn

In [60]:
class TransformerModel(nn.Module):

    def __init__(self, input_width, input_length, output_length = 1, hidden_size=64):
        super(TransformerModel, self).__init__()

        self.name = "Transformer"
        self.width = input_width
        self.length = input_length
        self.output_length = output_length
        configuration = TimeSeriesTransformerConfig(prediction_length=output_length, input_size=input_width, context_length=input_length//2, num_time_features=1, d_model=hidden_size)
        self.transformer = TimeSeriesTransformerForPrediction(configuration)
        self.linear = nn.Linear(hidden_size, input_width)


    def forward(self, x):
        past_time_features = torch.zeros((x.shape[0], x.shape[1], 1))
        for i in range(past_time_features.shape[1]):
            past_time_features[:, i, 0] = i

        past_observed_mask = torch.ones_like(x)
        future_observed_mask = torch.zeros((x.shape[0], self.output_length, self.width))
        future_values = torch.zeros((x.shape[0], self.output_length, self.width))
        
        future_time_features = torch.zeros((x.shape[0], self.output_length, 1))
        for i in range(future_time_features.shape[1]):
            future_time_features[:, i, 0] = x.shape[1] + 1
        
        transformer_outs = self.transformer.forward(past_values=x, past_time_features=past_time_features, past_observed_mask=past_observed_mask, future_values=future_values, future_observed_mask=future_observed_mask, future_time_features=future_time_features)
        # print("Transformer outs:", transformer_outs.encoder_last_hidden_state)
        final = self.linear(transformer_outs.encoder_last_hidden_state)
        
        return final

In [61]:
model = TransformerModel(3, 50, output_length=5)

In [63]:
out = model(batch_x)

Details
32
25
3
torch.Size([64, 3])
torch.Size([64, 3])
Static feat: torch.Size([64, 6])
torch.Size([64, 30, 6])
torch.Size([64, 30, 1])
torch.Size([64, 30, 7])
torch.Size([64, 30, 21])
torch.Size([64, 30, 1])
Transformer outs: tensor([[[-0.8454, -0.7658, -0.9050,  ...,  0.4679,  1.8213, -0.4274],
         [ 0.5682,  0.2422, -0.1665,  ...,  0.4348,  1.8297, -0.5905],
         [ 0.7410,  0.6464,  0.3506,  ...,  0.3658,  1.7525, -0.7766],
         ...,
         [-0.3293, -1.1190, -1.0771,  ...,  0.5427,  1.9099, -0.7080],
         [-1.0992, -1.3562, -0.4832,  ...,  0.5345,  1.9909, -0.6660],
         [-1.2448, -1.0995, -0.1293,  ...,  0.4949,  2.0194, -0.7563]],

        [[-0.1312, -0.9330, -1.6273,  ...,  0.4498,  1.8341, -0.1581],
         [ 0.6346,  0.2994, -0.7922,  ...,  0.4944,  1.9750, -0.2670],
         [ 0.8396,  0.7446, -0.2492,  ...,  0.4406,  1.9686, -0.4193],
         ...,
         [-0.1370, -1.2213,  0.0218,  ..., -0.0350,  2.0296, -0.8359],
         [-0.9637, -1.3752, -0.7

In [64]:
print(out.shape)

torch.Size([64, 25, 3])
