<a href="https://colab.research.google.com/github/MengOonLee/Deep_learning/blob/master/PyTorch/Transformer/Tabular/TimeSeriesForecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

T = 1000
num_entities = 3
num_features = 2

data_dict = {}
for eid in range(num_entities):
    t = np.linspace(0, 100, T)
    f1 = np.sin(t / 5 + eid) + 0.1 * np.random.randn(T)
    f2 = np.cos(t / 7 + eid) + 0.1 * np.random.randn(T)
    data_dict[eid] = np.stack([f1, f2], axis=1)

data_dict

{0: array([[-0.07128091,  0.93790153],
        [ 0.20601548,  0.94605575],
        [ 0.00167567,  1.06531155],
        ...,
        [ 0.98205582, -0.13919762],
        [ 0.94552301, -0.04329776],
        [ 0.96030832, -0.05028652]]),
 1: array([[ 0.85276768,  0.6288676 ],
        [ 0.64769861,  0.62390474],
        [ 0.87543808,  0.57842596],
        ...,
        [ 0.9046985 , -0.96573103],
        [ 1.08769967, -0.76288693],
        [ 0.93924716, -0.95458607]]),
 2: array([[ 0.91502969, -0.29877488],
        [ 0.89600783, -0.33235187],
        [ 0.86294203, -0.38180884],
        ...,
        [ 0.06144888, -0.70264627],
        [-0.16449508, -0.74815663],
        [ 0.06712878, -0.77144457]])}

In [2]:
import torch
torch.manual_seed(seed=42)

class TimeSeriesDataset(torch.utils.data.Dataset):
    def __init__(self, data_dict, input_window, output_window):
        self.series = []
        self.entity_ids = []
        self.input_window = input_window
        self.output_window = output_window

        for eid, data in data_dict.items():
            for i in range(len(data) - input_window - output_window):
                x = data[i : i + input_window]
                y = data[i + input_window : i + input_window + output_window]
                self.series.append((x, y))
                self.entity_ids.append(eid)

        self.num_entities = len(data_dict)

    def __len__(self):
        return len(self.series)

    def __getitem__(self, idx):
        x, y = self.series[idx]
        entity_id = self.entity_ids[idx]
        return (
            torch.tensor(data=x, dtype=torch.float32),
            torch.tensor(data=y, dtype=torch.float32),
            torch.tensor(data=entity_id, dtype=torch.long)
        )

input_window = 48
output_window = 24
ds_train = TimeSeriesDataset(data_dict=data_dict,
    input_window=input_window, output_window=output_window)
dl_train = torch.utils.data.DataLoader(dataset=ds_train,
    batch_size=32, shuffle=True)
len(dl_train.dataset)

2784

In [12]:
import torch
torch.manual_seed(seed=42)

class PositionalEncoding(torch.nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]

class TimeSeriesForecast(torch.nn.Module):
    def __init__(self, num_entities, feature_size, entity_emb_dim=8, d_model=64,
            nhead=8, num_layers=3, dropout=0.1):
        super().__init__()
        self.entity_embedding = torch.nn.Embedding(num_embeddings=num_entities,
            embedding_dim=entity_emb_dim)
        self.input_proj = torch.nn.Linear(in_features=feature_size + entity_emb_dim,
            out_features=d_model)
        self.pos_encoder = PositionalEncoding(d_model=d_model)
        self.transformer = torch.nn.TransformerEncoder(num_layers=num_layers,
            encoder_layer=torch.nn.TransformerEncoderLayer(
                d_model=d_model, nhead=nhead, dim_feedforward=d_model*4,
                dropout=dropout, batch_first=True))
        self.decoder = torch.nn.Linear(in_features=d_model,
            out_features=feature_size * output_window)
        self.output_window = output_window
        self.feature_size = feature_size

    def forward(self, src, entity_id):
        B, T, F = src.shape
        entity_emb = self.entity_embedding(entity_id)
        entity_emb_expanded = entity_emb.unsqueeze(1).expand(-1, T, -1)
        src = torch.cat([src, entity_emb_expanded], dim=-1)

        src = self.input_proj(src)
        src = self.pos_encoder(src)
        enc_output = self.transformer(src)
        out = self.decoder(enc_output[:, -1, :])
        out = out.view(B, self.output_window, self.feature_size)
        return out

model = TimeSeriesForecast(num_entities=num_entities, feature_size=num_features)

In [13]:
import torch
torch.manual_seed(seed=42)

def train_model(model, dl_train, epochs=20, lr=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for x, y, eid in dl_train:
            optimizer.zero_grad()
            preds = model(x, eid)
            loss = criterion(preds, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dl_train)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

train_model(model, dl_train, epochs=20)

Epoch 1/10, Loss: 0.1958
Epoch 2/10, Loss: 0.0353
Epoch 3/10, Loss: 0.0231
Epoch 4/10, Loss: 0.0195
Epoch 5/10, Loss: 0.0181
Epoch 6/10, Loss: 0.0168
Epoch 7/10, Loss: 0.0164
Epoch 8/10, Loss: 0.0156
Epoch 9/10, Loss: 0.0154
Epoch 10/10, Loss: 0.0149


In [31]:
import torch
torch.manual_seed(seed=42)

def forecast(model, src, entity_id, pred_len):
    model.eval()
    with torch.no_grad():
        src = src.unsqueeze(0)
        entity_id = torch.tensor(data=[entity_id])
        preds = model(src, entity_id)
        return preds.squeeze(0).numpy()

i, id = 888, 1
x = torch.tensor(data=data_dict[id][i : i + input_window],
    dtype=torch.float32)
y = data_dict[id][i + input_window : i + input_window + output_window]
y_pred = forecast(model, x, entity_id=id, pred_len=output_window)
print("y:", y)
print("y_pred:", y_pred)

y: [[ 0.77091201 -0.13375599]
 [ 0.75352538 -0.12422246]
 [ 0.91731976 -0.27321802]
 [ 0.80422489 -0.30810425]
 [ 0.73318738 -0.42620719]
 [ 0.83282173 -0.26848276]
 [ 0.72980904 -0.38207294]
 [ 0.79906559 -0.10571329]
 [ 0.8199129  -0.56774012]
 [ 0.84259372 -0.20982637]
 [ 0.9824768  -0.39180723]
 [ 0.77390561 -0.28363771]
 [ 0.68105386 -0.42982536]
 [ 0.97853799 -0.46495799]
 [ 0.93230588 -0.66800758]
 [ 0.9558214  -0.38904966]
 [ 0.91957769 -0.64779903]
 [ 0.92163031 -0.60056508]
 [ 0.92840276 -0.57289515]
 [ 0.86390194 -0.5240665 ]
 [ 0.97676027 -0.6245762 ]
 [ 0.82367513 -0.39934494]
 [ 1.07931982 -0.36649008]
 [ 1.12059914 -0.65808368]]
y_pred: [[ 0.7346154  -0.2775444 ]
 [ 0.7617782  -0.27883735]
 [ 0.7337716  -0.3463002 ]
 [ 0.73953146 -0.34721524]
 [ 0.76058406 -0.3877237 ]
 [ 0.75093335 -0.34155047]
 [ 0.8083109  -0.36562157]
 [ 0.79522675 -0.35184228]
 [ 0.7815093  -0.39119858]
 [ 0.8243122  -0.37632406]
 [ 0.81537294 -0.4079032 ]
 [ 0.825135   -0.431915  ]
 [ 0.80610317 -0