<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Transformers_for_Time_Series_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class TimeSeriesTransformer(nn.Module):
    """Encoder-decoder transformer for sequence-to-sequence forecasting.

    Raw feature vectors are lifted to ``model_dim`` by a shared linear
    projection, passed through ``nn.Transformer`` (built with
    ``batch_first=True``), and mapped to ``output_dim`` by a final linear
    layer.

    Args:
        input_dim: Size of the last dimension of ``src`` and ``tgt``.
        model_dim: Width of the transformer (``d_model``).
        num_heads: Number of attention heads (``nhead``).
        num_layers: Number of layers used for both the encoder and the decoder.
        output_dim: Size of the last dimension of the returned tensor.
        dropout: Dropout probability forwarded to ``nn.Transformer``.

    NOTE(review): no positional encoding is added to the projected inputs,
    so the encoder has no explicit notion of time-step order — confirm
    whether that is intended.
    """

    def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1):
        super().__init__()
        # Submodules are registered in the same order as before so parameter
        # initialisation and the state_dict layout are unchanged.
        self.transformer = nn.Transformer(
            d_model=model_dim,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        # One projection is shared by the encoder and decoder inputs.
        self.input_proj = nn.Linear(input_dim, model_dim)
        self.output_proj = nn.Linear(model_dim, output_dim)

    def forward(self, src, tgt):
        """Run the model on an encoder/decoder input pair.

        Args:
            src: Encoder input, ``(batch, src_len, input_dim)``.
            tgt: Decoder input, ``(batch, tgt_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, tgt_len, output_dim)``.
        """
        src = self.input_proj(src)
        tgt = self.input_proj(tgt)

        # Causal mask: decoder position t may only attend to target positions
        # <= t. Without it the decoder can read future target steps, which
        # leaks the values a forecaster is supposed to predict.
        # Called through the instance so it works whether the helper is a
        # static method (newer PyTorch) or an instance method (older PyTorch).
        tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.size(1))
        tgt_mask = tgt_mask.to(device=tgt.device, dtype=tgt.dtype)

        output = self.transformer(src, tgt, tgt_mask=tgt_mask)
        return self.output_proj(output)

# Hyperparameters: univariate input/output, 64-wide model, 4 heads, 3 layers
input_dim, output_dim = 1, 1
model_dim = 64
num_heads = 4
num_layers = 3

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the model on the selected device, then its optimizer and loss
model = TimeSeriesTransformer(
    input_dim,
    model_dim,
    num_heads,
    num_layers,
    output_dim,
)
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# Random placeholder tensors: 32 sequences of 10 time steps each.
# Drawn in the order src, tgt, y and then moved to the device.
batch_size, seq_len = 32, 10
src = torch.rand(batch_size, seq_len, input_dim).to(device)   # encoder input
tgt = torch.rand(batch_size, seq_len, input_dim).to(device)   # decoder input
y = torch.rand(batch_size, seq_len, output_dim).to(device)    # regression target

# Full-batch training: one optimizer step per epoch, loss reported every epoch
num_epochs = 100
for epoch in range(num_epochs):
    optimizer.zero_grad()          # clear gradients left over from the previous step
    output = model(src, tgt)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")