## example implementation

In [None]:
#import statements
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch_geometric_temporal.nn.recurrent import AGCRN

## initialisation

In [12]:
class MyTradeDataset(Dataset):
    """Synthetic dataset of random trade sequences, for demonstration only.

    Attributes:
        X: inputs, shape [num_samples, T, num_nodes, in_channels].
        Y: targets, shape [num_samples, num_nodes, num_sectors].
    """

    def __init__(self, num_samples, T, num_nodes, in_channels, num_sectors):
        """Draw standard-normal inputs and targets of the requested sizes."""
        input_shape = (num_samples, T, num_nodes, in_channels)
        target_shape = (num_samples, num_nodes, num_sectors)
        # X is sampled before Y so a seeded run consumes the RNG in a fixed order.
        self.X = torch.randn(input_shape)
        self.Y = torch.randn(target_shape)

    def __len__(self):
        """Return the number of samples (size of the leading axis of X)."""
        return self.X.size(0)

    def __getitem__(self, idx):
        """Return the (input, target) pair stored at position ``idx``."""
        sample, target = self.X[idx], self.Y[idx]
        return sample, target

## training algo

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

# End-to-end demo: train an AGCRN cell plus a linear read-out on random data.
# The sequence is unrolled one time step at a time; only the hidden state after
# the last step is used to predict the per-node, per-sector targets.

# Hyperparameters
num_samples=50  # number of synthetic sequences in the dataset
T=10  # number of time steps per sequence
num_nodes = 10         # e.g. 10 country pairs
num_sectors=5  # width of the per-node target, and part of the input width
node_features = 2        # sentiment & FBIC
in_channels = num_sectors + node_features  # per-node input width fed to AGCRN
out_channels = 8       # hidden dimension in AGCRN
K = 3                  # polynomial filter size
embedding_dims = 4     # dimension of each node's embedding
num_epochs = 5
batch_size = 4
lr = 1e-3  # Adam learning rate

# 1) Build the random dataset and a shuffling loader over it.
dataset = MyTradeDataset(
    num_samples=num_samples,
    T=T,
    num_nodes=num_nodes,
    in_channels=in_channels,
    num_sectors=num_sectors
)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# 2) Instantiate AGCRN
model = AGCRN(
    number_of_nodes=num_nodes,
    in_channels=in_channels,
    out_channels=out_channels,
    K=K,
    embedding_dimensions=embedding_dims
)

# 3) Create the node embedding E outside the model: it is passed to the model
#    explicitly on every call in the training loop below.
#    Random init; wrapped in nn.Parameter so that it is learnable.
E = nn.Parameter(torch.randn(num_nodes, embedding_dims), requires_grad=True)

# 4) A simple linear "prediction head" mapping the hidden dimension
#    (out_channels) -> num_sectors, applied along the last axis of H.
prediction_head = nn.Linear(out_channels, num_sectors)

# 5) Combine everything in a single optimizer. E is created outside `model`,
#    so it must be listed explicitly or it would never be updated.
optimizer = optim.Adam(
    list(model.parameters()) + list(prediction_head.parameters()) + [E],
    lr=lr
)

criterion = nn.MSELoss()

# 6) Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0  # sum over batches of (batch loss * batch size)

    for X_batch, Y_batch in dataloader:
        # X_batch: [batch_size, T, num_nodes, in_channels]
        # Y_batch: [batch_size, num_nodes, num_sectors]
        # (the last batch is smaller when num_samples % batch_size != 0)

        optimizer.zero_grad()

        # No hidden state before the first step; None is passed to the model as-is.
        H = None
        # Unroll over T time steps, feeding each step's H into the next call
        for t in range(T):
            X_t = X_batch[:, t, :, :]  # [batch_size, num_nodes, in_channels]
            H = model(X_t, E, H)  # H is the hidden state, E is the node embedding

        # Map the final hidden state from out_channels -> num_sectors.
        # nn.Linear acts on the last axis, so this is applied to every node.
        # Expected H shape: [batch_size, num_nodes, out_channels] — TODO confirm
        # against the AGCRN documentation.
        Y_pred = prediction_head(H)

        # Compute MSE loss with target
        loss = criterion(Y_pred, Y_batch)

        # Backprop & update
        loss.backward()
        optimizer.step()

        # Weight by the actual batch size so a short final batch is not over-counted.
        total_loss += loss.item() * X_batch.size(0)

    # Per-sample average of the batch losses over the whole epoch.
    avg_loss = total_loss / len(dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

print("Training complete!")

Epoch [1/5], Loss: 1.1194
Epoch [2/5], Loss: 1.0829
Epoch [3/5], Loss: 1.0574
Epoch [4/5], Loss: 1.0366
Epoch [5/5], Loss: 1.0182
Training complete!
