In [2]:
import spektral

from spektral.layers import GraphConv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

2023-12-24 01:30:05.139131: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-24 01:30:05.139173: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-24 01:30:05.140774: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-24 01:30:05.277451: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

# Example node features (each row corresponds to a node's feature)
# Replace these with your actual data
node_features = torch.tensor([[2.5, 3.0], [5.5, 4.2], [3.1, 3.7]], dtype=torch.float)

# Example edge index (defining the graph structure)
edge_index = torch.tensor([[0, 1, 2, 0], # This is going to be the graph direction. edge 0 --> 
                           [1, 0, 1, 2]], 
                           dtype=torch.long)

# Example static edge weights
# Replace these with your actual data or a method to compute them
edge_weights = torch.tensor([0.5, 0.7, 0.9, 0.4], dtype=torch.float) # Weights of each edge connection.

# Creating a graph with edge weights
graph = Data(x=node_features, edge_index=edge_index, edge_attr=edge_weights)

# GCN Model Definition
class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(graph.num_node_features, 16)
        self.conv2 = GCNConv(16, 1)  # Output size is 1

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr

        x = self.conv1(x, edge_index, edge_weight)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)

        return x

# Initialize model, optimizer, and loss function
model = GCN()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

# Dummy target values (prices) for demonstration
# Replace these with your actual target values
target_values = torch.tensor([10.0, 12.0, 11.5], dtype=torch.float)

# Training Loop
for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    out = model(graph)
    loss = criterion(out.view(-1), target_values)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

# Evaluate the model
model.eval()
predicted_values = model(graph)


Epoch 0, Loss: 118.65608978271484
Epoch 10, Loss: 68.46800231933594
Epoch 20, Loss: 5.2134575843811035
Epoch 30, Loss: 4.327686786651611
Epoch 40, Loss: 3.3659143447875977
Epoch 50, Loss: 11.481392860412598
Epoch 60, Loss: 61.27983093261719
Epoch 70, Loss: 7.159087657928467
Epoch 80, Loss: 5.361532211303711
Epoch 90, Loss: 24.83232307434082
Epoch 100, Loss: 6.735262393951416
Epoch 110, Loss: 14.21876049041748
Epoch 120, Loss: 12.98305606842041
Epoch 130, Loss: 6.100507736206055
Epoch 140, Loss: 10.257633209228516
Epoch 150, Loss: 28.92128562927246
Epoch 160, Loss: 33.12507247924805
Epoch 170, Loss: 3.5632517337799072
Epoch 180, Loss: 7.786409854888916
Epoch 190, Loss: 0.00377857219427824
tensor([[ 9.7425],
        [11.5521],
        [ 8.7654]], grad_fn=<AddBackward0>)


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import math

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class GNNTransformer(nn.Module):
    def __init__(self, num_node_features, num_classes, num_heads=4, num_layers=2, dim_feedforward=128):
        super(GNNTransformer, self).__init__()
        
        # GNN part
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, 16)

        # Transformer part
        self.pos_encoder = PositionalEncoding(16, dropout=0.1)
        transformer_layer = TransformerEncoderLayer(d_model=16, nhead=num_heads, dim_feedforward=dim_feedforward)
        self.transformer_encoder = TransformerEncoder(transformer_layer, num_layers=num_layers)

        # Output layer
        self.out = nn.Linear(16, num_classes)

    def forward(self, data):
        # GNN part
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # Reshape for Transformer
        x = x.unsqueeze(1)  # Add a fake sequence dimension (suitable for Transformer)

        # Transformer part
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)

        # Output layer
        x = x.squeeze(1)  # Remove the fake sequence dimension
        x = self.out(x)
        return x

# Example usage
num_node_features = 3  # Replace with your actual number of node features
num_classes = 1  # Replace with your actual number of classes (for classification) or 1 (for regression)

model = GNNTransformer(num_node_features, num_classes)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Dummy data for demonstration
node_features = torch.rand(10, num_node_features)  # 10 nodes, each with 'num_node_features' features
edge_index = torch.randint(0, 10, (2, 20))  # 20 edges randomly connected
data = torch_geometric.data.Data(x=node_features, edge_index=edge_index)

# Generate synthetic labels for demonstration (random integers for classification)
labels = torch.randint(0, num_classes, (10,))  # Assuming 10 nodes

# Training loop
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.cross_entropy(out, labels)  # Use cross_entropy for classification
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

Epoch 0, Loss: 0.7563823461532593
Epoch 10, Loss: 0.6653655767440796
Epoch 20, Loss: 0.6533530950546265
Epoch 30, Loss: 0.5221158266067505
Epoch 40, Loss: 0.4485265612602234
Epoch 50, Loss: 0.49332723021507263
Epoch 60, Loss: 0.38029158115386963
Epoch 70, Loss: 0.5505840182304382
Epoch 80, Loss: 0.3836038112640381
Epoch 90, Loss: 0.5117744207382202


# PyTorch GNN-Transformer Model Explanation

## Importing Libraries
- `torch`: The main PyTorch library, used for tensor operations and neural network components.
- `torch.nn`: Submodule containing classes to help create neural network layers.
- `torch.optim`: Contains optimizers like Adam, used for updating network weights.
- `torch.nn.functional`: Functional API that includes methods like activation functions.
- `torch_geometric.nn`: PyTorch Geometric's neural network module, specifically for graph neural networks.
- `math`: Standard Python library for mathematical operations.

## Positional Encoding Class
- Adds positional encodings to inputs, critical in Transformer models. Positional encodings give a sense of order or position to the model.
- `__init__`: Initializes the positional encoding with a specific dropout rate and precomputes positional encodings.
- `forward`: Adds positional encodings to the input tensor `x` and applies dropout.

## GNNTransformer Class
- A custom neural network module combining a Graph Convolutional Network (GCN) with a Transformer.
- `__init__`: Initializes GCN layers, positional encoding, transformer encoder, and final output linear layer.
- `forward`: Defines the forward pass. Processes input data using GCN layers, reshapes for the Transformer, applies Transformer encoder, and outputs through a linear layer.

## Model Initialization
- `num_node_features` and `num_classes`: Number of features per node and output classes.
- Creates an instance of `GNNTransformer` and sets up an Adam optimizer.

## Data Preparation
- Synthetic node features and edge indices created to represent a graph.
- `labels`: Random synthetic labels for each node, used as training targets.

## Training Loop
- Trains the model for a number of epochs.
- In each epoch, zeroes optimizer gradients, runs forward pass, calculates loss using cross-entropy, and backpropagates to update model parameters.
- Prints the loss every 10 epochs to monitor progress.


# Training GNN-Transformer Model for Time Series Prediction

## 1. Determining the Lag Length
- Determine the optimal lag length for the time series data.
- The lag length can be based on domain knowledge, experimentation, or time series cross-validation.

## 2. Preparing the Training Data
- Structure the training data so that each prediction uses the previous `N` days (lag length) of data.
- For each node, create input sequences of node features over the lag window.
- The target for each sequence is the market price at the next time point.

## 3. Sequential Training Approach
- Train the model using a rolling window approach, where it predicts the next point based on the previous lagged sequence.
- For example, use time points 1 to 10 to predict point 11, then use points 2 to 11 to predict point 12, and so on.

## 4. Handling Sequential Data in Transformers
- The Transformer processes these sequences with positional encodings that reflect the sequence's order.
- It learns patterns within these sequences, aided by the graph context from the GNN.

## 5. Training Batches and Randomization
- Randomize the order of training samples for better generalization.
- Maintain the correct temporal order within each individual sequence.

## 6. Model Evaluation and Validation
- Use a validation set to evaluate performance on unseen data.
- Consider time series cross-validation for effective hyperparameter tuning.

## 7. Additional Considerations
- Be aware of the risks of overfitting and apply regularization techniques as needed.
- Preprocess financial time series data carefully and consider including additional relevant features.

This training approach leverages the temporal dynamics of each entity and the relational dynamics within the graph, suitable for financial time series data.


# Using Positional Encodings for Sequential Time Series Data

## Purpose of Positional Encodings
- Transformers lack an inherent mechanism to recognize the order of inputs in a sequence. Positional encodings provide information about the sequence order to the Transformer.

## Generating Positional Encodings
- Positional encodings are generated using sinusoidal functions of different frequencies for each position `pos` in the sequence and dimension `i` in the embedding.
- The formulas for positional encoding `pos_enc` are:
  - `pos_enc(pos, 2i) = sin(pos / 10000^(2i/d_model))`
  - `pos_enc(pos, 2i+1) = cos(pos / 10000^(2i/d_model))`
- `d_model` is the dimension of the embedding, and `pos` is the position in the sequence.

## Applying Positional Encodings
- The positional encoding vector is added to the input embedding vector for each time step in the sequence.
- The `pos` value corresponds to the temporal order in the sequence.
- For different time steps in a sequence, `pos` is calculated accordingly (e.g., 0 for the first day, 1 for the second day, etc.).

## Handling Different Sequence Lengths
- Generate positional encodings up to the maximum sequence length to accommodate sequences of different lengths.
- Apply masking in the Transformer to ignore padded positions in sequences.

## Training and Prediction
- Ensure that each input sequence to the Transformer has the correct positional encodings added during both training and prediction.
- This approach allows the model to understand the temporal relationship between different time steps in the sequence.

Positional encodings are crucial for the Transformer to understand the order of events in time series data, making more informed predictions.
