In [None]:
import torch
from IPython.display import clear_output
# Record the installed PyTorch version in the cell output so the environment
# this notebook was run with is visible to readers.
pt_version = torch.__version__
print(pt_version)

In [None]:
import torch
from torch import nn
from torch_geometric_temporal.nn.attention.mtgnn import MTGNN
import torch
import numpy as np

def main():
    """Smoke-test MTGNN with a user-supplied adjacency matrix.

    Builds a small MTGNN (10 nodes, 5 input features, 1 output feature),
    runs one forward pass on a random batch together with a random,
    row-normalised adjacency matrix, and prints the shapes of the input,
    the adjacency and the output.
    """
    # ---- 1. Problem dimensions -------------------------------------------
    n_nodes = 10       # e.g., 10 countries
    history_len = 12   # 12 time steps of historical data
    n_in = 5           # e.g., 5 features per country (5 sectors of imports)
    n_out = 1          # e.g., export volume; use the sector count to predict several sectors
    n_batch = 4

    # build_adj=False because the adjacency (trade relationships) is passed in
    # at call time instead of being learned by the model.
    hyperparams = {
        "gcn_true": True,
        "build_adj": False,
        "gcn_depth": 2,
        "num_nodes": n_nodes,
        "kernel_set": [2],          # minimal kernel set for demonstration
        "kernel_size": 2,
        "dropout": 0.3,
        "subgraph_size": 5,
        "node_dim": 16,
        "dilation_exponential": 1,
        "conv_channels": 16,
        "residual_channels": 16,
        "skip_channels": 32,
        "end_channels": 64,
        "seq_length": history_len,
        "in_dim": n_in,
        "out_dim": n_out,
        "layers": 1,
        "propalpha": 0.05,
        "tanhalpha": 3.0,
        "layer_norm_affline": True,
    }
    net = MTGNN(**hyperparams)

    # ---- 2. Trade-based adjacency (dummy) --------------------------------
    # Stand-in for a real N x N matrix of trade scores; every row is scaled
    # so that it sums to 1 (the clamp guards against dividing by zero).
    adjacency = torch.rand(n_nodes, n_nodes)
    row_totals = adjacency.sum(dim=1, keepdim=True).clamp(min=1e-9)
    adjacency = adjacency / row_totals

    # ---- 3. Dummy input ---------------------------------------------------
    # Layout: (batch, features per node, nodes, time steps).
    inputs = torch.randn(n_batch, n_in, n_nodes, history_len)

    # ---- 4. Forward pass --------------------------------------------------
    prediction = net(inputs, A_tilde=adjacency)

    for label, tensor in (
        ("Input shape:", inputs),
        ("Adjacency shape:", adjacency),
        ("Output shape:", prediction),
    ):
        print(label, tensor.shape)


if __name__ == "__main__":
    main()

Input shape: torch.Size([4, 5, 10, 12])
Adjacency shape: torch.Size([10, 10])
Output shape: torch.Size([4, 1, 10, 1])


## Base the model on this implementation

In [None]:

import torch
from torch import nn
from torch_geometric_temporal.nn.attention.mtgnn import MTGNN
import torch
import numpy as np

def main():
    """Run one MTGNN forward pass that predicts all five sectors at once.

    Same demo as the single-output version, but with ``out_dim`` equal to
    ``in_dim`` (5 sectors in, 5 sectors out). The adjacency is supplied by
    the caller (``build_adj=False``) as a random, row-normalised matrix.
    Prints the input, adjacency and output shapes; nothing is trained here.
    """
    # ---------------------------------------------
    # 1. Configuration & Hyperparameters
    # ---------------------------------------------
    num_nodes = 10
    seq_length = 12
    in_dim = 5            # Number of input sectors
    out_dim = 5           # Number of output sectors
    batch_size = 4

    # We'll use the smaller kernel_set and fewer layers to avoid dimension issues:
    model = MTGNN(
        gcn_true=True,
        build_adj=False,    # We will provide our adjacency, do not learn a new one
        gcn_depth=2,
        num_nodes=num_nodes,
        kernel_set=[2],     # minimal kernel for demonstration
        kernel_size=2,
        dropout=0.3,
        subgraph_size=5,
        node_dim=16,
        dilation_exponential=1,
        conv_channels=16,
        residual_channels=16,
        skip_channels=32,
        end_channels=64,
        seq_length=seq_length,
        in_dim=in_dim,
        out_dim=out_dim,    # same dimension as input (5 sectors)
        layers=1,
        propalpha=0.05,
        tanhalpha=3.0,
        layer_norm_affline=True,
    )

    # ---------------------------------------------
    # 2. Define or Load Adjacency Matrix
    # ---------------------------------------------
    # Suppose you already have an N x N matrix of trade scores.
    # Here we create a dummy adjacency for illustration and row-normalize it
    # (the clamp avoids a division by zero for an all-zero row):
    A_tilde = torch.rand(num_nodes, num_nodes)
    A_tilde = A_tilde / A_tilde.sum(dim=1, keepdim=True).clamp(min=1e-9)

    # ---------------------------------------------
    # 3. Create Dummy Input Data
    # ---------------------------------------------
    # Shape is (batch_size, in_dim=5, num_nodes, seq_length).
    # "in_dim=5" means each country has 5 sector features per time step.
    x_in = torch.randn(batch_size, in_dim, num_nodes, seq_length)

    # ---------------------------------------------
    # 4. Forward Pass
    # ---------------------------------------------
    output = model(x_in, A_tilde=A_tilde)

    print("Input shape:", x_in.shape)
    print("Adjacency shape:", A_tilde.shape)
    print("Output shape:", output.shape)

if __name__ == "__main__":
    main()

Input shape: torch.Size([4, 5, 10, 12])
Adjacency shape: torch.Size([10, 10])
Output shape: torch.Size([4, 5, 10, 1])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric_temporal.nn.attention.mtgnn import MTGNN

# Problem size for the country-level demo.
num_countries = 10
num_sectors = 5
seq_length = 12
batch_size = 4

# Random stand-in for the (N x N) trade-relationship adjacency between
# countries; each row is divided by its sum (clamped to avoid a zero divisor).
A = torch.rand(num_countries, num_countries)
A = A.div(A.sum(dim=1, keepdim=True).clamp_min(1e-9))

# MTGNN that forecasts every sector at once (out_dim == num_sectors).
model = MTGNN(
    # graph handling: adjacency A is supplied at call time, not learned
    gcn_true=True,
    build_adj=False,
    gcn_depth=2,
    num_nodes=num_countries,
    subgraph_size=5,
    node_dim=16,
    propalpha=0.05,
    tanhalpha=3.0,
    # temporal convolution
    kernel_set=[2],
    kernel_size=2,
    dilation_exponential=1,
    layers=1,
    seq_length=seq_length,
    # channel widths
    conv_channels=16,
    residual_channels=16,
    skip_channels=32,
    end_channels=64,
    # input / output features: 5 sectors in, 5 predicted sector volumes out
    in_dim=num_sectors,
    out_dim=num_sectors,
    # regularisation / normalisation
    dropout=0.3,
    layer_norm_affline=True,
)

# Example batch laid out as (batch_size, in_dim=5, num_countries=10, seq_length=12).
X_in = torch.randn(batch_size, num_sectors, num_countries, seq_length)

# Forward pass; result has shape (batch_size, 5, 10, 1).
output = model(X_in, A_tilde=A)

# Keep only the node at index 0 (the target country): shape (batch_size, 5, 1).
predictions_for_target = output[..., 0, :]

print(predictions_for_target)

tensor([[[ 1.6872e+00],
         [ 2.8977e+00],
         [ 1.5462e-03],
         [ 9.6436e-01],
         [-1.5651e+00]],

        [[ 1.4357e+00],
         [ 3.8232e+00],
         [ 1.1662e-01],
         [ 2.6538e-01],
         [-2.5016e+00]],

        [[ 7.2784e-01],
         [ 2.9344e+00],
         [ 1.4065e-01],
         [ 4.6304e-01],
         [-1.0732e+00]],

        [[ 9.1411e-01],
         [ 2.6709e+00],
         [-1.9015e-01],
         [ 5.8581e-01],
         [-1.5652e+00]]], grad_fn=<SliceBackward0>)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

def train_model():
    """Train a small MTGNN on synthetic data and report the loss per epoch.

    Uses 100 random samples of shape (5, 10, 12) with random targets of shape
    (5, 10, 1), a random row-normalised adjacency passed in at call time
    (``build_adj=False``), MSE loss and Adam. Prints the mean batch loss after
    each of the 10 epochs and a final completion message. Returns nothing.
    """
    # ---- 1. Hyperparameters ----------------------------------------------
    n_nodes, window = 10, 12
    n_in, n_out = 5, 5          # input / output sectors
    n_per_batch = 4
    lr = 0.001
    epochs = 10

    # ---- 2. Model ---------------------------------------------------------
    net = MTGNN(
        gcn_true=True,
        build_adj=False,        # adjacency is provided by us
        gcn_depth=2,
        num_nodes=n_nodes,
        kernel_set=[2],         # minimal kernel set for demonstration
        kernel_size=2,
        dropout=0.3,
        subgraph_size=5,
        node_dim=16,
        dilation_exponential=1,
        conv_channels=16,
        residual_channels=16,
        skip_channels=32,
        end_channels=64,
        seq_length=window,
        in_dim=n_in,
        out_dim=n_out,
        layers=1,
        propalpha=0.05,
        tanhalpha=3.0,
        layer_norm_affline=True,
    )

    # ---- 3. Adjacency (random demo, rows scaled to sum to 1) --------------
    adjacency = torch.rand(n_nodes, n_nodes)
    adjacency = adjacency / adjacency.sum(dim=1, keepdim=True).clamp(min=1e-9)

    # ---- 4. Synthetic dataset (replace with real data) --------------------
    # 100 samples: inputs (in_dim, nodes, seq_length), targets (out_dim, nodes, 1).
    inputs = torch.randn(100, n_in, n_nodes, window)
    targets = torch.randn(100, n_out, n_nodes, 1)
    loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(inputs, targets),
        batch_size=n_per_batch,
        shuffle=True,
    )

    # ---- 5. Loss and optimiser --------------------------------------------
    loss_fn = nn.MSELoss()
    opt = optim.Adam(net.parameters(), lr=lr)

    # ---- 6. Training loop -------------------------------------------------
    for epoch in range(epochs):
        net.train()
        batch_losses = []

        for xb, yb in loader:
            opt.zero_grad()
            batch_loss = loss_fn(net(xb, A_tilde=adjacency), yb)
            batch_loss.backward()
            opt.step()
            batch_losses.append(batch_loss.item())

        # Mean of the per-batch losses for this epoch.
        avg_loss = sum(batch_losses) / len(batch_losses)
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}")

    print("Training complete.")


if __name__ == "__main__":
    train_model()

Epoch [1/10] - Loss: 2.3408
Epoch [2/10] - Loss: 1.1429
Epoch [3/10] - Loss: 1.0414
Epoch [4/10] - Loss: 1.0214
Epoch [5/10] - Loss: 1.0069
Epoch [6/10] - Loss: 0.9836
Epoch [7/10] - Loss: 0.9694
Epoch [8/10] - Loss: 0.9592
Epoch [9/10] - Loss: 0.9438
Epoch [10/10] - Loss: 0.9317
Training complete.


### UPDATE (START HERE)
As discussed, the sentiment score becomes one of the input features. One other feature will be added for now; let's call it the bilateral trade index.

### Import Statements

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric_temporal.nn.attention.mtgnn import MTGNN

### Data Preparation
Assume the data has already been cleaned.

In [None]:
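# Try batch_size = 4 or 7 for 25 years of historical data; with seq_length = 5
# that gives roughly 5 or 3 batches per epoch respectively.
# Input features: the 10 input sectors + 3 additional features for now.
# NOTE: the model cell uses in_dim = num_sectors + 2 -- confirm whether 2 or 3
# additional features are intended.
# seq_length: each window covers 5 years.
# Assume the data will be read from a CSV file.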
import pandas as pd

class TradeDataset(torch.utils.data.Dataset):
    """Sliding-window dataset built from a CSV of time-ordered rows.

    Sample ``i`` pairs the feature columns of rows ``i .. i+seq_length-1``
    with the target value of row ``i+seq_length``.

    Args:
        csv_file: Path or buffer accepted by ``pandas.read_csv``.
        seq_length: Number of consecutive rows in each input window (>= 1).
        transform: Optional callable applied to the feature window returned
            by ``__getitem__``. ``None`` (default) returns it unchanged.
        num_features: Number of leading columns used as input features.
            Defaults to 13 (10 input sectors + 3 additional features).
        target_col: Positional index of the target column. Defaults to
            ``num_features``, i.e. the column right after the features.

    Attributes:
        X: float32 tensor of shape (num_samples, seq_length, feature columns).
        y: float32 tensor of shape (num_samples,).

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        IndexError: If ``target_col`` is outside the CSV's columns.

    NOTE(review): each sample is (seq_length, features), whereas the MTGNN
    cells in this notebook feed inputs laid out as
    (batch, in_dim, num_nodes, seq_length) -- confirm how the CSV rows map to
    nodes and reshape before passing batches to the model.
    """

    def __init__(self, csv_file, seq_length=5, transform=None,
                 num_features=13, target_col=None):
        if seq_length < 1:
            raise ValueError(f"seq_length must be >= 1, got {seq_length}")
        if target_col is None:
            target_col = num_features

        df = pd.read_csv(csv_file)

        self.seq_length = seq_length
        self.transform = transform

        # Convert each column group to a tensor once, instead of building a
        # Python list of per-window arrays and converting that.
        features = torch.tensor(df.iloc[:, :num_features].to_numpy(dtype="float32"))
        targets = torch.tensor(df.iloc[:, target_col].to_numpy(dtype="float32"))

        # A window needs seq_length rows plus one following row for the target.
        num_windows = max(len(df) - seq_length, 0)
        if num_windows:
            self.X = torch.stack(
                [features[start:start + seq_length] for start in range(num_windows)]
            )
        else:
            # Too few rows for a single window: keep a well-formed empty tensor.
            self.X = features.new_empty((0, seq_length, features.shape[1]))
        self.y = targets[seq_length:seq_length + num_windows]

    def __len__(self):
        """Return the number of (window, target) samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(window, target)`` for ``idx``, applying ``transform`` to the window if set."""
        window = self.X[idx]
        if self.transform is not None:
            window = self.transform(window)
        return window, self.y[idx]

## Country Dictionary

0. Singapore
1. China
2. Malaysia
3. United States
4. Hong Kong, China
5. Indonesia
6. Korea, Rep.
7. Japan
8. Thailand
9. Australia
10. Vietnam
11. India
12. United Arab Emirates
13. Philippines
14. Germany
15. France
16. Switzerland
17. Netherlands

In [None]:
# Notebook-wide configuration for the real-data run.
num_countries = 18   # number of countries in the Excel sheet
num_country_pair = (num_countries - 1) * num_countries   # ordered pairs, excluding a country paired with itself
num_sectors = 10     # for now; pending Ee Zhen's input, then index and split it
seq_length = 5       # each sample is a 5-year window
batch_size = 4       # 7 is also an option
csv_path = ""        # TODO: set to the cleaned trade CSV before running the dataset cell

In [None]:
# Build the sliding-window dataset from the CSV and wrap it in a shuffling
# DataLoader that yields mini-batches of `batch_size` samples.
dataset = TradeDataset(csv_file=csv_path, seq_length=seq_length)
data_loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
)

### Model Code

In [None]:
# Define the model for training. Each node is an ordered country pair, and the
# adjacency is learned by the model itself (build_adj=True) from node embeddings.
model = MTGNN(
    gcn_true=True,
    build_adj=True,        # auto adjacency for training; for model to learn based on node embedding features
    gcn_depth=2,
    num_nodes=num_country_pair,
    kernel_set=[2],         # Minimal kernel set for demonstration
    kernel_size=2,
    dropout=0.3,
    subgraph_size=5,
    node_dim=16,
    dilation_exponential=1,
    conv_channels=16,
    residual_channels=16,
    skip_channels=32,
    end_channels=64,
    seq_length=seq_length,
    in_dim=num_sectors+2,   # input sectors + 2 additional features
    out_dim=num_sectors,    # one forecast per sector
    layers=1,
    propalpha=0.05,
    tanhalpha=3.0,
    layer_norm_affline=True,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# loss function and optimiser
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

epochs = 10 #set to 10 first

# NOTE(review): the earlier demo cells show MTGNN taking inputs of shape
# (batch, in_dim, num_nodes, seq_length) and returning
# (batch, out_dim, num_nodes, 1). Confirm that the batches produced by
# `data_loader` are arranged that way before relying on the reported loss.

# training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    num_batches = 0

    for x_batch, y_batch in data_loader:
        # The model lives on `device`, so the batch has to be moved there too;
        # otherwise the forward pass fails on a CUDA machine.
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()

        # Forward pass through MTGNN (learn through data)
        y_pred = model(x_batch, A_tilde=None, idx=None, FE=None)

        # Compute loss
        loss = criterion(y_pred, y_batch)

        # Backprop and optimize
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Fail loudly on an empty loader instead of depending on a loop index that
    # was never assigned (or was left over from an earlier cell).
    if num_batches == 0:
        raise RuntimeError("data_loader yielded no batches; check the dataset and csv_path")

    avg_loss = total_loss / num_batches
    print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}")

print("Training complete.")


Epoch [1/10] - Loss: 1.4750
Epoch [2/10] - Loss: 1.1267
Epoch [3/10] - Loss: 1.0524
Epoch [4/10] - Loss: 1.0134
Epoch [5/10] - Loss: 0.9940
Epoch [6/10] - Loss: 0.9701
Epoch [7/10] - Loss: 0.9491
Epoch [8/10] - Loss: 0.9402
Epoch [9/10] - Loss: 0.9223
Epoch [10/10] - Loss: 0.9035
Training complete.


In [None]:
# Save the model
# Only the state_dict is written (not the model object), so the loading cell
# has to rebuild an MTGNN with the same constructor arguments before calling
# load_state_dict. The file is written relative to the current working directory.
torch.save(model.state_dict(), "mtgnn_sector_forecast.pt")
print("Model saved to mtgnn_sector_forecast.pt")

## Load the Model

In [None]:
# Rebuild the network with the same constructor arguments as the training
# cell; the checkpoint holds only the state_dict, so the architecture must
# match for load_state_dict to succeed.
model_loaded=MTGNN(
    gcn_true=True,
    build_adj=True,        # adjacency is built by the model itself, as in the training cell
    gcn_depth=2,
    num_nodes=num_country_pair,
    kernel_set=[2],         # Minimal kernel set for demonstration
    kernel_size=2,
    dropout=0.3,
    subgraph_size=5,
    node_dim=16,
    dilation_exponential=1,
    conv_channels=16,
    residual_channels=16,
    skip_channels=32,
    end_channels=64,
    seq_length=seq_length,
    in_dim=num_sectors+2,          # num_sectors input sectors + 2 additional features
    out_dim=num_sectors,        # one output per sector
    layers=1,
    propalpha=0.05,
    tanhalpha=3.0,
    layer_norm_affline=True,
)

# Load the saved weights onto the CPU and switch to evaluation mode.
model_loaded.load_state_dict(torch.load("mtgnn_sector_forecast.pt", map_location=torch.device('cpu')))
model_loaded.eval()

# Random test data for prediction; gradients are not needed for inference.
with torch.no_grad():
    
    # shape: [batch_size=2, in_dim=num_sectors+2, num_nodes=num_country_pair, seq_len=seq_length]
    # With the configuration cell's values (10 sectors, 18 countries, 5-year
    # windows) that is [2, 12, 306, 5].
    X_test = torch.randn(2, num_sectors + 2, num_country_pair, seq_length)
    
    # No adjacency is passed (A_tilde=None) because the model was built with build_adj=True.
    y_pred = model_loaded(X_test, A_tilde=None, idx=None, FE=None)
    
    print("Prediction shape:", y_pred.shape)
    
    # Expected y_pred shape: [2, num_sectors, num_country_pair, 1], i.e. one
    # value per sector for each country pair -- TODO confirm against a fresh run.
    # NOTE(review): the recorded output below ([2, 5, 6, 1]) appears to come
    # from an earlier configuration (5 sectors, 6 country pairs).
    print("Forecasted sectoral trade volumes (sample):")
    print(y_pred)

Prediction shape: torch.Size([2, 5, 6, 1])
Forecasted sectoral trade volumes (sample):
tensor([[[[ 0.2133],
          [-0.0894],
          [-0.5410],
          [-0.0796],
          [ 0.0178],
          [-0.1630]],

         [[ 0.1771],
          [ 0.0513],
          [-0.3636],
          [ 0.4902],
          [-0.2471],
          [-0.2570]],

         [[ 0.2128],
          [ 0.0664],
          [-0.2309],
          [ 0.7080],
          [ 0.0675],
          [ 0.4353]],

         [[-0.4976],
          [-0.4523],
          [-0.2808],
          [-0.4529],
          [-0.3936],
          [-0.7220]],

         [[ 0.5685],
          [ 0.0531],
          [-0.8483],
          [ 0.4036],
          [-0.0140],
          [-0.5388]]],


        [[[-0.4324],
          [-0.1338],
          [ 0.2895],
          [ 0.1366],
          [ 0.0425],
          [-0.9602]],

         [[-0.8531],
          [ 0.3688],
          [ 0.4448],
          [-0.0305],
          [ 0.3498],
          [-0.3390]],

         [[-0.0