## trying out a pytorch geometric temporal implementation

In [1]:
import torch
from IPython.display import clear_output
pt_version = torch.__version__
print(pt_version)

2.6.0+cpu


In [None]:
%pip install torch-geometric-temporal




[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [None]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.nn import MSELoss
import matplotlib.pyplot as plt
import random

# Set seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Create synthetic export data
def generate_synthetic_export_data(num_countries=3, num_timestamps=16, sparsity=0):
    """
    Generate synthetic export data between countries
    
    Args:
        num_countries: Number of countries in the dataset
        num_timestamps: Number of time points
        sparsity: Fraction of country pairs that have trade relationships
    
    Returns:
        DataFrame with columns [timestamp, source_country, target_country, export_volume]
    """
    countries = [f"Country_{i}" for i in range(num_countries)]
    timestamps = pd.date_range(start='2020-01-01', periods=num_timestamps, freq='M')
    
    # Create all possible country pairs
    country_pairs = []
    for src in countries:
        for tgt in countries:
            if src != tgt and random.random() > sparsity:  # Add sparsity
                country_pairs.append((src, tgt))
    
    data = []
    for ts in timestamps:
        for src, tgt in country_pairs:
            # Create some temporal patterns and country-specific effects
            src_idx = int(src.split('_')[1])
            tgt_idx = int(tgt.split('_')[1])
            
            # Base volume depends on source and target country sizes
            base_volume = (src_idx + 1) * 10 + (tgt_idx + 1) * 5
            
            # Add seasonal pattern
            month = ts.month
            seasonal_factor = 1.0 + 0.2 * np.sin(2 * np.pi * month / 12)
            
            # Add trend
            trend_factor = 1.0 + 0.01 * (ts.year - 2020) * 12 + 0.01 * ts.month
            
            # Add some randomness
            noise = np.random.normal(1, 0.1)
            
            export_volume = base_volume * seasonal_factor * trend_factor * noise
            
            data.append({
                'timestamp': ts,
                'source_country': src,
                'target_country': tgt,
                'export_volume': export_volume
            })
    
    return pd.DataFrame(data)

# Generate example data
data = generate_synthetic_export_data(num_countries=3, num_timestamps=16)
print(f"Generated synthetic export data with {len(data)} records")
print(data.head())

# Create a dataset class for MTGNN
class ExportDataset(Dataset):
    def __init__(self, data, num_countries, seq_length, predict_length):
        """
        data: DataFrame with columns [timestamp, source_country, target_country, export_volume]
        num_countries: Total number of countries
        seq_length: Historical sequence length to use for prediction
        predict_length: Number of future time steps to predict
        """
        self.data = data
        self.num_countries = num_countries
        self.seq_length = seq_length
        self.predict_length = predict_length
        
        # Get unique timestamps
        self.timestamps = sorted(data['timestamp'].unique())
        
        # Create a mapping of country names to indices
        countries = sorted(set(data['source_country'].unique()) | set(data['target_country'].unique()))
        self.country_to_idx = {country: idx for idx, country in enumerate(countries)}
        self.idx_to_country = {idx: country for country, idx in self.country_to_idx.items()}
        
        # Create adjacency matrices for each timestamp
        self.adj_matrices = {}
        self.node_features = {}  # Total exports per country at each timestamp
        for ts in self.timestamps:
            adj = np.zeros((num_countries, num_countries))
            ts_data = data[data['timestamp'] == ts]
            
            for _, row in ts_data.iterrows():
                src_idx = self.country_to_idx[row['source_country']]
                tgt_idx = self.country_to_idx[row['target_country']]
                adj[src_idx, tgt_idx] = row['export_volume']
                
            self.adj_matrices[ts] = adj
            
            # Calculate total exports per country
            self.node_features[ts] = np.sum(adj, axis=1)
            
    def __len__(self):
        return len(self.timestamps) - self.seq_length - self.predict_length + 1
    
    def __getitem__(self, idx):
        # Get sequence of timestamps
        input_timestamps = self.timestamps[idx:idx+self.seq_length]
        target_timestamps = self.timestamps[idx+self.seq_length:idx+self.seq_length+self.predict_length]
        
        # Create input tensor of shape (in_channels, num_nodes, seq_length)
        X = np.zeros((1, self.num_countries, self.seq_length))
        for i, ts in enumerate(input_timestamps):
            X[0, :, i] = self.node_features[ts]  # Total exports from each country
        
        # Create target tensor
        y = np.zeros((1, self.num_countries, self.predict_length))
        for i, ts in enumerate(target_timestamps):
            y[0, :, i] = self.node_features[ts]
        
        # Create adjacency matrix - average over the input sequence
        A = np.mean([self.adj_matrices[ts] for ts in input_timestamps], axis=0)
        
        # Row-normalize the adjacency matrix
        row_sums = A.sum(axis=1, keepdims=True)
        # Avoid division by zero
        row_sums[row_sums == 0] = 1
        A_normalized = A / row_sums
        
        # Convert to PyTorch tensors
        return torch.FloatTensor(X), torch.FloatTensor(y), torch.FloatTensor(A_normalized)

# Set up parameters
num_countries = len(set(data['source_country'].unique()) | set(data['target_country'].unique()))
seq_length = 12  # Use 12 months of history
predict_length = 1  # Predict next 3 months

# Create dataset and dataloader
dataset = ExportDataset(data, num_countries, seq_length, predict_length)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)



Generated synthetic export data with 96 records
   timestamp source_country target_country  export_volume
0 2020-01-31      Country_0      Country_1      23.323699
1 2020-01-31      Country_0      Country_2      27.390971
2 2020-01-31      Country_1      Country_0      29.573955
3 2020-01-31      Country_1      Country_2      44.807302
4 2020-01-31      Country_2      Country_0      37.974495


In [45]:
dataset.adj_matrices

{numpy.datetime64('2020-01-31T00:00:00.000000000'): array([[ 0.        , 23.32369885, 27.3909709 ],
        [29.57395491,  0.        , 44.8073016 ],
        [37.9744946 , 43.39949536,  0.        ]]),
 numpy.datetime64('2020-02-29T00:00:00.000000000'): array([[ 0.        , 27.71297426, 32.21264328],
        [28.51221574,  0.        , 44.15584848],
        [39.94246953, 45.63746952,  0.        ]]),
 numpy.datetime64('2020-03-31T00:00:00.000000000'): array([[ 0.        , 25.31813074, 24.98796404],
        [25.5700039 ,  0.        , 40.82754415],
        [38.87849257, 50.99363881,  0.        ]]),
 numpy.datetime64('2020-04-30T00:00:00.000000000'): array([[ 0.        , 22.18684489, 26.19533522],
        [34.97404921,  0.        , 41.74049481],
        [42.99304188, 41.85180063,  0.        ]]),
 numpy.datetime64('2020-05-31T00:00:00.000000000'): array([[ 0.        , 21.84247591, 29.19528898],
        [25.55150605,  0.        , 41.94375924],
        [37.9969181 , 44.85237488,  0.        ]]),


In [43]:
device='cpu'
for X, y, A in train_loader:
    X, y, A = X.to(device), y.to(device), A.to(device)
    print(X.shape, y.shape, A[0].shape)

torch.Size([3, 1, 3, 12]) torch.Size([3, 1, 3, 1]) torch.Size([3, 3])


In [None]:
# Initialize the MTGNN model
from torch_geometric_temporal.nn.attention.mtgnn import MTGNN  

model = MTGNN(
    gcn_true=True,             # Use graph convolution
    build_adj=False,           # We're providing our own adjacency matrix (shape [batch, num_nodes, num_nodes])
    gcn_depth=2,               # Depth of graph convolution
    num_nodes=3,               # 3 nodes (countries)
    kernel_set=[2],         # Kernel set remains as is
    kernel_size=1,             # Base kernel size
    dropout=0.3,               # Dropout rate
    subgraph_size=3,           # Full graph (3 nodes)
    node_dim=40,               # Node embedding dimension
    dilation_exponential=2,    # Dilation factor
    conv_channels=2,          # Convolution channels
    residual_channels=2,      # Residual channels
    skip_channels=4,          # Skip channels
    end_channels=8,          # End convolution channels
    seq_length=12,             # Input sequence length (from X shape)
    in_dim=1,                  # Input dimension (export volume channel) ()
    out_dim=num_countries,     # Output dimension (predicted export volume) (should be for each sector and country, so is sector*country)
    layers=1,                  # Number of MTGNN layers
    propalpha=0.05,            # Propagation weight
    tanhalpha=3,               # Tanh alpha
    layer_norm_affline=True    # Use affine transformation in layer norm
)

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Initialize optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5, verbose=True)
criterion = MSELoss()

# Training function
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for X, y, A in train_loader:
        X, y, A = X.to(device), y.to(device), A.to(device)
        
        optimizer.zero_grad()
        print('X:', X.shape, 'y:', y.shape, 'A:', A.shape)
        output = model(X, A_tilde=A)
        
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item() * X.size(0)
    
    return total_loss / len(train_loader.dataset)

# Evaluation function
def evaluate(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for X, y, A in test_loader:
            X, y, A = X.to(device), y.to(device), A.to(device)
            
            output = model(X, A_tilde=A)
            loss = criterion(output, y)
            
            total_loss += loss.item() * X.size(0)
    
    return total_loss / len(test_loader.dataset)

# Training loop
num_epochs = 30
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    val_loss = evaluate(model, test_loader, criterion, device)
    
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    
    scheduler.step(val_loss)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

# Plot training and validation loss
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)
plt.show()

# Function to make predictions
def predict_future_exports(model, dataset, idx, device):
    """
    Make predictions for a specific sample in the dataset
    
    Args:
        model: Trained MTGNN model
        dataset: ExportDataset instance
        idx: Index of the sample to predict
        device: Device to run inference on
    
    Returns:
        actual: Ground truth values
        predicted: Predicted values
    """
    model.eval()
    X, y, A = dataset[idx]
    X, y, A = X.unsqueeze(0).to(device), y.to(device), A.to(device)
    
    with torch.no_grad():
        output = model(X, A_tilde=A)
        
    # Convert tensors to numpy arrays
    predicted = output.cpu().squeeze().numpy()
    actual = y.cpu().squeeze().numpy()
    
    return actual, predicted

# Make a prediction for a sample in the test dataset
sample_idx = 0  # First sample in the test dataset
test_sample = test_dataset[sample_idx]

# Get actual data sample
actual, predicted = predict_future_exports(model, test_dataset, sample_idx, device)

X: torch.Size([2, 1, 3, 13]) y: torch.Size([2, 1, 3, 1]) A: torch.Size([2, 3, 3])


RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 2

## Simple implementation of MTGNN

3 sectors, 3 countries, seq_length of 3

In [60]:
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.nn import MSELoss
import matplotlib.pyplot as plt
import random

# Set seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# define static variables
num_countries=3
num_sectors=3
seq_length=12

#form pre-defined adjacency matrix
matrix=[[np.random.rand(0,1) for _ in range(3)] for _ in range(3)]
adj_matrix=torch.tensor(matrix,dtype=torch.float)

export_volumes = np.random.rand(num_countries, num_sectors, seq_length)

# Convert to a DataFrame for better visualization
countries = [f"Country_{i+1}" for i in range(num_countries)]
sectors = [f"Sector_{i+1}" for i in range(num_sectors)]
timestamps = [f"Timestamp_{i+1}" for i in range(seq_length)]

# Create a multi-index DataFrame
index = pd.MultiIndex.from_product([countries, sectors], names=["Country", "Sector"])
export_data = pd.DataFrame(export_volumes.reshape(-1, seq_length), index=index, columns=timestamps)


In [61]:
print(export_data.head())

                    Timestamp_1  Timestamp_2  Timestamp_3  Timestamp_4  \
Country   Sector                                                         
Country_1 Sector_1     0.374540     0.950714     0.731994     0.598658   
          Sector_2     0.832443     0.212339     0.181825     0.183405   
          Sector_3     0.456070     0.785176     0.199674     0.514234   
Country_2 Sector_1     0.304614     0.097672     0.684233     0.440152   
          Sector_2     0.546710     0.184854     0.969585     0.775133   

                    Timestamp_5  Timestamp_6  Timestamp_7  Timestamp_8  \
Country   Sector                                                         
Country_1 Sector_1     0.156019     0.155995     0.058084     0.866176   
          Sector_2     0.304242     0.524756     0.431945     0.291229   
          Sector_3     0.592415     0.046450     0.607545     0.170524   
Country_2 Sector_1     0.122038     0.495177     0.034389     0.909320   
          Sector_2     0.939499     0

In [62]:
import torch

# Select one country (e.g., Country_1) for prediction
selected_country = "Country_1"
input_data = export_data.loc[selected_country].values  # Shape: (num_sectors, seq_length)

# Convert to PyTorch tensor and reshape to match MTGNN input requirements
# Shape required: (batch_size, in_dim, num_nodes, seq_length)
# Since we are predicting for one country, num_nodes = 1 (as we are not considering node interactions between countries)
# Correct adjustment
# Adjust input dimensions to include all countries but focus on the selected one
# Since you are predicting for one country, you might need to adjust how you handle num_nodes
input_data_all_countries = export_volumes[:, :, :]  # Shape: (num_countries, num_sectors, seq_length)
input_data_all_countries = input_data_all_countries.transpose(1, 0, 2)  # Shape: (num_sectors, num_countries, seq_length)
input_tensor = torch.tensor(input_data_all_countries, dtype=torch.float32).unsqueeze(0)  # Shape: (batch_size, in_dim, num_nodes, seq_length)

print(input_tensor.shape)

torch.Size([1, 3, 3, 12])


In [None]:
from torch.utils.data import Dataset, DataLoader

class ExportDataset(Dataset):
    def __init__(self, export_volumes):
        self.export_volumes = export_volumes

    def __len__(self):
        return self.export_volumes.shape[2] - seq_length + 1  # Number of possible time windows

    def __getitem__(self, idx):
        start = idx
        end = start + seq_length
        input_data = self.export_volumes[:, :, start:end].transpose(1, 0, 2)
        return torch.tensor(input_data, dtype=torch.float32)

dataset = ExportDataset(export_volumes)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

batch_size = 4  # Desired batch size
input_tensor_batch = torch.tensor(input_data_all_countries, dtype=torch.float32).unsqueeze(0).repeat(batch_size, 1, 1, 1)  # Shape: (batch_size, in_dim, num_nodes, seq_length)

In [68]:
print(input_tensor.shape)

torch.Size([1, 3, 3, 12])


In [74]:
# Initialize the MTGNN model
from torch_geometric_temporal.nn.attention.mtgnn import MTGNN

model = MTGNN(
    gcn_true=True,             # Use graph convolution
    build_adj=True,           # We're providing our own adjacency matrix (shape [batch, num_nodes, num_nodes])
    gcn_depth=1,               # Depth of graph convolution
    num_nodes=3,               # 3 nodes (countries)
    kernel_set=[2],         # Kernel set remains as is
    kernel_size=1,             # Base kernel size
    dropout=0.3,               # Dropout rate
    subgraph_size=3,           # Full graph (3 nodes)
    node_dim=4,               # Node embedding dimension
    dilation_exponential=1,    # Dilation factor
    conv_channels=16,         # Convolution channels (adjusted to match the paper)
    residual_channels=16,     # Residual channels (adjusted to match the paper)
    skip_channels=32,         # Skip channels (adjusted to match the paper)
    end_channels=64,          # End convolution channels (adjusted to match the first output layer in the paper)
    seq_length=12,             # Input sequence length (from X shape)
    in_dim=num_sectors,       # Input dimension (number of sectors)
    out_dim=num_sectors,               # Output dimension (adjusted to match the final output layer in the paper)
    layers=1,                  # Number of MTGNN layers (adjusted to match the number of convolution modules)
    propalpha=0.05,            # Propagation weight
    tanhalpha=3,               # Tanh alpha
    layer_norm_affline=True    # Use affine transformation in layer norm
)
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Initialize optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5, verbose=True)
criterion = MSELoss()



In [75]:
output=model(input_tensor,adj_matrix)

RuntimeError: Calculated padded input size per channel: (3 x 11). Kernel size: (1 x 12). Kernel size can't be greater than actual input size