## Actual Implementation

In [1]:
#import statements
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
import numpy as np
# from torch_geometric_temporal.nn.recurrent import AGCRN

## Initialisation

In [2]:
# #try batch size=4/7 for 25 years of historical data; for a seq length of 5(5/3 batches per epoch)
# #input features will be input sectors (8) + 3 additional features for now
# #T will be every 5 years (tbd)
# #assume data will be read from csv 


# class ModelDataset(Dataset):
#     def __init__(self, csv_file, T=5):
#         """
#         X shape: [num_samples, T, num_nodes, in_channels]
#         Y shape: [num_samples, num_nodes, num_sectors]
#         """
#         df=pd.read_csv(csv_file)
#         data = df.values 
#         self.T = T
#         self.X = []
#         self.y = []
        
#         # Build sequences of length 'T'
#         # For each index i, we take rows[i : i+T] as inputs 
#         # and row[i+T] (or some slice) as the target.
#         for i in range(len(data) - T):
#             # For example, let's say columns [0:13] are features, column 13 is target
#             x_seq = data[i : i + T, :13] 
#             y_val = data[i + T, 13]

#             self.X.append(x_seq)
#             self.y.append(y_val)

#         # Convert to tensors
#         self.X = torch.tensor(self.X, dtype=torch.float32)
#         self.y = torch.tensor(self.y, dtype=torch.float32)

#     def __len__(self):
#         return self.X.shape[0]

#     def __getitem__(self, idx):
#         return self.X[idx], self.Y[idx]

## Sector Dictionary
0. Category 1 (Agri)
1. Category 2 (Mining)
2. Category 3 (Construction)
3. Category 4 (Textile)
4. Category 5 (Transport Svcs)
5. Category 6 (ICT)
6. Category 7 (Health, pharm, sports etc)
7. Category 8 (Govt, Military, Misc)

In [None]:
# Graph dimensions. Every node is an ordered (country_a, country_b) pair, so
# n countries give n * (n - 1) nodes (no self-pairs).
num_countries = 18
num_country_pairs = num_countries * (num_countries - 1)  # 18 * 17 = 306
num_sectors = 8  # 8 sectors


class Args:
    """Plain attribute bag with the model, training, testing and logging settings.

    The single instance created below is passed to ``AGCRNFinal`` and to
    ``Trainer`` in the training cell; how each option is interpreted is decided
    by those classes.
    """

    def __init__(self):
        # Model structure
        self.num_nodes = num_country_pairs
        # 8 sectoral export volumes + sentiment_index + tradeagreementindex
        self.input_dim = num_sectors + 2
        self.rnn_units = 64
        self.output_dim = num_sectors  # e.g., predict only the sectorial export volume
        self.horizon = 3  # forecast 3 steps ahead
        self.num_layers = 2
        self.cheb_k = 2
        self.embed_dim = 20
        self.default_graph = True
        self.log_dir = './logs/'
        self.debug = False
        self.model = 'AGCRN'
        self.normaliser = 'std'
        self.device = 'cpu'
        self.batch_size = 7  # 4/7 depending on results
        self.mode = 'train'

        # Training
        self.seed = 10
        self.loss_func = 'mse'
        self.epochs = 50
        self.lr_init = 0.009
        self.lr_decay = False
        # Stored as a tuple of epoch numbers (not a comma-separated string).
        self.lr_decay_steps = (5, 20, 40, 70)
        self.lr_decay_rate = 0.3
        self.early_stop = True
        self.early_stop_patience = 15
        self.teacher_forcing = True
        self.tf_decay_steps = 20
        self.real_value = False
        self.grad_norm = True
        self.max_grad_norm = 5

        # Testing
        self.mae_thresh = None
        self.mape_thresh = 0.

        # Logging
        self.log_step = 20
        self.plot = True


args = Args()

## Data handling

In [4]:
# fbic_data=pd.read_csv('../data/cleaned/FBIC_cleaned.csv',header=0)


In [5]:
# fbic_data=fbic_data.rename(columns={'iso3a':'country_a','iso3b':'country_b'})

## Import data

In [6]:
# training_data=pd.read_csv('../data/final/training_model_data.csv',header=0)

In [7]:
# training_data=training_data[['country_a','country_b','bec_1','bec_2','bec_3','bec_4','bec_5','bec_6','bec_7','bec_8','sentiment_index','tradeagreementindex','year']]

In [8]:
# training_data.to_csv('../data/final/training_model_data.csv',index=False)

In [None]:
def csv_to_tensor(csv_file, feature_cols=None):
    """Load the long-format trade CSV into a dense (T, N, D) array.

    The CSV must contain the columns ``country_a``, ``country_b``, ``year`` and
    every column named in ``feature_cols``.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file to read.
    feature_cols : sequence of str, optional
        Columns copied into the last axis, in this order. Defaults to the eight
        sector columns ``bec_1`` .. ``bec_8`` followed by ``sentiment_index``
        and ``tradeagreementindex`` (D = 10).

    Returns
    -------
    tensor_data : numpy.ndarray of float, shape (T, N, D)
        T = number of distinct years, N = number of distinct
        (country_a, country_b) pairs, D = ``len(feature_cols)``.
        A (year, pair) combination with no row in the CSV is filled with zeros;
        if several rows exist for the same combination the first one is used.
    years : list
        Distinct years in ascending order (index along axis 0).
    country_pairs : list of tuple
        Distinct (country_a, country_b) tuples in sorted order (index along axis 1).
    """
    if feature_cols is None:
        feature_cols = ['bec_1', 'bec_2', 'bec_3', 'bec_4',
                        'bec_5', 'bec_6', 'bec_7', 'bec_8',
                        'sentiment_index', 'tradeagreementindex']
    feature_cols = list(feature_cols)

    df = pd.read_csv(csv_file)

    # Ensure the 'year' column is integer
    df['year'] = df['year'].astype(int)

    # Axis 0 ordering: sorted distinct years
    years = sorted(df['year'].unique())

    # Axis 1 ordering: sorted distinct (country_a, country_b) tuples
    pairs_df = df[['country_a', 'country_b']].drop_duplicates()
    country_pairs = sorted([tuple(x) for x in pairs_df.values])

    # Start from zeros so that missing (year, pair) records stay zero-filled.
    tensor_data = np.zeros((len(years), len(country_pairs), len(feature_cols)), dtype=float)

    # Keep only the first row per (year, pair); this mirrors taking row 0 of a
    # filtered frame and guarantees unique target cells for the scatter below.
    first_rows = df.drop_duplicates(subset=['year', 'country_a', 'country_b'], keep='first')

    # Translate each row's year / pair into its position on axis 0 / axis 1.
    year_pos = {int(year): t for t, year in enumerate(years)}
    pair_pos = {pair: n for n, pair in enumerate(country_pairs)}
    t_idx = np.array([year_pos[int(year)] for year in first_rows['year']], dtype=int)
    n_idx = np.array(
        [pair_pos[pair] for pair in zip(first_rows['country_a'], first_rows['country_b'])],
        dtype=int,
    )

    # One vectorised write instead of filtering the frame once per (year, pair).
    tensor_data[t_idx, n_idx, :] = first_rows[feature_cols].to_numpy(dtype=float)

    return tensor_data, years, country_pairs

def group_into_windows(tensor_data, window_size=7):
    """Cut a (T, N, D) array into overlapping windows along the time axis.

    Consecutive windows start one time step apart and each spans
    ``window_size`` steps, so ``T - window_size + 1`` windows are produced.

    Returns a numpy array of shape (num_samples, window_size, N, D).
    """
    total_steps, _, _ = tensor_data.shape
    first_steps = range(total_steps - window_size + 1)  # stride-1 start positions
    # Each slice has shape (window_size, N, D); stacking adds the sample axis.
    return np.stack([tensor_data[start: start + window_size] for start in first_steps])

def split_input_target_direct(windows, input_len=4, horizon=3):
    """Split every window into an input sequence and one direct target step.

    windows:   numpy array of shape (num_samples, window_size, N, D); the
               window must be at least input_len + horizon steps long.
    input_len: number of leading time steps kept as model input.
    horizon:   how many steps after the last input step the target lies.

    Returns:
      x: inputs of shape (num_samples, input_len, N, D)
      y: targets of shape (num_samples, N, 8) - the first 8 features of the
         target step (the trailing features are dropped).
    """
    # Position of the target inside the window: `horizon` steps past the last input.
    target_step = input_len + horizon - 1
    history = windows[:, :input_len]
    sector_targets = windows[:, target_step][..., :8]
    return history, sector_targets

def train_val_split(x, y, val_ratio=0.2):
    """Split samples in order: the leading part trains, the trailing part validates.

    The cut index is ``int(num_samples * (1 - val_ratio))``; no shuffling is done.
    Returns ``(x_train, y_train, x_val, y_val)``.
    """
    cut = int(x.shape[0] * (1 - val_ratio))
    return x[:cut], y[:cut], x[cut:], y[cut:]



In [10]:
from AGCRN.lib.dataloader import normalize_dataset

# Load the training CSV as a (T, N, D) numpy array, together with the sorted
# year list (axis 0 ordering) and the sorted country-pair list (axis 1 ordering).
training_data_tensor, years, country_pairs = csv_to_tensor('../data/final/training_model_data.csv')

In [11]:
# Sanity check 1: inspect one time slice.
# Index 0 on the first axis is the earliest year in `years`.
print("Data for year {}:".format(years[0]))
print(training_data_tensor[0])  # (N, D) block: every country pair and feature for that year

# Sanity check 2: inspect a single node at a single time step.
# Index 0 on the second axis is the first entry of `country_pairs`.
print("Features for {} in {}:".format(country_pairs[0], years[0]))
print(training_data_tensor[0, 0, :])

Data for year 2006:
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.06073920e+09
  5.48504801e-01 0.00000000e+00]
 [3.79788246e+06 1.35599141e+06 6.93940766e+06 ... 9.82446937e+07
  7.03075909e-01 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 2.95395251e+09
  6.19748649e-01 0.00000000e+00]
 ...
 [5.67775428e+07 1.05682782e+09 1.89697853e+08 ... 4.53273661e+08
  7.30780274e-01 1.40000000e+01]
 [1.19379420e+08 3.08204908e+08 2.02183745e+08 ... 9.48527231e+07
  6.67942802e-01 1.40000000e+01]
 [7.46406599e+08 1.21952570e+09 1.19232143e+09 ... 1.21779717e+09
  6.90423445e-01 0.00000000e+00]]
Features for ('ARE', 'AUS') in 2006:
[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.06073920e+09
 5.48504801e-01 0.00000000e+00]


## Country Dictionary

0. Singapore
1. China
2. Malaysia
3. United States
4. Hong Kong, China
5. Indonesia
6. Korea, Rep.
7. Japan
8. Thailand
9. Australia
10. Vietnam
11. India
12. United Arab Emirates
13. Philippines
14. Germany
15. France
16. Switzerland
17. Netherlands

In [12]:
# Display the node ordering: position i in this list is node i on axis 1 of the tensor.
country_pairs

[('ARE', 'AUS'),
 ('ARE', 'CHE'),
 ('ARE', 'CHN'),
 ('ARE', 'DEU'),
 ('ARE', 'FRA'),
 ('ARE', 'HKG'),
 ('ARE', 'IDN'),
 ('ARE', 'IND'),
 ('ARE', 'JPN'),
 ('ARE', 'KOR'),
 ('ARE', 'MYS'),
 ('ARE', 'NLD'),
 ('ARE', 'PHL'),
 ('ARE', 'SGP'),
 ('ARE', 'THA'),
 ('ARE', 'USA'),
 ('ARE', 'VNM'),
 ('AUS', 'ARE'),
 ('AUS', 'CHE'),
 ('AUS', 'CHN'),
 ('AUS', 'DEU'),
 ('AUS', 'FRA'),
 ('AUS', 'HKG'),
 ('AUS', 'IDN'),
 ('AUS', 'IND'),
 ('AUS', 'JPN'),
 ('AUS', 'KOR'),
 ('AUS', 'MYS'),
 ('AUS', 'NLD'),
 ('AUS', 'PHL'),
 ('AUS', 'SGP'),
 ('AUS', 'THA'),
 ('AUS', 'USA'),
 ('AUS', 'VNM'),
 ('CHE', 'ARE'),
 ('CHE', 'AUS'),
 ('CHE', 'CHN'),
 ('CHE', 'DEU'),
 ('CHE', 'FRA'),
 ('CHE', 'HKG'),
 ('CHE', 'IDN'),
 ('CHE', 'IND'),
 ('CHE', 'JPN'),
 ('CHE', 'KOR'),
 ('CHE', 'MYS'),
 ('CHE', 'NLD'),
 ('CHE', 'PHL'),
 ('CHE', 'SGP'),
 ('CHE', 'THA'),
 ('CHE', 'USA'),
 ('CHE', 'VNM'),
 ('CHN', 'ARE'),
 ('CHN', 'AUS'),
 ('CHN', 'CHE'),
 ('CHN', 'DEU'),
 ('CHN', 'FRA'),
 ('CHN', 'HKG'),
 ('CHN', 'IDN'),
 ('CHN', 'IND'

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Window layout: `input_len` observed steps followed by `horizon` future steps.
# The horizon is taken from the shared config so that the data windows and the
# model settings cannot drift apart.
input_len = 3
horizon = args.horizon
window_size = input_len + horizon

# Normalise the 8 sector-volume features (std normalisation, per column).
# NOTE(review): the scaler is fitted on the full tensor, i.e. before the
# train/validation split further down - confirm this is intended.
data_to_normalize = training_data_tensor[:, :, :8]
normalized_data, scaler = normalize_dataset(data_to_normalize, normalizer='std', column_wise=True)
remaining_features = training_data_tensor[:, :, 8:]
# Get the shape dimensions
T, N, _ = remaining_features.shape

# Initialize the scaler with the desired feature range (0, 1)
scaler2 = MinMaxScaler(feature_range=(0, 1))

# Reshape the first column of remaining_features to 2D (T*N, 1)
col_data = remaining_features[:, :, 0].reshape(-1, 1)

# Fit and transform the column data using the scaler
# NOTE(review): `col_scaled` is never used below - the concatenation takes the
# unscaled `remaining_features`. Kept as-is so the model inputs do not change;
# decide whether the min-max scaled column should replace the raw one.
col_scaled = scaler2.fit_transform(col_data)
# Concatenate along the last axis
normalized_training_data = np.concatenate((normalized_data, remaining_features), axis=-1)

# 2. Group data into overlapping windows of `window_size` time periods
#    (`input_len` input steps + `horizon` steps ahead).
windows = group_into_windows(normalized_training_data, window_size=window_size)
print(f"Windows shape (num_samples, {window_size}, N, D):", windows.shape)

# 3. Split each window into the input time periods and a single target (`horizon` steps forward).
x, y = split_input_target_direct(windows, input_len=input_len, horizon=horizon)
print(f"Input shape (num_samples, {input_len}, N, D):", x.shape)
print("Target shape (num_samples, N, num_sectors):", y.shape)

# 4. Perform train/validation split.
x_train, y_train, x_val, y_val = train_val_split(x, y, val_ratio=0.2)
print("Train samples:", x_train.shape[0])
print("Validation samples:", x_val.shape[0])

x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)

x_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

# Create the dataset and data loader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
# test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

Normalize the dataset by Standard Normalization
Windows shape (num_samples, 7, N, D): (9, 6, 306, 10)
Input shape (num_samples, 4, N, D): (9, 3, 306, 10)
Target shape (num_samples, N, D): (9, 306, 8)
Train samples: 7
Validation samples: 2


## Training algorithm

In [14]:
from datetime import datetime
from AGCRN.model.BasicTrainer import Trainer
from agcrn_model import AGCRNFinal
import os

# Apply the configured seed so that weight initialisation and batch shuffling
# are reproducible from one run to the next.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Build the model on the configured device and re-initialise its parameters.
model = AGCRNFinal(args)
model = model.to(args.device)
for p in model.parameters():
    if p.dim() >= 2:
        nn.init.xavier_uniform_(p)
    else:
        # For biases or 1D parameters, just fill with zeros
        nn.init.zeros_(p)

#load dataset here

#init loss function, optimizer
loss = torch.nn.MSELoss().to(args.device)
optimizer = optim.Adam(model.parameters(), lr=args.lr_init, eps=1.0e-8, weight_decay=0.0, amsgrad=False)

#learning rate decay
lr_scheduler = None
if args.lr_decay:
    print('Applying learning rate decay.')
    # The config stores the milestones as `lr_decay_steps` (a sequence of ints).
    # A comma-separated string under `lr_decay_step` is still accepted so that
    # older config objects keep working.
    raw_steps = getattr(args, 'lr_decay_steps', None)
    if raw_steps is None:
        raw_steps = args.lr_decay_step
    if isinstance(raw_steps, str):
        raw_steps = raw_steps.split(',')
    lr_decay_steps = [int(step) for step in raw_steps]
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                        milestones=lr_decay_steps,
                                                        gamma=args.lr_decay_rate)

#config log path
current_time = datetime.now().strftime('%Y%m%d%H%M%S')
current_dir = os.getcwd()
log_dir = os.path.join(current_dir, 'logs')
args.log_dir = log_dir  # overrides the relative default set in Args

#start training
trainer = Trainer(model, loss, optimizer, train_loader, val_loader, scaler=scaler, #need to get these
                  args=args, lr_scheduler=lr_scheduler)
if args.mode == 'train':
    trainer.train()
# elif args.mode == 'test':
#     model.load_state_dict(torch.load('./pre-trained/{}.pth'.format(args.dataset)))
#     print("Load saved model")
#     trainer.test(model, trainer.args, test_loader, scaler, trainer.logger)
# else:
#     raise ValueError


2025-04-06 14:02: Experiment log path in: c:\Users\rob-l\Documents\NUS\Y3S2\DSE3101\T5G1\model\logs


Creat Log File in:  c:\Users\rob-l\Documents\NUS\Y3S2\DSE3101\T5G1\model\logs\run.log
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 1: 0/1 Loss: 0.772359
2025-04-06 14:02: **********Train Epoch 1: averaged Loss: 0.772359, tf_ratio: 0.952381
2025-04-06 14:02: **********Val Epoch 1: average Loss: 1.235979
2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 2: 0/1 Loss: 0.770021
2025-04-06 14:02: **********Train Epoch 2: averaged Loss: 0.770021, tf_ratio: 0.950061
2025-04-06 14:02: **********Val Epoch 2: average Loss: 1.231963


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 3: 0/1 Loss: 0.767568
2025-04-06 14:02: **********Train Epoch 3: averaged Loss: 0.767568, tf_ratio: 0.947635
2025-04-06 14:02: **********Val Epoch 3: average Loss: 1.227639
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 4: 0/1 Loss: 0.764887
2025-04-06 14:02: **********Train Epoch 4: averaged Loss: 0.764887, tf_ratio: 0.945098
2025-04-06 14:02: **********Val Epoch 4: average Loss: 1.222878
2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 5: 0/1 Loss: 0.761887
2025-04-06 14:02: **********Train Epoch 5: averaged Loss: 0.761887, tf_ratio: 0.942445


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: **********Val Epoch 5: average Loss: 1.217555
2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 6: 0/1 Loss: 0.758472
2025-04-06 14:02: **********Train Epoch 6: averaged Loss: 0.758472, tf_ratio: 0.939672
2025-04-06 14:02: **********Val Epoch 6: average Loss: 1.211541


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 7: 0/1 Loss: 0.754540
2025-04-06 14:02: **********Train Epoch 7: averaged Loss: 0.754540, tf_ratio: 0.936774
2025-04-06 14:02: **********Val Epoch 7: average Loss: 1.204692
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: Train Epoch 8: 0/1 Loss: 0.749972
2025-04-06 14:02: **********Train Epoch 8: averaged Loss: 0.749972, tf_ratio: 0.933747
2025-04-06 14:02: **********Val Epoch 8: average Loss: 1.196840
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: Train Epoch 9: 0/1 Loss: 0.744636
2025-04-06 14:02: **********Train Epoch 9: averaged Loss: 0.744636, tf_ratio: 0.930586
2025-04-06 14:02: **********Val Epoch 9: average Loss: 1.187795
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 10: 0/1 Loss: 0.738384
2025-04-06 14:02: **********Train Epoch 10: averaged Loss: 0.738384, tf_ratio: 0.927286
2025-04-06 14:02: **********Val Epoch 10: average Loss: 1.177350
2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 11: 0/1 Loss: 0.731069
2025-04-06 14:02: **********Train Epoch 11: averaged Loss: 0.731069, tf_ratio: 0.923842
2025-04-06 14:02: **********Val Epoch 11: average Loss: 1.165311


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 12: 0/1 Loss: 0.722580
2025-04-06 14:02: **********Train Epoch 12: averaged Loss: 0.722580, tf_ratio: 0.920249
2025-04-06 14:02: **********Val Epoch 12: average Loss: 1.151546
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 13: 0/1 Loss: 0.712931
2025-04-06 14:02: **********Train Epoch 13: averaged Loss: 0.712931, tf_ratio: 0.916501
2025-04-06 14:02: **********Val Epoch 13: average Loss: 1.136118
2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 14: 0/1 Loss: 0.702443
2025-04-06 14:02: **********Train Epoch 14: averaged Loss: 0.702443, tf_ratio: 0.912594
2025-04-06 14:02: **********Val Epoch 14: average Loss: 1.119560


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 15: 0/1 Loss: 0.692095
2025-04-06 14:02: **********Train Epoch 15: averaged Loss: 0.692095, tf_ratio: 0.908523
2025-04-06 14:02: **********Val Epoch 15: average Loss: 1.103523
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 16: 0/1 Loss: 0.683865
2025-04-06 14:02: **********Train Epoch 16: averaged Loss: 0.683865, tf_ratio: 0.904282
2025-04-06 14:02: **********Val Epoch 16: average Loss: 1.091295
2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 17: 0/1 Loss: 0.679668
2025-04-06 14:02: **********Train Epoch 17: averaged Loss: 0.679668, tf_ratio: 0.899866
2025-04-06 14:02: **********Val Epoch 17: average Loss: 1.085439


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: *********************************Current best model saved!
2025-04-06 14:02: Train Epoch 18: 0/1 Loss: 0.677158
2025-04-06 14:02: **********Train Epoch 18: averaged Loss: 0.677158, tf_ratio: 0.895269
2025-04-06 14:02: **********Val Epoch 18: average Loss: 1.084393
2025-04-06 14:02: *********************************Current best model saved!


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 19: 0/1 Loss: 0.671979
2025-04-06 14:02: **********Train Epoch 19: averaged Loss: 0.671979, tf_ratio: 0.890488
2025-04-06 14:02: **********Val Epoch 19: average Loss: 1.086369
2025-04-06 14:02: Train Epoch 20: 0/1 Loss: 0.664062
2025-04-06 14:02: **********Train Epoch 20: averaged Loss: 0.664062, tf_ratio: 0.885516
2025-04-06 14:02: **********Val Epoch 20: average Loss: 1.090724


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: Train Epoch 21: 0/1 Loss: 0.655584
2025-04-06 14:02: **********Train Epoch 21: averaged Loss: 0.655584, tf_ratio: 0.880348
2025-04-06 14:02: **********Val Epoch 21: average Loss: 1.096871


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 22: 0/1 Loss: 0.648067
2025-04-06 14:02: **********Train Epoch 22: averaged Loss: 0.648067, tf_ratio: 0.874981
2025-04-06 14:02: **********Val Epoch 22: average Loss: 1.103945
2025-04-06 14:02: Train Epoch 23: 0/1 Loss: 0.641643
2025-04-06 14:02: **********Train Epoch 23: averaged Loss: 0.641643, tf_ratio: 0.869408


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: **********Val Epoch 23: average Loss: 1.111114
2025-04-06 14:02: Train Epoch 24: 0/1 Loss: 0.635638
2025-04-06 14:02: **********Train Epoch 24: averaged Loss: 0.635638, tf_ratio: 0.863625
2025-04-06 14:02: **********Val Epoch 24: average Loss: 1.117851


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 25: 0/1 Loss: 0.629310
2025-04-06 14:02: **********Train Epoch 25: averaged Loss: 0.629310, tf_ratio: 0.857629
2025-04-06 14:02: **********Val Epoch 25: average Loss: 1.123959
2025-04-06 14:02: Train Epoch 26: 0/1 Loss: 0.622219
2025-04-06 14:02: **********Train Epoch 26: averaged Loss: 0.622219, tf_ratio: 0.851414
2025-04-06 14:02: **********Val Epoch 26: average Loss: 1.129487


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:02: Train Epoch 27: 0/1 Loss: 0.614298
2025-04-06 14:02: **********Train Epoch 27: averaged Loss: 0.614298, tf_ratio: 0.844977
2025-04-06 14:02: **********Val Epoch 27: average Loss: 1.134602


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: Train Epoch 28: 0/1 Loss: 0.605800
2025-04-06 14:02: **********Train Epoch 28: averaged Loss: 0.605800, tf_ratio: 0.838313
2025-04-06 14:02: **********Val Epoch 28: average Loss: 1.139480
2025-04-06 14:02: Train Epoch 29: 0/1 Loss: 0.597180
2025-04-06 14:02: **********Train Epoch 29: averaged Loss: 0.597180, tf_ratio: 0.831421


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:02: **********Val Epoch 29: average Loss: 1.144210
2025-04-06 14:03: Train Epoch 30: 0/1 Loss: 0.588888
2025-04-06 14:03: **********Train Epoch 30: averaged Loss: 0.588888, tf_ratio: 0.824296
2025-04-06 14:03: **********Val Epoch 30: average Loss: 1.148777


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


2025-04-06 14:03: Train Epoch 31: 0/1 Loss: 0.581148
2025-04-06 14:03: **********Train Epoch 31: averaged Loss: 0.581148, tf_ratio: 0.816937
2025-04-06 14:03: **********Val Epoch 31: average Loss: 1.153077


output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])


2025-04-06 14:03: Train Epoch 32: 0/1 Loss: 0.573856
2025-04-06 14:03: **********Train Epoch 32: averaged Loss: 0.573856, tf_ratio: 0.809341
2025-04-06 14:03: **********Val Epoch 32: average Loss: 1.156904
2025-04-06 14:03: Train Epoch 33: 0/1 Loss: 0.566744
2025-04-06 14:03: **********Train Epoch 33: averaged Loss: 0.566744, tf_ratio: 0.801506
2025-04-06 14:03: **********Val Epoch 33: average Loss: 1.159859
2025-04-06 14:03: Validation performance didn't improve for 15 epochs. Training stops.
2025-04-06 14:03: Total training time: 0.1092min, best loss: 1.084393
2025-04-06 14:03: Saving current best model to c:\Users\rob-l\Documents\NUS\Y3S2\DSE3101\T5G1\model\logs\best_model.pth


output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])
output shape: torch.Size([7, 3, 306, 8])
label shape: torch.Size([7, 306, 8])
last_output shape: torch.Size([7, 306, 8])
output shape: torch.Size([2, 3, 306, 8])
label shape: torch.Size([2, 306, 8])
last_output shape: torch.Size([2, 306, 8])


In [15]:
# # MODEL DEFINITION
# model = AGCRN(
#     number_of_nodes=num_country_pairs,
#     in_channels=in_channels,
#     out_channels=out_channels,
#     K=K,
#     embedding_dimensions=embedding_dims
# )

# # 3) Create the node embedding E separately (following your interface).
# #    We'll just do a random init. This is learnable, so we wrap it in nn.Parameter.
# E = nn.Parameter(torch.zeros(num_country_pairs, embedding_dims), requires_grad=True)

# # 4) "prediction head" to map from [out_channels] -> [num_sectors]
# prediction_head = nn.Linear(out_channels, num_sectors)

# # 5) Combine everything in a single optimizer. We must include the node embedding (E) as well.
# optimizer = optim.Adam(
#     list(model.parameters()) + list(prediction_head.parameters()) + [E],
#     lr=lr
# )

# criterion = nn.MSELoss()

# # 6) Training loop
# for epoch in range(num_epochs):
#     model.train()
#     total_loss = 0.0

#     for X_batch, Y_batch in dataloader:
#         # X_batch: [batch_size, num_nodes, in_channels]
#         # Y_batch: [batch_size, num_nodes, num_sectors]

#         optimizer.zero_grad()

#         H = None
#         # Unroll over T time steps
#         for t in range(T):
#             X_t = X_batch[:, t, :, :]  # [batch_size, num_nodes, in_channels]
#             H = model(X_t, E, H)  # H is the hidden state, E is the node embedding
            

#         # Now map from [out_channels] -> 1 dimension
#         # We'll do this for each node:
#         Y_pred = prediction_head(H)
#         print('Y_pred.shape', Y_pred.shape)
#         # Compute MSE loss with target
#         loss = criterion(Y_pred, Y_batch)

#         # Backprop & update
#         loss.backward()
#         optimizer.step()

#         total_loss += loss.item() * X_batch.size(0)

#     avg_loss = total_loss / len(dataset)
#     print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# print("Training complete!")