# Preprocessing and Feature Creation

Data Processing and Feature Engineering is performed in the following, using the given variables downloaded from the Binance API, we will calculate the following:

In [None]:
!pip install ta

In [None]:
import torch
from ta.momentum import rsi
from ta.trend import macd
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from ta import add_all_ta_features
from ta.volatility import BollingerBands
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.trend import EMAIndicator, MACD
from ta.volume import VolumeWeightedAveragePrice

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

# Directory containing .csv files of cryptocurrency data
directory = '/kaggle/input/crypto-2021-10-01-1hour'
dataframes = []

for filename in os.listdir(directory):
    if filename.endswith('.csv') and filename != "USDCUSDT1h.csv" and filename != "DAIUSDT1h.csv":
        ticker = filename[:-6] # Remove the '1h.csv' or '1d.csv' extension
        data = pd.read_csv(os.path.join(directory, filename), parse_dates=['time'])

        cutoff_date = pd.Timestamp('2021-10-01')

        if data['time'].iloc[0] > cutoff_date:
            print(f"Skipping {ticker}: first entry is after cutoff date.")
            continue  # Skip this dataset
            
        data = data[data['time'] >= cutoff_date]

        windows = [5, 10, 20]
        data["NormClose"] = (data["close"] - data["close"].mean()) / data["close"].std()
        data["DailyLogReturn"] = np.log(1 + data["close"].pct_change())

        data['volume_quote_ratio'] = data['volume'] / data['quote_volume']
        data['buy_sell_volume_ratio'] = data['buy_base_vol'] / data['volume']
        data['buy_sell_quote_ratio'] = data['buy_quote_vol'] / data['quote_volume']

        # Stochastic Oscillator
        stoch = StochasticOscillator(high = data['high'], low = data['low'], close = data['close'])
        data['stoch_k'] = stoch.stoch()
        data['stoch_d'] = stoch.stoch_signal()

        # VWAP
        vwap = VolumeWeightedAveragePrice(high = data['high'], low = data['low'],
                                         close = data['close'], volume = data['volume'])
        data['vwap'] = vwap.volume_weighted_average_price()

        for window in windows:
            # Parkinson Volatility
            data[f'parkinson_vol_{window}'] = np.sqrt(
                (1.0 / (4.0 * np.log(2.0))) *
                (np.log(data['high'] / data['low']) ** 2).rolling(window).mean()
            )

            # Garman-Klass Volatility
            data[f'garman_klass_vol_{window}'] = np.sqrt(
                (0.5 * np.log(data['high'] / data['low']) ** 2) -
                (2.0 * np.log(2.0) - 1.0) * (np.log(data['close'] / data['open']) ** 2)
            ).rolling(window).mean()

        data['avg_trade_size'] = data['volume'] / data['trades']
        data['avg_trade_quote_size'] = data['quote_volume'] / data['trades']
        data["Ticker"] = ticker
        data.set_index('time', inplace = True)
        data.drop(columns = ["open", "low", "high", "volume", "buy_base_vol", "quote_volume", "trades", "buy_quote_vol"], inplace=True)
        
        if data.empty:
            print(f"Warning: {ticker} DataFrame is empty.")
            continue

        dataframes.append(data)
        
# Concatenate all DataFrames into a single DataFrame with a hierarchical index
all_data = pd.concat(dataframes, keys=[df['Ticker'][0] for df in dataframes])

# level_0_index = 'OMUSDT'  # Replace with your actual level 0 index value

# # Filter the DataFrame for the specific level 0 index
# filtered_data = all_data.loc[level_0_index]

# # Find rows with missing/NaN values
# missing_data = filtered_data[filtered_data.isna().any(axis=1)]

# Drop any rows with missing values and filter time range
all_data = all_data.dropna()
all_data = all_data.loc['2021-10-01':]

# Specify the level 1 index you want to delete
level_1_index_to_delete = pd.Timestamp('2023-06-08 23:00:00')

# Drop the specified level 1 index across all level 0 indexes
all_data = all_data.drop(level_1_index_to_delete, level=1, errors='ignore')

# Display the combined DataFrame
all_data

In [None]:
row_counts = all_data.groupby(level=0).size()

# Print the number of rows for each level 0 index (make sure they are all the same)
print(row_counts)

In [None]:
# Print the first row of every currency
first_rows = all_data.reset_index(level=1).groupby(level=0).first()

first_rows

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

plt.figure(figsize=(10, 5))
tickers = all_data.index.get_level_values(0).unique()
selected_tickers = np.random.choice(tickers, size=10, replace=False)

for ticker in selected_tickers:	
    plt.plot(all_data.loc[ticker].index,              
             np.cumsum(all_data.loc[ticker]["DailyLogReturn"]),              
             label=f'{ticker}')
    
plt.title("Evolution of Ten Cryptocurrencies")
plt.xlabel('Date')
plt.gca().tick_params(axis='x', rotation=45)
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval = 4))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
plt.ylabel('Cumulative Log Return')
plt.legend(ncol=2, loc="lower right", prop={'size': 8, 'family': 'serif'})
plt.show()

# Correlation between Cryptocurrency

Computing the correlation between currencies to initialise the edges for the graph. Current method utilises a date range to compute correlation (2023-10-01 to 2024-10-01). Preferably, we would use fundamental information about each currency for correlation

In [None]:
df_close = all_data.copy()

# Reset the index to access the time column
df_close.reset_index(inplace=True)
df_close = df_close[["time", "Ticker", "close"]]
df_close["time"] = pd.to_datetime(df_close["time"])
df_close.set_index("time", inplace=True) 

# Sort the DataFrame by index to ensure it is monotonic
df_close.sort_index(inplace=True)
df_close["close"] = np.log(df_close["close"])

# Filter for dates in 2024
df_close_filtered = df_close.loc['2024-01-01':'2024-10-01']
df_close_filtered.reset_index(inplace=True) 

# Create a pivot table to reshape the DataFrame
df_close_filtered = pd.pivot_table(    
    df_close_filtered, 
    values="close", 
    columns="Ticker", 
    index="time")

df_close_filtered

In [None]:
import seaborn as sns

correlation_matrix = df_close_filtered.diff().corr()
correlation_matrix = (correlation_matrix - (correlation_matrix == 1)) # Drop Self-Correlation

plt.figure(figsize=(15, 15))

sns.heatmap(correlation_matrix,            
            linewidths=.5,            
            annot=True,            
            square=True,            
            cmap="viridis")

plt.xlabel("Cryptocurrency")
plt.ylabel("Cryptocurrency")
plt.title("Correlation Matrix between each Cryptocurrency")
plt.show()

In [None]:
import networkx as nx
correlation_matrix_np = correlation_matrix.to_numpy()
adj_correlation_matrix = (correlation_matrix_np * (abs(correlation_matrix_np) > .7).astype(int)) # Threshold to form edge (0.7)
correlation_matrix_graph = nx.from_numpy_array(adj_correlation_matrix)
correlation_matrix_graph = nx.relabel_nodes(correlation_matrix_graph, dict(enumerate(correlation_matrix.index)))

plt.figure(figsize=(12, 12))

nx.draw(correlation_matrix_graph, 
        with_labels=True, 
        node_size=100, 
        node_color='skyblue', 
        font_size=8, 
        font_weight='bold', 
        font_color='black', 
        pos=nx.spring_layout(correlation_matrix_graph))

plt.title('Crypto Graph by Historical Correlation')
plt.show()

In [None]:
adj_correlation_matrix

# Building PyTorch Geometric Dataset

In [None]:
!pip install torch_geometric

In [None]:
import torch
from torch_geometric.data import Data

nodes_nb = len(adj_correlation_matrix)
 
x = torch.tensor(
	all_data.drop(columns=["Ticker", "close"]).to_numpy().reshape((nodes_nb, -1, all_data.shape[1] - 2)), dtype=torch.float32).to(device)  # shape (nodes_nb, timestamps_nb, features_nb) Note: This won't work if they are not integers
x = x.transpose(1, 2)  # shape (nodes_nb, features_nb, timestamps_nb)

close_prices = torch.tensor(
		all_data[["close"]].to_numpy().reshape((nodes_nb, -1)), dtype=torch.float32
	).to(device)

edge_nb = np.count_nonzero(adj_correlation_matrix)
edge_index, edge_weight = torch.zeros((2, edge_nb), dtype=torch.long).to(device), torch.zeros((edge_nb,), dtype=torch.float32).to(device)
count = 0
for i in range(nodes_nb):
		for j in range(nodes_nb):
			if (weight := adj_correlation_matrix[i, j]) != 0:
				edge_index[0, count], edge_index[1, count] = i, j
				edge_weight[count] = weight
				count += 1

x.shape, edge_index.shape, edge_weight.shape

In [17]:
past_window, future_window = 25, 1
timestamps = [
			Data(
				x = x[:, :, idx:idx + past_window],
				edge_index = edge_index,
				edge_weight = edge_weight,
				close_price = close_prices[:, idx:idx + past_window],
				y = x[:, 0, idx + past_window:idx + past_window + future_window],
				close_price_y=close_prices[:, idx + past_window:idx + past_window + future_window],
			).to(device) for idx in range(x.shape[2] - past_window - future_window)
		]

print(timestamps[-1])

Data(x=[34, 16, 25], edge_index=[2, 208], y=[34, 1], edge_weight=[208], close_price=[34, 25], close_price_y=[34, 1])


# Model Definition

Baseline Graph Convolutional Network Models used for comparing our modified MTGNN. We researched various other GCNs that also capture temporal and spatial dynamics in a dataset. The following models and the research papers they are from are included below.

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATv2Conv

class GCN(nn.Module):
	"""
	Simple two layers GCN model.
	"""
	def __init__(self, in_channels: int, layer_sizes: list[int] = None, bias: bool = True, improved: bool = False):
		super(GCN, self).__init__()
		layer_sizes = layer_sizes or [32, 32]
		self.convs = nn.ModuleList([
		   GCNConv(in_channels, layer_sizes[0], bias=bias, improved=improved),
		] + [
		   GCNConv(layer_sizes[i], layer_sizes[i + 1], bias=bias, improved=improved) for i in
		   range(len(layer_sizes) - 1)
   		])

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor) -> torch.tensor:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:return: The hidden state of the GCN h_t (Nodes_nb, Hidden_size)
		"""
		for conv in self.convs[:-1]:
			x = F.leaky_relu(conv(x, edge_index, edge_weight))
		return self.convs[-1](x, edge_index, edge_weight)
        
class GAT(nn.Module):
	"""
	Simple two layers GAT model.
	"""
	def __init__(self, in_channels: int, layer_sizes: list[int] = None, bias: bool = True):
		super(GAT, self).__init__()
		layer_sizes = layer_sizes or [32, 32]
		self.convs = nn.ModuleList([
		   GATv2Conv(in_channels, layer_sizes[0], bias=bias, edge_dim=1),
		] + [
		   GATv2Conv(layer_sizes[i], layer_sizes[i + 1], bias=bias, edge_dim=1) for i in
		   range(len(layer_sizes) - 1)
   		])

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor) -> torch.tensor:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:return: The hidden state of the GCN h_t (Nodes_nb, Hidden_size)
		"""
		for conv in self.convs[:-1]:
			x = F.leaky_relu(conv(x, edge_index, edge_weight))
		return self.convs[-1](x, edge_index, edge_weight)

class TGCNCell(nn.Module):
	"""
	T-GCN Cell for one timestep, from https://arxiv.org/pdf/1811.05320.
	"""
	def __init__(self, in_channels: int, hidden_size: int, use_gat: bool = True):
		super(TGCNCell, self).__init__()
		if use_gat:
			self.gcn = GAT(in_channels, [hidden_size, hidden_size]).to(device)
		else:
			self.gcn = GCN(in_channels, [hidden_size, hidden_size]).to(device)
		self.lin_u = nn.Linear(2 * hidden_size + in_channels, hidden_size)
		self.lin_r = nn.Linear(2 * hidden_size + in_channels, hidden_size)
		self.lin_c = nn.Linear(2 * hidden_size + in_channels, hidden_size)

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor, h: torch.tensor) -> tuple[torch.tensor, torch.tensor]:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:param h: The hidden state of the GRU h_{t-1} (Nodes_nb, Hidden_size)
		:return: The hidden state of the GRU h_t (Nodes_nb, Hidden_size)
		"""
		gcn_out = F.sigmoid(self.gcn(x, edge_index, edge_weight))  # f(A,X_t), Eq. 2
		u = F.sigmoid(self.lin_u(torch.cat([x, gcn_out, h], dim=-1)))  # u_t, Eq. 3
		r = F.sigmoid(self.lin_r(torch.cat([x, gcn_out, h], dim=-1)))  # r_t,  Eq. 4
		c = F.tanh(self.lin_c(torch.cat([x, gcn_out, r * h], dim=-1)))  # c_t, Eq. 5

		return u * h + (1 - u) * c  # h_t, Eq. 6

class TGCN(nn.Module):
	"""
	T-GCN model from https://arxiv.org/pdf/1811.05320.
	"""
	def __init__(self, in_channels: int, out_channels: int, hidden_size: int, layers_nb: int = 2, output_activation: nn.Module = None, use_gat: bool = True):
		super(TGCN, self).__init__()
		self.hidden_size = hidden_size
		self.layers_nb = max(1, layers_nb)
		self.cells = nn.ModuleList(
			[TGCNCell(in_channels, hidden_size, use_gat=use_gat)] + [TGCNCell(hidden_size, hidden_size, use_gat=use_gat) for _ in range(self.layers_nb - 1)]
		)
		self.out = nn.Sequential(
			nn.Linear(hidden_size, out_channels),
			output_activation if output_activation is not None else nn.Identity(),
		)

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor) -> torch.tensor:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb, SeqLength)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:return: The output of the model (Nodes_nb, OutFeatures_nb)
		"""
		h_prev = [
			torch.zeros(x.shape[0], self.hidden_size).to(device) for _ in range(self.layers_nb)
		]
		for t in range(x.shape[-1]):
			h = x[:, :, t]  # h is the output of the previous GRU layer (the input features for the first layer)
			for i, cell in enumerate(self.cells):
				h = cell(h, edge_index, edge_weight, h_prev[i])
				h_prev[i] = h
		return self.out(h_prev[-1])

class A3TGCN(nn.Module):
	"""
	A3T-GCN model from https://arxiv.org/pdf/2006.11583.
	"""
	def __init__(self, in_channels: int, out_channels: int, hidden_size: int, layers_nb: int = 2, output_activation: nn.Module = None, use_gat: bool = True):
		super(A3TGCN, self).__init__()
		self.hidden_size = hidden_size
		self.layers_nb = max(1, layers_nb)
		self.cells = nn.ModuleList(
			[TGCNCell(in_channels, hidden_size, use_gat=use_gat)] + [TGCNCell(hidden_size, hidden_size, use_gat=use_gat) for _ in range(self.layers_nb - 1)]
		)
		self.attention = nn.Sequential(
			nn.Linear(hidden_size, 1),
			nn.Softmax(dim=1),
		)
		nn.init.uniform_(self.attention[0].weight)
		self.out = nn.Sequential(
			nn.Linear(hidden_size, out_channels),
			output_activation if output_activation is not None else nn.Identity(),
		)

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor) -> torch.tensor:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb, SeqLength)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:return: The output of the model (Nodes_nb, OutFeatures_nb)
		"""
		h_prev = [
			torch.zeros(x.shape[0], self.hidden_size).to(device) for _ in range(self.layers_nb)
		]
		h_final = torch.zeros(x.shape[0], x.shape[-1], self.hidden_size).to(device)
		for t in range(x.shape[-1]):
			h = x[:, :, t]  # h is the output of the previous GRU layer (the input features for the first layer)
			for i, cell in enumerate(self.cells):
				h = cell(h, edge_index, edge_weight, h_prev[i])
				h_prev[i] = h
			h_final[:, t, :] = h
		return self.out(F.leaky_relu(torch.sum(F.leaky_relu(h_final) * self.attention(h_final), dim=1)))


class DCGRUCell(nn.Module):
	"""
	DCRNN Cell for one timestep, from https://arxiv.org/pdf/1707.01926.
	"""
	def __init__(self, in_channels: int, hidden_size: int, use_gat: bool = True):
		super(DCGRUCell, self).__init__()
		if use_gat:
			self.gcn_r = GAT(in_channels + hidden_size, [hidden_size, hidden_size], bias=True)
			self.gcn_u = GAT(in_channels + hidden_size, [hidden_size, hidden_size], bias=True)
			self.gcn_c = GAT(in_channels + hidden_size, [hidden_size, hidden_size], bias=True)
		else:
			self.gcn_r = GCN(in_channels + hidden_size, [hidden_size, hidden_size], bias=True)
			self.gcn_u = GCN(in_channels + hidden_size, [hidden_size, hidden_size], bias=True)
			self.gcn_c = GCN(in_channels + hidden_size, [hidden_size, hidden_size], bias=True)

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor, h: torch.tensor) -> torch.tensor:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:param h: The hidden state of the GRU h_{t-1} (Nodes_nb, Hidden_size)
		:return: The hidden state of the GRU h_t (Nodes_nb, Hidden_size)
		"""
		x_h = torch.cat([x, h], dim=-1)
		r = F.sigmoid(self.gcn_r(x_h, edge_index, edge_weight))
		u = F.sigmoid(self.gcn_u(x_h, edge_index, edge_weight))
		c = F.tanh(self.gcn_c(torch.cat([x, r * h], dim=-1), edge_index, edge_weight))
		return u * h + (1 - u) * c

class DCGNN(nn.Module):
	"""
	DCGNN model from https://arxiv.org/pdf/1707.01926.
	"""
	def __init__(self, in_channels: int, out_channels: int, hidden_size: int, layers_nb: int = 2, output_activation: nn.Module = None, use_gat: bool = True):
		super(DCGNN, self).__init__()
		self.hidden_size = hidden_size
		self.layers_nb = max(1, layers_nb)
		self.cells = nn.ModuleList(
			[DCGRUCell(in_channels, hidden_size, use_gat=use_gat)] + [DCGRUCell(hidden_size, hidden_size, use_gat=use_gat) for _ in range(self.layers_nb - 1)]
		)
		self.out = nn.Sequential(
			nn.Linear(hidden_size, out_channels),
			output_activation if output_activation is not None else nn.Identity(),
		)

	def forward(self, x: torch.tensor, edge_index: torch.tensor, edge_weight: torch.tensor) -> torch.tensor:
		"""
		:param x: The feature matrix of the graph X_t (Nodes_nb, Features_nb, SeqLength)
		:param edge_index: The edge index of the graph A (2, Edges_nb)
		:param edge_weight: The edge weight of the graph (Edges_nb,)
		:return: The output of the model (Nodes_nb, OutFeatures_nb)
		"""
		h_prev = [
			torch.zeros(x.shape[0], self.hidden_size).to(device) for _ in range(self.layers_nb)
		]
		for t in range(x.shape[-1]):
			h = x[:, :, t]  # h is the output of the previous GRU layer (the input features for the first layer)
			for i, cell in enumerate(self.cells):
				h = cell(h, edge_index, edge_weight, h_prev[i])
				h_prev[i] = h
		return self.out(h_prev[-1])

# Training and Testing

In [14]:
torch.cuda.empty_cache()

In [22]:
import torch.nn as nn
import torch.optim as optim
from torch_geometric.loader import DataLoader
torch.cuda.empty_cache()

train_part = .9 # Split Data by so that first 90% is for training, and last 10% for testing
batch_size = 16

train_dataset, test_dataset = timestamps[:int(train_part * len(timestamps))], timestamps[int(train_part * len(timestamps)):]
print(f"Train dataset: {len(train_dataset)}, Test dataset: {len(test_dataset)}")
train_dataloader, test_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True), DataLoader(test_dataset, batch_size=len(test_dataset), drop_last=True)

Train dataset: 24293, Test dataset: 2700


In [44]:
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter
from torch_geometric.data import DataLoader
from datetime import datetime
from tqdm import trange
        
def test_iteration(model: nn.Module, criterion: nn.Module, test_dataloader: DataLoader, epoch: int, writer: SummaryWriter) -> None:
    """
    Test iteration
    :param model: Model to test
    :param criterion: Loss function to use
    :param test_dataloader: Test data loader
    :param epoch: Current epoch
    :param writer: Tensorboard writer
    """
    model.eval()
    for idx, data in enumerate(test_dataloader):
        data = data.to(device) 
        out = model(data.x, data.edge_index, data.edge_weight).to(device) 
        loss = criterion(out, data.y)
        writer.add_scalar("Loss/Test Loss", loss.item(), epoch * len(test_dataloader) + idx)

def train_iteration(model: nn.Module, optimizer: optim.Optimizer, pbar: trange, criterion: nn.Module, train_dataloader: DataLoader, epoch: int, writer: SummaryWriter) -> None:
    """
    Train iteration
    :param model: Model to train
    :param optimizer: Optimizer to use (Adam, ...)
    :param pbar: tqdm progress bar
    :param criterion: Loss function to use
    :param train_dataloader: Train data loader
    :param epoch: Current epoch
    :param writer: Tensorboard writer
    :param measure_acc: Whether to measure accuracy or not (for classification tasks)
    """
    model.train()
    total_loss = 0.0
    num_batches = len(train_dataloader)
    
    for idx, data in enumerate(train_dataloader):
        data = data.to(device) 
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_weight)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        pbar.set_postfix({"Batch": f"{(idx + 1) / len(train_dataloader) * 100:.1f}%"})
        writer.add_scalar("Loss/Train Loss", loss.item(), epoch * len(train_dataloader) + idx)
        
    average_loss = total_loss / num_batches
    print(f'Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {average_loss:.4f}')

def train(model: nn.Module, optimizer: optim.Optimizer, criterion: nn.Module, train_dataloader: DataLoader, test_dataloader: DataLoader, num_epochs: int, task_title: str = "") -> None:
    """
    Train function for a regression / classification model
    :param model: Model to train
    :param optimizer: Optimizer to use (Adam, ...)
    :param criterion: Loss function to use
    :param train_dataloader: Train data loader
    :param test_dataloader: Test data loader
    :param num_epochs: Number of epochs to train on the train dataset
    :param task_title: Title of the tensorboard run
    """
    writer = SummaryWriter(f'runs/{task_title}_{datetime.now().strftime("%d_%m_%Hh%M")}_{model.__class__.__name__}')
    for epoch in (pbar := trange(num_epochs, desc="Epochs")):
        train_iteration(model, optimizer, pbar, criterion, train_dataloader, epoch, writer)
        test_iteration(model, criterion, test_dataloader, epoch, writer)

# Training

In [None]:
in_channels, out_channels, hidden_size, layers_nb = timestamps[0].x.shape[-2], 1, 16, 2
lr, weight_decay, num_epochs = 0.001, 1e-5, 16

tgcn_no_gat_model = TGCN(in_channels, out_channels, hidden_size, layers_nb, use_gat = False).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(tgcn_no_gat_model.parameters(), lr = lr, weight_decay = weight_decay)

tgcn_no_gat_model

In [None]:
train(tgcn_no_gat_model, optimizer, criterion, train_dataloader, test_dataloader, num_epochs, "PriceForecasting_TGCN")
torch.save(tgcn_no_gat_model.state_dict(), 'tgcn_no_gat_model_epoch_16.pth')

In [None]:
in_channels, out_channels, hidden_size, layers_nb = timestamps[0].x.shape[-2], 1, 16, 2
a3tgcn_no_gat_model = A3TGCN(in_channels, out_channels, hidden_size, layers_nb, use_gat = False).to(device)

lr, weight_decay, num_epochs = 0.001, 1e-5, 16

criterion = nn.MSELoss()
optimizer_a3tgcn = optim.Adam(a3tgcn_no_gat_model.parameters(), lr=lr, weight_decay=weight_decay)
a3tgcn_no_gat_model

In [None]:
train(a3tgcn_no_gat_model, optimizer_a3tgcn, criterion, train_dataloader, test_dataloader, num_epochs, "PriceForecasting_A3TGCN")
torch.save(a3tgcn_no_gat_model.state_dict(), 'a3tgcn_no_gat_model_epoch_16.pth')

In [None]:
in_channels, out_channels, hidden_size, layers_nb = timestamps[0].x.shape[-2], 1, 16, 2
dcgnn_no_gat_model = DCGNN(in_channels, out_channels, hidden_size, layers_nb, use_gat = False).to(device)

lr, weight_decay, num_epochs = 0.001, 1e-5, 16

criterion = nn.MSELoss()
optimizer_dcgnn = optim.Adam(dcgnn_no_gat_model.parameters(), lr=lr, weight_decay=weight_decay)
dcgnn_no_gat_model

In [None]:
train(dcgnn_no_gat_model, optimizer_dcgnn, criterion, train_dataloader, test_dataloader, num_epochs, "PriceForecasting_DCGNN")
torch.save(dcgnn_no_gat_model.state_dict(), 'dcgnn_no_gat_model_epoch_16.pth')

# Results

Printing Regression Error and plotting predictions against actual targets for comparison

In [32]:
from torch.nn import functional as F

def get_regression_error(model: nn.Module, dataloader: DataLoader) -> tuple[float, float, float, float]:
	"""
	Computes regression errors
	:param model: Model to test
	:param dataloader: Dataloader to test on
	:return: Mean squared error, rooted mean squared error, mean absolute error, mean relative error
	"""
	mse = 0
	rmse = 0
	mae = 0
	mre = 0
	for data in dataloader:
		out = model(data.x, data.edge_index, data.edge_weight)
		mse += F.mse_loss(out, data.y).item()
		rmse += F.mse_loss(out, data.y).sqrt().item()
		mae += F.l1_loss(out, data.y).item()
		mre += (F.l1_loss(out, data.y) / data.y.abs().mean()).item()
	return mse / len(dataloader), rmse / len(dataloader), mae / len(dataloader), mre / len(dataloader)

def plot_regression_all(model: nn.Module, data: Data, all_data: pd.DataFrame, title: str = None) -> None:
    """
    Plot graphs for all currencies in the regression model.
    :param model: Model to test
    :param data: Data to test on
    :param all_data: DataFrame containing all data with tickers as index level 0
    :param title: Title of the plot
    """
    model.eval()
    out = model(data.x, data.edge_index, data.edge_weight)

    preds = out.reshape(len(data.ptr) - 1, -1).cpu()
    target = data.y.reshape(len(data.ptr) - 1, -1).cpu()

    # Extract tickers from the DataFrame
    tickers = all_data.index.levels[0].tolist()
    
    num_currencies = len(tickers)
    num_cols = 4
    num_rows = (num_currencies + num_cols - 1) // num_cols

    fig, axs = plt.subplots(num_rows, num_cols, figsize=(20, num_rows * 4))
    fig.suptitle(title)
    axs = axs.flatten()

    for idx in range(num_currencies):
        ax = axs[idx]
        ax.plot(target[:, idx].detach().cpu().numpy(), label="Real")
        ax.plot(preds[:, idx].detach().cpu().numpy(), label="Predicted")
        ax.set_title(f"Currency: {tickers[idx]}")
        ax.legend()

    # Hide any unused subplots
    for j in range(num_currencies, num_rows * num_cols):
        axs[j].axis('off')

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust layout to include title
    plt.show()

## TCGN Model Results (No GAT)

In [None]:
# TCGN Model Results (No Attention Mechanism)
mse, rmse, mae, mre = get_regression_error(tgcn_no_gat_model, train_dataloader)
print(f"TCGN w/o GAT Train MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MRE: {mre:.4f}")

In [None]:
plot_regression_all(tgcn_no_gat_model, next(iter(train_dataloader)), all_data, "TGCN Train Plots")

In [None]:
# Test Results
A3TGCN, rmse, mae, mre = get_regression_error(tgcn_no_gat_model, test_dataloader)
print(f"Test MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MRE: {mre:.4f}")

In [None]:
plot_regression_all(tgcn_no_gat_model, next(iter(test_dataloader)), all_data, "TGCN Test Plots")

## A3TCGN Model Results (Without GAT)

In [None]:
# Train Results (skip as it takes a minute)
mse, rmse, mae, mre = get_regression_error(a3tgcn_no_gat_model, train_dataloader)
print(f"Train MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MRE: {mre:.4f}")

In [None]:
plot_regression_all(a3tgcn_no_gat_model, next(iter(train_dataloader)), all_data, "A3TGCN Train Plots")

In [None]:
# Test Results
mse, rmse, mae, mre = get_regression_error(a3tgcn_no_gat_model, test_dataloader)
print(f"Test MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MRE: {mre:.4f}")

In [None]:
plot_regression_all(a3tgcn_no_gat_model, next(iter(test_dataloader)), all_data, "A3TGCN Test Plots")

## DCGNN Results (No GAT)

In [None]:
# Train Results (skip as it takes a minute)
mse, rmse, mae, mre = get_regression_error(dcgnn_no_gat_model, train_dataloader)
print(f"Train MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MRE: {mre:.4f}")

In [None]:
plot_regression_all(dcgnn_no_gat_model, next(iter(train_dataloader)), all_data, "DCGNN Train Plots")

In [None]:
# Test Results
mse, rmse, mae, mre = get_regression_error(dcgnn_no_gat_model, test_dataloader)
print(f"Test MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MRE: {mre:.4f}")

In [None]:
plot_regression_all(dcgnn_no_gat_model, next(iter(test_dataloader)), all_data, "DCGNN Test Plots")