###	1.	Install Necessary Libraries:
Ensure that you have the necessary libraries installed. You can install PyTorch-Geometric and its dependencies using pip if you haven’t already:

In [1]:
#!pip install torch torch-geometric networkx pandas numpy scikit-learn

#### 1.a. Handle graph saving and loading 

In [2]:
import pickle

def save_graph(graph, filename):
    """Serialize ``graph`` to ``filename`` with pickle, overwriting any existing file."""
    with open(filename, 'wb') as sink:
        pickle.Pickler(sink).dump(graph)

def load_graph(filename):
    """Read back and return the object pickled in ``filename``.

    NOTE: unpickling can execute arbitrary code, so only load files that
    were written by this notebook or come from a trusted source.
    """
    with open(filename, 'rb') as source:
        restored = pickle.Unpickler(source).load()
    return restored
# Path of the pickle file in which the visibility graph is cached between runs
graph_filename = 'nvg_graph.pkl'    

In [3]:
import networkx as nx
def create_nvg(prices):
    """Build the natural visibility graph (NVG) of a series of values.

    Every sample becomes a node whose ``feature`` attribute holds its value.
    Two samples ``i < j`` are connected when every sample strictly between
    them lies strictly below the straight line joining ``(i, prices[i])``
    and ``(j, prices[j])``; adjacent samples are therefore always connected.

    Args:
        prices: Ordered sequence of numeric values (list, NumPy array,
            pandas Series, ...). Values are read by position, so a Series
            whose index is not ``0..N-1`` is handled correctly.

    Returns:
        An undirected ``networkx.Graph`` with nodes ``0 .. len(prices) - 1``
        inserted in chronological order, each carrying a ``feature``
        attribute.
    """
    # Materialise once: this gives positional access regardless of any
    # pandas index, and list lookups are much cheaper than per-element
    # Series indexing inside the nested loops below.
    values = list(prices)
    n_points = len(values)
    graph = nx.Graph()

    # Register all nodes up front so that node iteration order matches the
    # time order; otherwise add_edge() would insert far-away visible nodes
    # before their predecessors and downstream row indices would not line
    # up with sample positions.
    graph.add_nodes_from(
        (idx, {"feature": value}) for idx, value in enumerate(values)
    )

    for i in range(n_points):
        y_i = values[i]
        for j in range(i + 1, n_points):
            rise = values[j] - y_i
            span = j - i
            # A point k obstructs the view when it reaches or exceeds the
            # linear interpolation between i and j at position k.
            # any() stops at the first obstruction found.
            blocked = any(
                values[k] >= y_i + rise * (k - i) / span
                for k in range(i + 1, j)
            )
            if not blocked:
                graph.add_edge(i, j)

    return graph

In [4]:
import os
import pandas as pd
# Build the graph from the CSV only when no cached copy exists; otherwise reuse the cache
if not os.path.exists(graph_filename):
    print("Creating and saving graph...")
    df = pd.read_csv(
        "dataset/EURUSD_Daily_200005300000_202405300000.csv",
        delimiter="\t",
    )
    # The graph is built from the closing-price column
    closing = df["<CLOSE>"]
    g = create_nvg(closing)
    save_graph(g, graph_filename)
    print("Saving graph to disk...")
else:
    print("Loading graph from disk...")
    g = load_graph(graph_filename)

In [5]:
from torch_geometric.utils import from_networkx
import numpy as np
import torch
# Turn the NetworkX graph into a PyTorch Geometric data object
dataset = from_networkx(g)

# Gather each node's 'feature' attribute (its closing price) in node order
node_features = np.array([attrs['feature'] for _, attrs in g.nodes(data=True)])

# Store the prices as a single-column node feature matrix
dataset.x = torch.tensor(node_features, dtype=torch.float).view(-1, 1)

In [6]:
# Print the converted data object for a quick visual check
print(dataset)

###	2.	Load and Prepare the Dataset:
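Instead of a standard benchmark such as the Cora citation network (where nodes represent documents and edges represent citations between them), we use the natural visibility graph built above from the EUR/USD daily closing prices: each node is one price sample with its closing price as the only feature, and each edge links two samples that are visible to each other. The edges are then split into training, validation, and test sets for link prediction.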

In [22]:
from torch_geometric.transforms import RandomLinkSplit
import torch

# Transform that partitions the edges of the undirected graph into
# training, validation, and test sets, keeping positive and negative
# edges in separate attributes
transform = RandomLinkSplit(
    is_undirected=True,
    key="edge_label",
    split_labels=True,
)

# Applying the transform yields one data object per split
train_data, val_data, test_data = transform(dataset)

###	3.	Define the VGAE Model:
We need to define the VGAE model, which consists of an encoder that maps the input features into a latent space. The encoder is typically implemented using Graph Convolutional Networks (GCNs).

In [8]:
from torch_geometric.nn import VGAE, GCNConv
class GCNEncoder(torch.nn.Module):
    """GCN encoder that outputs the latent mean and log-std used by a VGAE."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden_channels = 2 * out_channels
        # One shared layer followed by separate heads for mu and logstd
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv_mu = GCNConv(hidden_channels, out_channels)
        self.conv_logstd = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return ``(mu, logstd)`` for node features ``x`` over ``edge_index``."""
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv_mu(hidden, edge_index), self.conv_logstd(hidden, edge_index)

In [16]:
# Size of the latent embedding produced for every node
out_channels = 16
# Wrap the GCN encoder in a variational graph auto-encoder
model = VGAE(
    GCNEncoder(in_channels=dataset.num_features, out_channels=out_channels)
)

###	4.	Training the Model:
The model is trained by optimizing a loss function that combines a reconstruction loss (which ensures the graph is reconstructed correctly) and a KL divergence loss (which regularizes the latent space).

In [23]:
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, ReduceLROnPlateau
# Adam optimizer with a small weight decay
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=1e-5,
)
# Cosine annealing with warm restarts: the first cycle lasts 5 scheduler
# steps and every following cycle is twice as long. The author observed
# that this schedule leads to a better loss.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2)
# Alternative that was tried:
#scheduler = ReduceLROnPlateau(optimizer, factor=0.9)

# Single optimisation step over the training graph
def train():
    """Run one training step and return ``(loss value, current learning rate)``."""
    model.train()
    optimizer.zero_grad()

    latent = model.encode(train_data.x, train_data.edge_index)
    reconstruction = model.recon_loss(latent, train_data.edge_index)
    # The KL term is scaled by the inverse of the number of nodes
    kl_weight = 1 / train_data.num_nodes
    total = reconstruction + kl_weight * model.kl_loss()

    total.backward()
    optimizer.step()
    # Advance the schedule once per call; a ReduceLROnPlateau scheduler
    # would need scheduler.step(metrics=loss) here instead
    scheduler.step()

    current_lr = scheduler.get_last_lr()[0]
    return total.item(), current_lr

# Run 5000 training epochs, reporting progress on every 100th one
for epoch in range(5000):
    loss, lr = train()
    if epoch % 100:
        continue
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, LR: {lr:.9f}')

###	5.	Evaluate the Model:
After training, we can evaluate the model’s performance on the test set. The evaluation typically involves predicting the probability of edges (links) between nodes and comparing them to the true test edges.

In [19]:
# Test the model
from sklearn.metrics import roc_auc_score, average_precision_score
# Test the model
def test(data):
    """Score link prediction on ``data`` and return ``(ROC AUC, average precision)``.

    ``data`` must provide ``x``, ``edge_index``, ``pos_edge_label_index`` and
    ``neg_edge_label_index``.
    """
    model.eval()
    with torch.no_grad():
        z = model.encode(data.x, data.edge_index)
        # Decoder scores for the held-out positive and negative edges
        pos_scores = model.decode(z, data.pos_edge_label_index).cpu()
        neg_scores = model.decode(z, data.neg_edge_label_index).cpu()

        scores = torch.cat([pos_scores, neg_scores])
        # Label 1 for every positive edge, 0 for every negative edge
        labels = torch.cat([
            torch.ones(pos_scores.shape[0]),
            torch.zeros(neg_scores.shape[0]),
        ])

        return roc_auc_score(labels, scores), average_precision_score(labels, scores)

In [24]:
# Evaluate link prediction on the training split
roc_auc, ap_score = test(train_data)
print(f'Train -> ROC AUC Score: {roc_auc:.4f}, Average Precision Score: {ap_score:.4f}')

In [25]:
# Evaluate link prediction on the held-out test split
roc_auc, ap_score = test(test_data)
print(f'Test -> ROC AUC Score: {roc_auc:.4f}, Average Precision Score: {ap_score:.4f}')