In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# opened by ordinary filesystem paths from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ---- Input files (both live in the same mounted Drive folder) ----
_project_dir = '/content/drive/MyDrive/cs5284pro'
data_addr = _project_dir + '/dow_1day_price.csv'       # price table, one column per node
adj_addr = _project_dir + '/dow_1day_090_01_corr.csv'  # adjacency matrix, read without a header row

# ---- Target selection ----
s_index = 0   # column of the price table used as the prediction target
n_off = 0     # rows by which the labels are shifted ahead of the inputs (0 = no shift)

# ---- Model / optimisation ----
n_neurons = 128   # hidden units of the recurrent cell
seq_len = 12      # length of each input window
lr = 1e-3         # Adam learning rate
n_epochs = 40     # number of training epochs
batch_size = 128  # batch size setting

# NOTE(review): `th` is not referenced by any cell visible in this notebook.
th = 0.2

In [None]:
import pandas as pd
import scipy
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error, mean_absolute_error
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import scipy.sparse as sp
from torch.utils.data import TensorDataset, DataLoader

class GraphTransformerLayer(nn.Module):
    """Single post-norm Transformer encoder block (self-attention + feed-forward).

    Args:
        input_dim: Accepted for interface compatibility but not used. No input
            projection is applied, so the feature size of the tensors passed
            to ``forward`` must equal ``hidden_dim``.
        hidden_dim: Embedding size of the attention and feed-forward sub-layers.
        num_heads: Number of attention heads.
        dropout: Dropout probability used inside the attention, inside the
            feed-forward network and on the feed-forward residual branch.
    """

    def __init__(self, input_dim, hidden_dim, num_heads, dropout=0.1):
        super(GraphTransformerLayer, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=num_heads, dropout=dropout)
        self.ffn = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim * 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim * 2, hidden_dim),
        )
        # LayerNorm over a single feature is degenerate: the mean equals the
        # value and the variance is zero, so the output is always just the
        # norm's bias and every trace of the input is erased. Skip the
        # normalisation in that case so the layer still carries signal.
        if hidden_dim > 1:
            self.norm1 = nn.LayerNorm(hidden_dim)
            self.norm2 = nn.LayerNorm(hidden_dim)
        else:
            self.norm1 = nn.Identity()
            self.norm2 = nn.Identity()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply self-attention and the feed-forward network with residuals.

        Args:
            x: Tensor of shape ``[batch_size, seq_len, hidden_dim]``. A 2-D
                tensor ``[batch_size, seq_len]`` is treated as having a single
                feature per position.

        Returns:
            Tensor of shape ``[batch_size, seq_len, hidden_dim]``.
        """
        if x.dim() == 2:
            x = x.unsqueeze(-1)

        # nn.MultiheadAttention is sequence-first by default, so attend on a
        # transposed view and keep `x` itself batch-first for the residual.
        seq_first = x.permute(1, 0, 2)
        attn_output, _ = self.attention(seq_first, seq_first, seq_first)
        attn_output = attn_output.permute(1, 0, 2)

        # Residual connections around attention and the feed-forward network.
        x = self.norm1(x + attn_output)
        x = self.norm2(x + self.dropout(self.ffn(x)))

        return x

class TfGRUCell(nn.Module):
    """GRU-style recurrent cell whose input is first passed through a Transformer block.

    Args:
        num_units: Size of the hidden state.
        adj: Square adjacency matrix as a tensor. It is normalised and stored
            as the non-trainable buffer ``_adj``.
        num_gcn_nodes: Stored as ``_gcn_nodes``.
        s_index: Stored as ``s_index``.
        hidden_dim: Feature size of the Transformer block and input size of
            the gate projections.
        num_heads: Number of attention heads of the Transformer block.
        dropout: Dropout probability of the Transformer block.

    NOTE(review): ``_adj``, ``_gcn_nodes`` and ``s_index`` are stored but never
    read by ``trans`` or ``forward``, so the adjacency currently has no
    influence on the cell's output. Confirm whether that is intended.
    """

    def __init__(self, num_units, adj, num_gcn_nodes, s_index, hidden_dim=1, num_heads=1, dropout=0.1):
        super(TfGRUCell, self).__init__()
        self.units = num_units
        self._gcn_nodes = num_gcn_nodes
        self.s_index = s_index

        # Normalise the adjacency once and keep it as a buffer so it follows
        # the module across devices without being trained.
        self.register_buffer("_adj", self.calculate_laplacian(adj))

        # Transformer block used for feature extraction.
        self.graph_transformer = GraphTransformerLayer(
            input_dim=self.units,
            hidden_dim=hidden_dim,
            num_heads=num_heads,
            dropout=dropout,
        )

        # Input-to-gate projections (with bias).
        self.wz = nn.Linear(hidden_dim, self.units, bias=True)
        self.wr = nn.Linear(hidden_dim, self.units, bias=True)
        self.wh = nn.Linear(hidden_dim, self.units, bias=True)

        # State-to-gate projections (no bias).
        self.uz = nn.Linear(self.units, self.units, bias=False)
        self.ur = nn.Linear(self.units, self.units, bias=False)
        self.uh = nn.Linear(self.units, self.units, bias=False)

    @property
    def state_size(self):
        """Size of the hidden state (``num_units``)."""
        return self.units

    def calculate_laplacian(self, adj):
        """Return ``D^-1/2 (A + I) D^-1/2`` for a square adjacency tensor ``adj``.

        The identity is created on the same device as ``adj`` so the method
        also works for adjacency tensors that do not live on the default
        device. Integer adjacency tensors are promoted to the default float
        dtype first.
        """
        if not adj.is_floating_point():
            adj = adj.to(torch.get_default_dtype())
        identity = torch.eye(adj.size(0), dtype=adj.dtype, device=adj.device)
        adj = adj + identity  # add self-loops
        degree = torch.sum(adj, dim=1)
        degree[degree == 0] = 1  # avoid dividing by zero for zero-degree rows
        d_inv_sqrt = torch.diag(torch.pow(degree, -0.5))
        return d_inv_sqrt @ adj @ d_inv_sqrt

    def trans(self, inputs):
        """Run ``inputs`` through the Transformer block.

        A 2-D input gets a trailing feature axis of size one first.
        """
        if inputs.dim() == 2:
            inputs = inputs.unsqueeze(-1)
        return self.graph_transformer(inputs)

    def forward(self, inputs, state):
        """Compute the next hidden state.

        Args:
            inputs: Input for this step, forwarded to ``trans``.
            state: Previous hidden state of shape ``[batch_size, num_units]``.

        Returns:
            The new hidden state, same shape as ``state``.
        """
        # Pool the Transformer output over axis 1 once; all three gates share it.
        pooled = self.trans(inputs).mean(dim=1)

        # GRU gates
        z = torch.sigmoid(self.wz(pooled) + self.uz(state))
        r = torch.sigmoid(self.wr(pooled) + self.ur(state))
        h = torch.tanh(self.wh(pooled) + self.uh(r * state))

        # Blend the previous state with the candidate state.
        return z * state + (1 - z) * h



In [None]:
class TfGRUModel(nn.Module):
    """Unroll a recurrent cell over axis 1 of the input and collect every state.

    Args:
        cell: Module exposing ``units`` and callable as ``cell(inputs, state)``.
        seq_len: Number of steps to unroll.
        num_gcn_nodes: Stored as ``num_gcn_nodes``; not read by ``forward``.
    """

    def __init__(self, cell, seq_len, num_gcn_nodes):
        super(TfGRUModel, self).__init__()
        self.cell = cell
        self.seq_len = seq_len
        self.num_gcn_nodes = num_gcn_nodes

    def forward(self, x):
        """Run the cell for ``seq_len`` steps starting from a zero state.

        Args:
            x: 3-D tensor; step ``t`` feeds ``x[:, t, :]`` to the cell, so
                axis 1 must have at least ``seq_len`` entries.

        Returns:
            Tensor of shape ``(batch_size, seq_len, units)`` holding the state
            after every step.
        """
        batch_size = x.size(0)
        # Create the initial state directly on the input's device and in the
        # input's floating dtype, so the model also works after `.double()`
        # or `.to(device)` without an extra host-to-device copy.
        state_dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()
        state = torch.zeros(batch_size, self.cell.units, dtype=state_dtype, device=x.device)

        outputs = []
        for t in range(self.seq_len):
            state = self.cell(x[:, t, :], state)
            outputs.append(state)

        return torch.stack(outputs, dim=1)  # (batch_size, seq_len, units)


In [None]:
def normalize_adj(adj):
    """Symmetrically normalise an adjacency matrix: ``D^-0.5 A D^-0.5``.

    Args:
        adj: Square adjacency matrix (anything ``scipy.sparse.coo_matrix``
            accepts).

    Returns:
        The normalised matrix as a float32 ``scipy.sparse`` COO matrix.

    Rows whose degree (row sum) is not strictly positive get a scaling factor
    of zero. This avoids the divide-by-zero RuntimeWarning for isolated nodes
    and keeps NaN out of the result when a row sum is negative.
    """
    adj = sp.coo_matrix(adj)
    rowsum = np.asarray(adj.sum(1), dtype=np.float64).ravel()

    # Only take the inverse square root where it is well defined.
    d_inv_sqrt = np.zeros_like(rowsum)
    positive = rowsum > 0
    d_inv_sqrt[positive] = np.power(rowsum[positive], -0.5)

    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    normalized_adj = adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
    return normalized_adj.astype(np.float32)

def load_dow_price_data(data_addr, adj_addr):
    """Read the price table and the adjacency matrix from CSV files.

    Args:
        data_addr: Path of the price CSV (parsed with pandas' default header
            handling).
        adj_addr: Path of the adjacency CSV (parsed with ``header=None``).

    Returns:
        ``(data, adj)``: the price values after a ``MinMaxScaler`` with default
        settings has been fitted to and applied on all rows, and the raw
        adjacency values.
    """
    price_frame = pd.read_csv(data_addr)
    adjacency_frame = pd.read_csv(adj_addr, header=None)

    # NOTE(review): the scaler is fitted on every row, including rows that
    # later end up in the test split, and it is discarded afterwards, so the
    # scaling cannot be inverted downstream. Confirm this is acceptable.
    scaled_prices = MinMaxScaler().fit_transform(price_frame.values)
    return scaled_prices, adjacency_frame.values

def preprocess_data(data, labels, time_len, train_rate, seq_len, pre_len):
    """Cut sliding windows out of ``data``/``labels`` and split them chronologically.

    For every start offset ``s`` in ``range(time_len - seq_len - pre_len)``:
      * the input window is ``data[s : s + seq_len, :]``;
      * the target is ``labels[s + seq_len : s + seq_len + pre_len]``;
      * the "previous" target is the same label slice shifted back by one row.

    The first ``int(train_rate * n_windows)`` windows form the training set,
    the remainder the test set.

    Returns:
        ``(X_train, Y_train, X_test, Y_test, pre_Y_test)`` as NumPy arrays;
        ``pre_Y_test`` holds the "previous" targets of the test windows.
    """
    starts = range(time_len - seq_len - pre_len)
    windows = [data[s:s + seq_len, :] for s in starts]
    targets = [labels[s + seq_len:s + seq_len + pre_len] for s in starts]
    previous_targets = [labels[(s + seq_len - 1):(s + seq_len + pre_len - 1)] for s in starts]

    # Chronological split: earliest windows train, latest windows test.
    split = int(train_rate * len(windows))
    return (
        np.array(windows[:split]),
        np.array(targets[:split]),
        np.array(windows[split:]),
        np.array(targets[split:]),
        np.array(previous_targets[split:]),
    )

In [None]:

# Load the scaled price matrix and the adjacency, normalise the adjacency and
# make sure it ends up as a dense NumPy array.
data, adj = load_dow_price_data(data_addr, adj_addr)
adj = normalize_adj(adj)
if isinstance(adj, sp.coo_matrix):
    adj = adj.toarray()

# The prediction target is one column of the price matrix; with n_off > 0 the
# labels are shifted n_off rows ahead of the inputs.
labels = data[:, s_index]
if n_off > 0:
    data, labels = data[:-n_off], labels[n_off:]

train_rate = 0.8
pre_len = 1
time_len, n_gcn_nodes = data.shape

X_train, y_train, X_test, y_test, pre_y_test = preprocess_data(
    data, labels, time_len, train_rate, seq_len, pre_len
)

# Targets keep their [n_samples, pre_len] layout.
y_train, y_test = (torch.tensor(targets, dtype=torch.float32) for targets in (y_train, y_test))

# Inputs are rearranged from [n_samples, seq_len, num_gcn_nodes] to
# [n_samples, num_gcn_nodes, seq_len].
# NOTE(review): TfGRUModel.forward steps over axis 1 for `seq_len` iterations;
# after this permute axis 1 indexes nodes, not time steps. Confirm which axis
# the recurrence is meant to run over.
X_train, X_test = (
    torch.tensor(windows, dtype=torch.float32).permute(0, 2, 1) for windows in (X_train, X_test)
)


  d_inv_sqrt = np.power(rowsum, -0.5).flatten()


In [None]:
# Seed the RNG so weight initialisation and batch shuffling are reproducible
# under Restart & Run All.
torch.manual_seed(0)

# Initialize model
cell = TfGRUCell(n_neurons, torch.tensor(adj, dtype=torch.float32), n_gcn_nodes, s_index)
model = TfGRUModel(cell, seq_len, n_gcn_nodes)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss()

# Train in mini-batches of `batch_size`. The original loop ignored that setting
# and performed a single full-batch update per epoch.
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

# Training
for epoch in range(n_epochs):
    model.train()
    running_loss = 0.0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        # The model output is always (batch, seq_len, units); index it directly
        # rather than calling .squeeze(), which would drop the batch axis for a
        # batch holding a single sample.
        outputs = model(batch_x)
        # Compare the first hidden unit at the last step with the target.
        loss = criterion(outputs[:, -1, 0].unsqueeze(1), batch_y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * batch_x.size(0)
    # Report the sample-weighted mean loss over the epoch.
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {epoch_loss}")


Epoch 1/40, Loss: 0.08105330914258957
Epoch 2/40, Loss: 0.043260443955659866
Epoch 3/40, Loss: 0.030466580763459206
Epoch 4/40, Loss: 0.033168692141771317
Epoch 5/40, Loss: 0.03996584191918373
Epoch 6/40, Loss: 0.04384530335664749
Epoch 7/40, Loss: 0.0436837337911129
Epoch 8/40, Loss: 0.040774375200271606
Epoch 9/40, Loss: 0.03679312393069267
Epoch 10/40, Loss: 0.03316093981266022
Epoch 11/40, Loss: 0.030823804438114166
Epoch 12/40, Loss: 0.030137624591588974
Epoch 13/40, Loss: 0.03085392713546753
Epoch 14/40, Loss: 0.032274309545755386
Epoch 15/40, Loss: 0.03357940539717674
Epoch 16/40, Loss: 0.03418903425335884
Epoch 17/40, Loss: 0.033944204449653625
Epoch 18/40, Loss: 0.033054500818252563
Epoch 19/40, Loss: 0.03191396966576576
Epoch 20/40, Loss: 0.030909720808267593
Epoch 21/40, Loss: 0.03029399923980236
Epoch 22/40, Loss: 0.030137933790683746
Epoch 23/40, Loss: 0.030354419723153114
Epoch 24/40, Loss: 0.030763156712055206
Epoch 25/40, Loss: 0.031167425215244293
Epoch 26/40, Loss: 0.

In [None]:
# Evaluation: score the held-out windows with the trained model.
model.eval()
with torch.no_grad():
    predictions = model(X_test)

    # First hidden unit at the last step, kept as a column so it lines up with
    # the [n_samples, 1] targets.
    result = predictions[:, -1, 0:1].numpy()

# Metrics are computed on the scaled values produced by the loading step.
rmse = sqrt(mean_squared_error(y_test, result))
mae = mean_absolute_error(y_test, result)

print("***********************")
for metric_name, metric_value in (("RMSE", rmse), ("MAE", mae)):
    print(f"{metric_name}: {metric_value}")

***********************
RMSE: 0.020449883523973086
MAE: 0.01664881408214569
