In [69]:
import torch
from torch_geometric.data import Data
import networkx as nx
import pandas as pd

In [70]:
import sys
sys.path.append('../TorchDataPreprocessing/')
from preprocessing_data import preprocess_data, create_data_object

In [71]:
graph_path = '../Graphs/pearson_correlation_threshold_graph.graphml'
data_path = '../Datasets/yfinance_weekly_data.csv'

threshold_graph = nx.read_graphml(graph_path)
df = pd.read_csv(data_path)
df.set_index('Date', inplace=True)

In [72]:
df

Unnamed: 0_level_0,Adj Close,symbol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,67.343971,MMM
2013-01-08,68.465317,MMM
2013-01-15,69.636009,MMM
2013-01-22,70.983040,MMM
2013-01-29,71.067665,MMM
...,...,...
2022-11-28,155.644012,ZTS
2022-12-05,151.659470,ZTS
2022-12-12,142.830231,ZTS
2022-12-19,144.115540,ZTS


In [73]:
graph = preprocess_data(df, threshold_graph)
node_to_index = {label: index for index, label in enumerate(graph.nodes)}
final_data = create_data_object(graph, node_to_index)

In [74]:
graph

<networkx.classes.graph.Graph at 0x196a2f94c90>

In [75]:
final_data

Data(x=[442, 496], edge_index=[2, 31665], y=[442, 496], node_sectors=[442], edge_weight=[31665])

In [76]:
final_data.lstm_input = final_data.x

In [77]:
final_data.lstm_input

tensor([[0.0046, 0.0047, 0.0048,  ..., 0.0190, 0.0203, 0.0201],
        [0.0023, 0.0023, 0.0022,  ..., 0.0021, 0.0022, 0.0021],
        [0.0113, 0.0112, 0.0114,  ..., 0.0272, 0.0301, 0.0284],
        ...,
        [0.0104, 0.0107, 0.0111,  ..., 0.0173, 0.0183, 0.0181],
        [0.0069, 0.0069, 0.0070,  ..., 0.0494, 0.0531, 0.0513],
        [0.0030, 0.0029, 0.0030,  ..., 0.0083, 0.0084, 0.0082]])

In [78]:
final_data.node_sectors

tensor([ 5.,  6.,  1.,  7.,  5.,  5.,  7.,  7.,  7.,  2.,  7.,  7., 10., 10.,
        10.,  4.,  4.,  4.,  4.,  7.,  8.,  5.,  6.,  4.,  7.,  8.,  7.,  6.,
         5.,  4.,  9.,  1.,  7.,  4.,  6.,  3.,  8.,  7.,  1.,  9., 10.,  9.,
         7.,  8., 10.,  4.,  1.,  6.,  4.,  5.,  1.,  1.,  5.,  4.,  5.,  5.,
         4.,  1.,  3.,  4.,  5.,  7.,  4.,  5.,  1.,  9.,  4.,  2.,  5.,  6.,
         4.,  4.,  9.,  9.,  1.,  7.,  8.,  8.,  2.,  6.,  0.,  5.,  4.,  2.,
         2.,  4.,  0.,  4.,  1.,  6., 10.,  5., 10.,  4.,  5.,  3.,  2.,  2.,
         6.,  5.,  7.,  7.,  6.,  6.,  3.,  7.,  5.,  3., 10.,  6.,  8.,  6.,
         4.,  1.,  5.,  1.,  5.,  0.,  0.,  9.,  1.,  6.,  1.,  1., 10., 10.,
         5.,  3.,  7.,  5.,  0.,  1.,  8., 10.,  6., 10.,  2.,  8.,  6.,  7.,
         3.,  9.,  9., 10.,  9.,  6., 10., 10.,  5., 10.,  6.,  1.,  9.,  1.,
         3.,  6.,  8.,  6., 10.,  7.,  7.,  4.,  7.,  8.,  9.,  7.,  6.,  6.,
         5.,  2.,  4.,  7.,  1.,  6.,  0.,  0.,  1.,  7.,  1.,  

In [79]:
data_x = torch.cat([final_data.node_sectors.view(-1, 1), final_data.lstm_input], dim=1)


In [80]:
num_nodes = 442

data_x = data_x.view(num_nodes, -1, 1)

In [81]:
data_x

tensor([[[5.0000e+00],
         [4.6316e-03],
         [4.7423e-03],
         ...,
         [1.9009e-02],
         [2.0343e-02],
         [2.0112e-02]],

        [[6.0000e+00],
         [2.2804e-03],
         [2.2528e-03],
         ...,
         [2.0999e-03],
         [2.2064e-03],
         [2.1325e-03]],

        [[1.0000e+00],
         [1.1325e-02],
         [1.1201e-02],
         ...,
         [2.7222e-02],
         [3.0063e-02],
         [2.8420e-02]],

        ...,

        [[5.0000e+00],
         [1.0429e-02],
         [1.0713e-02],
         ...,
         [1.7268e-02],
         [1.8261e-02],
         [1.8092e-02]],

        [[7.0000e+00],
         [6.9046e-03],
         [6.9183e-03],
         ...,
         [4.9445e-02],
         [5.3118e-02],
         [5.1315e-02]],

        [[4.0000e+00],
         [2.9741e-03],
         [2.8806e-03],
         ...,
         [8.3004e-03],
         [8.3568e-03],
         [8.1586e-03]]])

In [82]:
final_data.x = data_x

In [83]:
final_data

Data(x=[442, 497, 1], edge_index=[2, 31665], y=[442, 496], node_sectors=[442], edge_weight=[31665], lstm_input=[442, 496])

In [84]:
from torch_geometric.data import DataLoader

loader = DataLoader([final_data], batch_size=4, shuffle=True)



In [85]:
from torch_geometric.nn import GCNConv
import torch.nn as nn

class TemporalGCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_nodes, lstm_hidden_dim):
        super(TemporalGCN, self).__init__()

        # Spatial Graph Convolution Layer
        self.gcn = GCNConv(input_dim, hidden_dim)

        # Temporal LSTM Layer
        self.lstm = nn.LSTM(input_size=hidden_dim, hidden_size=lstm_hidden_dim, batch_first=True)

        # Output Layer
        self.fc = nn.Linear(lstm_hidden_dim, output_dim)

        # Number of nodes in the graph
        self.num_nodes = num_nodes

    def forward(self, x, edge_index, edge_weight, lstm_input):
        # Spatial Graph Convolution
        x = self.gcn(x, edge_index, edge_weight)
        
        # Reshape the output to (num_nodes, sequence_length, hidden_dim)
        x = x.view(self.num_nodes, -1, x.size(1))

        # Temporal LSTM Layer
        lstm_output, _ = self.lstm(x)

        # Take the output from the last time step
        lstm_output_last = lstm_output[:, -1, :]

        # Fully Connected Layer
        output = self.fc(lstm_output_last)

        return output


In [90]:
learning_rate = 0.001

input_dim = 497
hidden_dim = 64
output_dim = 1 
num_nodes = 442 
lstm_hidden_dim = 32

model = TemporalGCN(input_dim, hidden_dim, output_dim, num_nodes, lstm_hidden_dim)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [91]:
# Example (replace with your actual training loop)
def train(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0

    for data in loader:
        optimizer.zero_grad()
        x, edge_index, edge_weight, lstm_input, target = data.x, data.edge_index, data.edge_weight, data.lstm_input, data.y
        output = model(x, edge_index, edge_weight, lstm_input)
        loss = criterion(output, target.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    average_loss = total_loss / len(loader.dataset)
    return average_loss

In [92]:
def test(model, loader, criterion):
    model.eval()
    total_loss = 0

    with torch.no_grad():
        for data in loader:
            x, edge_index, edge_weight, lstm_input, target = data.x, data.edge_index, data.edge_weight, data.lstm_input, data.y
            output = model(x, edge_index, edge_weight, lstm_input)
            loss = criterion(output, target.float())
            total_loss += loss.item()

    average_loss = total_loss / len(loader.dataset)
    return average_loss

In [93]:
num_epochs = 100

for epoch in range(num_epochs):
    train_loss = train(model, loader, optimizer, criterion)
    test_loss = test(model, loader, criterion)

    print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (219674x1 and 497x64)

In [None]:
split_date_index = 400

historical_data_train = final_data.lstm_input[:, :split_date_index]
historical_data_test = final_data.lstm_input[:, split_date_index:]

node_sectors_train = final_data.node_sectors
node_sectors_test = final_data.node_sectors

y_train = final_data.y[:, :split_date_index]
y_test = final_data.y[:, split_date_index:]

# Combine historical data and node sectors into data.x
data_x_train = torch.cat([node_sectors_train.view(-1, 1), historical_data_train], dim=1)
data_x_test = torch.cat([node_sectors_test.view(-1, 1), historical_data_test], dim=1)

# Reshape data_x to have the desired shape (num_nodes, num_features, 1)
data_x_train = data_x_train.view(num_nodes, -1, 1)
data_x_test = data_x_test.view(num_nodes, -1, 1)

In [None]:
data_train = Data(
    x=data_x_train,
    node_sectors=node_sectors_train,
    edge_index=final_data.edge_index,
    edge_weight=final_data.edge_weight,
    lstm_input=historical_data_train,
    y=y_train
)

data_test = Data(
    x=data_x_test,
    node_sectors=node_sectors_test,
    edge_index=final_data.edge_index,
    edge_weight=final_data.edge_weight,
    lstm_input=historical_data_test,
    y=y_test
)

In [None]:
final_data

Data(x=[442, 497, 1], edge_index=[2, 31665], y=[442, 496], node_sectors=[442], edge_weight=[31665], lstm_input=[442, 496])

In [None]:
data_train

Data(x=[442, 401, 1], edge_index=[2, 31665], y=[442, 400], node_sectors=[442], edge_weight=[31665], lstm_input=[442, 400])

In [None]:
data_test

Data(x=[442, 97, 1], edge_index=[2, 31665], y=[442, 96], node_sectors=[442], edge_weight=[31665], lstm_input=[442, 96])

In [None]:
train_loader = DataLoader([data_train], batch_size=1, shuffle=True)
test_loader = DataLoader([data_test], batch_size=1, shuffle=False)



In [None]:
learning_rate = 0.001

input_dim = final_data.x.shape[1]
hidden_dim = 64
output_dim = 1 
num_nodes = 442 
lstm_hidden_dim = 32

model = TemporalGCN(input_dim, hidden_dim, output_dim, num_nodes, lstm_hidden_dim)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
num_epochs = 100

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion)
    test_loss = test(model, test_loader, criterion)

    print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Original size of x: torch.Size([442, 401, 1])


NameError: name 'self' is not defined