In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import os

In [2]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network.

    The first GCNConv layer maps node features from ``in_channels`` to
    ``hidden_channels``; after a ReLU the second GCNConv layer maps them to
    ``out_channels``. No activation is applied to the final layer's output.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the second layer's output for node features ``x`` on ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

In [3]:
# Hyperparameters used by the per-year GCN training loop.
hidden_dim, output_dim = 16, 2
num_epochs, learning_rate = 100, 0.01

# Yearly graphs are read from graph_dir; embeddings are written to embeddings_dir.
graph_dir, embeddings_dir = "processed_graphs", "graph_embeddings"
# Create the output directory up front; exist_ok keeps re-running this cell harmless.
os.makedirs(embeddings_dir, exist_ok=True)

In [4]:
# Train one independent GCN per yearly graph and store its node embeddings.
for year in range(2000, 2024):
    print(f"Training GCN for year {year}...")
    # NOTE(review): torch.load unpickles the stored object; only load graph
    # files that come from a trusted source.
    graph = torch.load(f"{graph_dir}/graph_{year}.pt")

    # A fresh model and optimizer per year: no weights are shared across years.
    model = GCN(in_channels=graph.x.shape[1], hidden_channels=hidden_dim, out_channels=output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        output = model(graph.x, graph.edge_index)
        # The loss compares the model output with the input node features.
        # NOTE(review): this is only an element-for-element comparison when
        # graph.x has output_dim columns; otherwise mse_loss broadcasts or
        # raises - confirm the feature width of the stored graphs.
        loss = F.mse_loss(output, graph.x)
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            print(f"Year {year}, Epoch {epoch}: Loss = {loss.item():.4f}")

    # `output` above was computed before the last optimizer step and is still
    # attached to the autograd graph. Recompute the embeddings with the final
    # weights, in eval mode and without gradient tracking, so the saved tensor
    # reflects the trained model and is stored with requires_grad=False.
    model.eval()
    with torch.no_grad():
        embeddings = model(graph.x, graph.edge_index)

    torch.save(embeddings, f"{embeddings_dir}/embeddings_{year}.pt")
    print(f"Embeddings saved for {year}!")

print("GCN Training Completed!")


Training GCN for year 2000...
Year 2000, Epoch 0: Loss = 515.8576
Year 2000, Epoch 10: Loss = 124.3597
Year 2000, Epoch 20: Loss = 7.7808
Year 2000, Epoch 30: Loss = 13.0984
Year 2000, Epoch 40: Loss = 4.9510
Year 2000, Epoch 50: Loss = 4.5445
Year 2000, Epoch 60: Loss = 3.2599
Year 2000, Epoch 70: Loss = 3.3720
Year 2000, Epoch 80: Loss = 3.1253
Year 2000, Epoch 90: Loss = 3.0841
Embeddings saved for 2000!
Training GCN for year 2001...
Year 2001, Epoch 0: Loss = 219.1116
Year 2001, Epoch 10: Loss = 19.7605
Year 2001, Epoch 20: Loss = 16.3677
Year 2001, Epoch 30: Loss = 5.9628
Year 2001, Epoch 40: Loss = 5.5465
Year 2001, Epoch 50: Loss = 4.0447
Year 2001, Epoch 60: Loss = 3.8068
Year 2001, Epoch 70: Loss = 3.5153
Year 2001, Epoch 80: Loss = 3.3617
Year 2001, Epoch 90: Loss = 3.3574
Embeddings saved for 2001!
Training GCN for year 2002...
Year 2002, Epoch 0: Loss = 615.5639
Year 2002, Epoch 10: Loss = 424.7521
Year 2002, Epoch 20: Loss = 273.7277
Year 2002, Epoch 30: Loss = 78.7131
Yea

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import os

# Define LSTM Model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last timestep.

    Args:
        input_dim: feature size of each timestep.
        hidden_dim: hidden state size of every LSTM layer.
        output_dim: size of the vector produced by the linear head.
        num_layers: number of stacked LSTM layers (default 2).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a (batch, time, input_dim) tensor to a (batch, output_dim) tensor."""
        sequence_out = self.lstm(x)[0]
        # Only the top layer's output at the final timestep feeds the head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Hyperparameters (these rebind the module-level names set for the GCN cells)
hidden_dim, output_dim, num_layers = 16, 2, 2
num_epochs, learning_rate = 300, 0.01

# Load embeddings
embeddings_dir = "graph_embeddings"
years = [*range(2000, 2024)]
# Number of consecutive past years that make up one input window.
num_past_years = 5

# Prepare dataset
# Load every yearly embedding exactly once (each year appears in up to
# num_past_years + 1 windows). detach() drops any autograd flag carried over
# from how the tensor was saved, so the data acts as constant inputs/targets.
embeddings_by_year = {
    y: torch.load(f"{embeddings_dir}/embeddings_{y}.pt").detach() for y in years
}

sequences = []
targets = []

# Sliding window: num_past_years consecutive years predict the following year.
for i in range(len(years) - num_past_years):
    past_years = years[i : i + num_past_years]
    future_year = years[i + num_past_years]

    # Shape: (num_past_years, num_nodes, emb_dim)
    past_embeddings = torch.stack([embeddings_by_year[y] for y in past_years])
    # Shape: (num_nodes, emb_dim)
    future_embedding = embeddings_by_year[future_year]

    sequences.append(past_embeddings)
    targets.append(future_embedding)

# Convert to tensors
sequences = torch.stack(sequences)  # Shape: (num_samples, num_past_years, num_nodes, emb_dim)
targets = torch.stack(targets)      # Shape: (num_samples, num_nodes, emb_dim)

# Reshape to match LSTM expectations: each timestep becomes one flat vector
# holding every node's embedding.
num_samples, seq_len, num_nodes, emb_dim = sequences.shape
sequences = sequences.reshape(num_samples, seq_len, num_nodes * emb_dim)
targets = targets.reshape(num_samples, num_nodes * emb_dim)

# The LSTM input size is derived from the data (num_nodes * emb_dim).
input_dim = sequences.shape[-1]

# Initialize model, loss, and optimizer
# The head emits one value per entry of a flattened target row.
model = LSTMModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=targets.shape[-1],
    num_layers=num_layers,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
# Full-batch training: each epoch is a single gradient step over all windows.
model.train()  # nothing switches the mode during training, so set it once
for epoch in range(num_epochs):
    optimizer.zero_grad()

    output = model(sequences)
    loss = criterion(output, targets)
    loss.backward()
    optimizer.step()

    # Report the loss every tenth epoch.
    if not epoch % 10:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}")

# Save Prediction for 2024
# The training-loop `output` holds the fitted values for the training targets
# (and predates the last optimizer step); it is not a forecast. The forecast
# for the year after the last available one needs the most recent window as
# input: the last training window without its oldest year, followed by the
# last training target (the final year in `years`).
model.eval()
with torch.no_grad():
    # Shape: (1, seq_len, num_nodes * emb_dim)
    latest_window = torch.cat([sequences[-1, 1:], targets[-1:]], dim=0).unsqueeze(0)
    # Saved with the same (num_nodes, emb_dim) layout as a single year's embeddings.
    predicted_embedding_2024 = model(latest_window).view(num_nodes, emb_dim)
torch.save(predicted_embedding_2024, f"{embeddings_dir}/predicted_embeddings_2024.pt")

print("LSTM Training Completed & Predictions Saved!")



Epoch 0: Loss = 483.0508
Epoch 10: Loss = 449.6117
Epoch 20: Loss = 388.9518
Epoch 30: Loss = 334.7071
Epoch 40: Loss = 287.6370
Epoch 50: Loss = 246.4783
Epoch 60: Loss = 210.6407
Epoch 70: Loss = 179.5119
Epoch 80: Loss = 152.5101
Epoch 90: Loss = 129.1257
Epoch 100: Loss = 108.9208
Epoch 110: Loss = 91.5160
Epoch 120: Loss = 76.5779
Epoch 130: Loss = 63.8101
Epoch 140: Loss = 52.9466
Epoch 150: Loss = 43.7482
Epoch 160: Loss = 35.9992
Epoch 170: Loss = 29.5056
Epoch 180: Loss = 24.0936
Epoch 190: Loss = 19.6081
Epoch 200: Loss = 15.9115
Epoch 210: Loss = 12.8824
Epoch 220: Loss = 10.4148
Epoch 230: Loss = 8.4162
Epoch 240: Loss = 6.8069
Epoch 250: Loss = 5.5187
Epoch 260: Loss = 4.4934
Epoch 270: Loss = 3.6822
Epoch 280: Loss = 3.0440
Epoch 290: Loss = 2.5448
LSTM Training Completed & Predictions Saved!
