In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import os

In [2]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network.

    The first ``GCNConv`` maps ``in_channels`` to ``hidden_channels`` and is
    followed by a ReLU; the second maps ``hidden_channels`` to
    ``out_channels``. No activation is applied after the second layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Run both convolutions on ``x`` and return the second layer's output.

        ``edge_index`` is forwarded unchanged to both convolution layers.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

In [3]:
# Hyperparameters for the per-year GCN runs.
hidden_dim = 20       # hidden channel width passed to GCN
output_dim = 2        # output channel width passed to GCN (size of the saved embeddings)
num_epochs = 400      # optimisation steps per yearly graph
learning_rate = 0.01  # Adam learning rate

# Seed PyTorch's global RNG so weight initialisation (and therefore the saved
# embeddings) is repeatable when the notebook is restarted and run top to bottom.
seed = 42
torch.manual_seed(seed)

# Input graphs are read from graph_dir; embeddings are written to embeddings_dir.
graph_dir = "processed_graphs"
embeddings_dir = "graph_embeddings"
os.makedirs(embeddings_dir, exist_ok=True)

In [4]:
# Train a fresh GCN on each yearly graph and save that year's node embeddings.
for year in range(2000, 2024):
    print(f"Training GCN for year {year}...")
    # NOTE(review): torch.load unpickles arbitrary Python objects; only load
    # graph files that come from a trusted source.
    graph = torch.load(f"{graph_dir}/graph_{year}.pt")

    model = GCN(in_channels=graph.x.shape[1], hidden_channels=hidden_dim, out_channels=output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        output = model(graph.x, graph.edge_index)
        # The model output is regressed onto the input node features.
        # NOTE(review): this relies on `output` and `graph.x` having compatible
        # shapes (output_dim vs. graph.x.shape[1]) — confirm against the data.
        loss = F.mse_loss(output, graph.x)
        loss.backward()
        optimizer.step()

        if epoch % 20 == 0:
            print(f"Year {year}, Epoch {epoch}: Loss = {loss.item():.4f}")

    # Recompute the embeddings with the final weights and without autograd
    # tracking. The training-loop `output` predates the last optimizer step and
    # is still attached to the computation graph, so saving it would persist a
    # tensor with requires_grad=True.
    model.eval()
    with torch.no_grad():
        year_embeddings = model(graph.x, graph.edge_index)

    torch.save(year_embeddings, f"{embeddings_dir}/embeddings_{year}.pt")
    print(f"Embeddings saved for {year}!")

print("GCN Training Completed!")


Training GCN for year 2000...
Year 2000, Epoch 0: Loss = 748.1550
Year 2000, Epoch 20: Loss = 53.1440
Year 2000, Epoch 40: Loss = 12.7925
Year 2000, Epoch 60: Loss = 4.1764
Year 2000, Epoch 80: Loss = 3.1328
Year 2000, Epoch 100: Loss = 3.0263
Year 2000, Epoch 120: Loss = 3.0036
Year 2000, Epoch 140: Loss = 2.9919
Year 2000, Epoch 160: Loss = 2.9835
Year 2000, Epoch 180: Loss = 2.9775
Year 2000, Epoch 200: Loss = 2.9744
Year 2000, Epoch 220: Loss = 2.9726
Year 2000, Epoch 240: Loss = 2.9714
Year 2000, Epoch 260: Loss = 2.9705
Year 2000, Epoch 280: Loss = 2.9698
Year 2000, Epoch 300: Loss = 2.9693
Year 2000, Epoch 320: Loss = 2.9689
Year 2000, Epoch 340: Loss = 2.9684
Year 2000, Epoch 360: Loss = 2.9681
Year 2000, Epoch 380: Loss = 2.9677
Embeddings saved for 2000!
Training GCN for year 2001...
Year 2001, Epoch 0: Loss = 300.9543
Year 2001, Epoch 20: Loss = 6.1071
Year 2001, Epoch 40: Loss = 5.4119
Year 2001, Epoch 60: Loss = 3.5629
Year 2001, Epoch 80: Loss = 3.4211
Year 2001, Epoch 10

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import os

class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer that maps back to ``input_dim``.

    The LSTM is built with ``batch_first=True``. Only the LSTM output at the
    last position along dimension 1 is passed to the linear layer, so the
    model returns one ``input_dim``-sized vector per sequence.
    """

    def __init__(self, input_dim, hidden_dim=16, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=input_dim)

    def forward(self, x):
        """Return ``fc`` applied to the LSTM output at the final time step."""
        sequence_out, _state = self.lstm(x)
        final_step = sequence_out[:, -1, :]
        return self.fc(final_step)


# Load the per-year embeddings from disk.
# NOTE(review): this rebinds `graph_dir`, which earlier in the notebook points at
# "processed_graphs"; re-running the GCN training cell after this one would then
# look for graphs in the wrong directory. Kept as-is for compatibility.
graph_dir = "graph_embeddings"
years = list(range(2000, 2024))
embeddings = {}
for year in years:
    # detach(): the saved tensors may carry requires_grad=True; they are fixed
    # inputs/targets here, so autograd must not track them.
    embeddings[year] = torch.load(os.path.join(graph_dir, f"embeddings_{year}.pt")).detach()


# Build sliding windows: `window_size` consecutive years -> the following year.
window_size = 10
if len(years) <= window_size:
    raise ValueError(
        f"Need more than {window_size} years of embeddings to build training windows, got {len(years)}"
    )
train_x, train_y = [], []
for i in range(len(years) - window_size):
    x_seq = [embeddings[years[i + j]] for j in range(window_size)]  # Past `window_size` years
    y_seq = embeddings[years[i + window_size]]  # Next year
    train_x.append(torch.stack(x_seq))
    train_y.append(y_seq)

train_x = torch.stack(train_x)  # (num_samples, window_size, num_nodes, features)
train_y = torch.stack(train_y)  # (num_samples, num_nodes, features)


# Flatten each year's (num_nodes, features) matrix into one vector per time step.
num_samples, window_size, num_nodes, features = train_x.shape
train_x = train_x.reshape(num_samples, window_size, num_nodes * features)
input_dim = train_x.shape[-1]
lstm_model = LSTMModel(input_dim)
optimizer = optim.Adam(lstm_model.parameters(), lr=0.01)
criterion = nn.MSELoss()


# Full-batch training: every window is fed through the model on each epoch.
epochs = 500
lstm_model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    predictions = lstm_model(train_x)  # Shape: (num_samples, num_nodes * features)

    # Reshape predictions to match train_y
    predictions = predictions.reshape(num_samples, num_nodes, features)  # Shape: (num_samples, num_nodes, features)

    loss = criterion(predictions, train_y)
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}")

# Save trained LSTM model
torch.save(lstm_model.state_dict(), "lstm_model.pth")
print("LSTM training completed and model saved!")

# Predict the year after the last available one from the most recent window.
# Derived from `years`/`window_size` so it stays consistent if either changes
# (currently 2014-2023 -> 2024).
next_year = years[-1] + 1
past_10_years = [embeddings[year] for year in years[-window_size:]]
input_seq = torch.stack(past_10_years).unsqueeze(0)  # Shape: (1, window_size, num_nodes, features)
input_seq = input_seq.reshape(1, window_size, num_nodes * features)  # Reshape for LSTM

lstm_model.eval()
# Inference only: disable autograd so the saved prediction carries no graph.
with torch.no_grad():
    predicted_2024 = lstm_model(input_seq).squeeze(0)  # Shape: (num_nodes * features,)
predicted_2024 = predicted_2024.reshape(num_nodes, features)  # Reshape to (num_nodes, features)

# Save predicted embeddings next to the per-year embeddings.
torch.save(predicted_2024, os.path.join(graph_dir, f"embeddings_{next_year}.pt"))
print(f"Predictions for {next_year} saved!")

Epoch 0: Loss = 484.9755
Epoch 50: Loss = 245.8815
Epoch 100: Loss = 107.8632
Epoch 150: Loss = 42.9638
Epoch 200: Loss = 15.3817
Epoch 250: Loss = 5.1223
Epoch 300: Loss = 1.8128
Epoch 350: Loss = 0.8854
Epoch 400: Loss = 0.6576
Epoch 450: Loss = 0.6079
LSTM training completed and model saved!
Predictions for 2024 saved!
