In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
import torch
import os
from torch.utils.tensorboard import SummaryWriter
from torch.nn.functional import l1_loss
import torch.nn.functional as F

In [None]:
# Notebook-wide configuration: compute device, data directories, batch size and
# the feature columns read from the CSV files.

# Prefer the second GPU (the original hard-coded choice) when it exists, otherwise
# fall back to the first GPU, and finally to the CPU, so that later `.to(device)`
# calls do not fail on machines with fewer than two CUDA devices.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')
inputdir = '../data/processed/'
resultdir = '../data/result/'
batchsize = 2048
feature_names = [
    'hour_sin', 'hour_cos', 
    'day_of_week_sin', 'day_of_week_cos', 
    'month_sin', 'month_cos',
    'etat_barre_0', 'etat_barre_1', 'etat_barre_2', 'etat_barre_3'
]

# Report the visible CUDA devices so the selected device can be checked by eye.
if torch.cuda.is_available():
    print("Available CUDA devices:", torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")
else:
    print("CUDA is not available.")

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(resultdir, exist_ok=True)

In [None]:
# Load the raw splits as DataFrames from the configured input directory, the same
# location every other cell of this notebook reads these files from.
train_data = pd.read_csv(f"{inputdir}train_dataset.csv")
test_data_x = pd.read_csv(f"{inputdir}test_dataset_x.csv")
eval_data = pd.read_csv(f"{inputdir}eval_dataset.csv")

In [None]:
def load_time_series_data(file_path, feature_names, num_nodes=None, num_timesteps=None):
    """Read feature columns from a CSV and fold them into a 4-D array.

    Args:
        file_path: Path of the CSV file to read.
        feature_names: Columns to extract, in the order they should appear on
            the last axis.
        num_nodes: Size of the second axis. When omitted, a module-level
            ``num_nodes`` variable is used if one exists.
        num_timesteps: Size of the third axis. When omitted, a module-level
            ``num_timesteps`` variable is used if one exists.

    Returns:
        numpy array of shape ``(-1, num_nodes, num_timesteps, len(feature_names))``.

    Raises:
        ValueError: If either dimension is unknown or non-positive, or if the
            number of rows is not a multiple of ``num_nodes * num_timesteps``.
    """
    # Fall back to notebook globals so existing two-argument calls keep working
    # wherever those globals are defined.
    if num_nodes is None:
        num_nodes = globals().get("num_nodes")
    if num_timesteps is None:
        num_timesteps = globals().get("num_timesteps")
    if num_nodes is None or num_timesteps is None:
        raise ValueError(
            "num_nodes and num_timesteps must be passed as arguments or defined "
            "as module-level variables before calling load_time_series_data"
        )

    data = pd.read_csv(file_path)
    features = data[feature_names].values

    rows_per_sample = num_nodes * num_timesteps
    if rows_per_sample <= 0:
        raise ValueError("num_nodes and num_timesteps must both be positive")
    if len(features) % rows_per_sample != 0:
        raise ValueError(
            f"{file_path}: {len(features)} rows cannot be split into samples of "
            f"{num_nodes} nodes x {num_timesteps} timesteps"
        )

    # Assumes the rows of each sample are stored contiguously, node-major then
    # time step - TODO confirm against how the CSV was written.
    return features.reshape(-1, num_nodes, num_timesteps, len(feature_names))

# Build the reshaped feature arrays for the train / eval / test splits.
# load_time_series_data relies on num_nodes and num_timesteps being available
# at module level when they are not passed explicitly.
_split_paths = (
    f"{inputdir}train_dataset.csv",
    f"{inputdir}eval_dataset.csv",
    f"{inputdir}test_dataset_x.csv",
)
train_features, eval_features, test_features = [
    load_time_series_data(split_path, feature_names) for split_path in _split_paths
]


In [None]:
# npz Data Load example:
def load_graph_data(filename=f"{inputdir}graph_data.npz"):
    """Load the graph adjacency matrix and node mapping from an ``.npz`` archive.

    Args:
        filename: Path of the archive. It must contain the arrays
            ``adjacency_matrix``, ``keys`` and ``values``.

    Returns:
        Tuple ``(adjacency_matrix, index_to_iu_ac)``: the stored adjacency array
        and a dict pairing each entry of ``keys`` with the entry of ``values``
        at the same position.
    """
    # np.load returns an NpzFile holding an open file handle; the context
    # manager closes it once the arrays have been read into memory.
    with np.load(filename) as data:
        adjacency_matrix = data['adjacency_matrix']
        keys = data['keys']
        values = data['values']

    # Rebuild the mapping from the two parallel arrays.
    index_to_iu_ac = dict(zip(keys, values))

    return adjacency_matrix, index_to_iu_ac

graph_path = f"{inputdir}graph_data.npz"
adjacency_matrix, index_to_iu_ac = load_graph_data(filename=graph_path)

# Print both results to verify that the archive was loaded.
print("Loaded Adjacency Matrix:\n", adjacency_matrix)
print("Loaded Node Mapping:", index_to_iu_ac)

In [None]:
# Row-normalise the adjacency matrix: normalized_adjacency = D^-1 . A
# Left-multiplying by D^-1 rescales row i by 1 / D[i, i], so the degrees have to
# be row sums (axis=1); column sums give the same result only for a symmetric matrix.
degree = np.asarray(adjacency_matrix, dtype=float).sum(axis=1)
# Rows with zero degree keep a zero scale factor instead of making D singular
# (np.linalg.inv would raise LinAlgError on such a matrix).
inverse_degree = np.divide(1.0, degree, out=np.zeros_like(degree), where=degree != 0)
D = np.diag(degree)
D_inv = np.diag(inverse_degree)
# Scaling rows directly is equivalent to np.dot(D_inv, adjacency_matrix)
# without forming the dense matrix product.
normalized_adjacency = inverse_degree[:, np.newaxis] * adjacency_matrix

In [None]:
# Wrap the training arrays as float32 tensors. torch.as_tensor also accepts an
# existing tensor, so re-running this cell does not trigger the copy-construct
# warning that torch.tensor(tensor) emits.
train_features = torch.as_tensor(train_features, dtype=torch.float32)
train_labels = torch.as_tensor(train_data['target'].values, dtype=torch.float32).unsqueeze(-1)

# NOTE(review): TensorDataset requires both tensors to have the same first
# dimension; train_labels has one entry per CSV row - confirm this matches the
# number of samples in train_features.
train_dataset = TensorDataset(train_features, train_labels)
# Use the batch size from the configuration cell rather than a separate magic number.
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM whose last time step is projected by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of outputs produced by the linear head.
        dropout: Dropout passed to ``nn.LSTM``; only used when ``num_layers > 1``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # A single-layer LSTM is built with dropout disabled.
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_size) to (batch, output_size)."""
        # Explicit all-zero initial hidden and cell states, created on x's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(state_shape, device=x.device)
        initial_cell = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))
        # Only the final time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


In [None]:
class GraphConvolution(nn.Module):
    """Single graph-convolution layer computing ``adj @ (input @ weight) + bias``.

    Args:
        in_features: Size of the last dimension of the input.
        out_features: Size of the last dimension of the output.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Allocated uninitialised here; reset_parameters fills in the values.
        self.weight = nn.Parameter(torch.empty(in_features, out_features, dtype=torch.float32))
        self.bias = nn.Parameter(torch.empty(out_features, dtype=torch.float32))
        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight and bias uniformly from [-b, b] with b = 1 / sqrt(out_features)."""
        bound = 1. / np.sqrt(self.weight.size(1))
        nn.init.uniform_(self.weight, -bound, bound)
        nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, input, adj):
        """Project ``input`` with the weight, mix along ``adj``, then add the bias."""
        projected = torch.matmul(input, self.weight)
        aggregated = torch.matmul(adj, projected)
        return aggregated + self.bias

class STGNN(nn.Module):
    """Spatio-temporal model: a graph convolution per time step, then an LSTM.

    Args:
        num_features: Number of input features per node and time step.
        num_nodes: Number of graph nodes (rows/columns of the adjacency matrix).
        hidden_size: Width of the graph-convolution output and of the LSTM state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout forwarded to the LSTM.
    """

    def __init__(self, num_features, num_nodes, hidden_size, num_layers, dropout=0.5):
        super(STGNN, self).__init__()
        self.num_nodes = num_nodes
        self.hidden_size = hidden_size
        self.gc1 = GraphConvolution(num_features, hidden_size)
        # LSTMModel already selects the last time step and applies its own linear
        # head, so that head must emit hidden_size values to match self.fc below
        # (it previously emitted num_nodes values, which self.fc could not consume).
        self.lstm = LSTMModel(hidden_size * num_nodes, hidden_size, num_layers, hidden_size, dropout)
        self.fc = nn.Linear(hidden_size, 1)  # Assuming the target is 1-dimensional

    def forward(self, x, adj):
        """Predict one value per sample.

        Args:
            x: Tensor of shape (batch, num_nodes, num_timesteps, num_features),
                the layout produced by ``load_time_series_data``.
            adj: Adjacency matrix of shape (num_nodes, num_nodes).

        Returns:
            Tensor of shape (batch, 1).

        Raises:
            ValueError: If ``x`` is not 4-D or its node axis differs from ``num_nodes``.
        """
        if x.dim() != 4:
            raise ValueError(
                "STGNN expects x of shape (batch, num_nodes, num_timesteps, num_features), "
                f"got {tuple(x.shape)}"
            )
        if x.size(1) != self.num_nodes:
            raise ValueError(
                f"x has {x.size(1)} nodes on axis 1 but the model was built for {self.num_nodes}"
            )

        batch_size, _, num_timesteps, _ = x.shape
        # Put nodes on the second-to-last axis so adj @ (...) mixes across nodes
        # independently for every (batch, time step) pair.
        x = x.permute(0, 2, 1, 3)            # (batch, time, nodes, features)
        x = F.relu(self.gc1(x, adj))         # (batch, time, nodes, hidden)
        # Concatenate all node embeddings of a time step into one LSTM input vector.
        x = x.reshape(batch_size, num_timesteps, self.num_nodes * self.hidden_size)
        # LSTMModel returns a single (batch, hidden) tensor, not an (output, state)
        # tuple, and has already taken the last time step.
        x = self.lstm(x)
        return self.fc(x)


In [None]:
def load_data(file_path, feature_names):
    """Read the feature matrix and, when present, the target vector from a CSV.

    Args:
        file_path: Path of the CSV file.
        feature_names: Columns to return as features, in order.

    Returns:
        Tuple ``(features, targets)``. ``features`` is a 2-D array with one row
        per CSV row. ``targets`` is the ``target`` column as a 1-D array, or
        ``None`` when the file has no such column.
    """
    data = pd.read_csv(file_path)
    features = data[feature_names].values
    # Feature-only files would otherwise raise KeyError on data['target'].
    targets = data['target'].values if 'target' in data.columns else None
    return features, targets

def create_datasets(input_dir, feature_names):
    """Load the train / eval / test CSVs and return them as float32 tensors.

    Args:
        input_dir: Directory prefix prepended verbatim to each file name.
        feature_names: Feature columns to load from every file.

    Returns:
        Tuple ``(train_features, train_targets, eval_features, eval_targets,
        test_features)``. Target tensors carry an extra trailing dimension of size 1.
    """
    def as_float_tensor(array):
        # Convert to a PyTorch float32 tensor.
        return torch.tensor(array, dtype=torch.float32)

    train_x, train_y = load_data(f"{input_dir}train_dataset.csv", feature_names)
    eval_x, eval_y = load_data(f"{input_dir}eval_dataset.csv", feature_names)
    # The second value returned for the test split is discarded.
    test_x = load_data(f"{input_dir}test_dataset_x.csv", feature_names)[0]

    return (
        as_float_tensor(train_x),
        as_float_tensor(train_y).unsqueeze(1),
        as_float_tensor(eval_x),
        as_float_tensor(eval_y).unsqueeze(1),
        as_float_tensor(test_x),
    )

# Build the tensors used for training and evaluation.
# Note that this rebinds train_features, eval_features and test_features.
(
    train_features,
    train_targets,
    eval_features,
    eval_targets,
    test_features,
) = create_datasets(inputdir, feature_names)

# Reload the graph from the default path and keep the adjacency as a float32 tensor.
adjacency_matrix, index_to_iu_ac = load_graph_data()
adjacency_matrix = torch.tensor(adjacency_matrix, dtype=torch.float32)


In [None]:
# The node count is a property of the graph, so take it from the adjacency
# matrix. train_features.size(1) is the number of feature columns of the 2-D
# tensor returned by create_datasets, not the number of nodes.
num_graph_nodes = adjacency_matrix.shape[0]
model = STGNN(len(feature_names), num_graph_nodes, 64, 2).to(device)
adjacency_matrix = adjacency_matrix.to(device)
train_features, train_targets = train_features.to(device), train_targets.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Example training loop: one full-batch gradient step per epoch.
# NOTE(review): train_features is a 2-D (rows, features) tensor at this point -
# confirm it is arranged into per-node time series before being fed to the
# graph/LSTM model.
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    output = model(train_features, adjacency_matrix)
    loss = criterion(output, train_targets)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch}, Loss: {loss.item()}')


In [None]:
# Inference on the test features: evaluation mode, with gradient tracking off.
model.eval()
with torch.no_grad():
    test_inputs = test_features.to(device)
    test_output = model(test_inputs, adjacency_matrix)
    predictions = test_output.cpu().numpy()
    print(predictions)