This notebook converts tabular data into graph data that PyTorch Geometric Temporal can ingest.

The final graph is a static homogeneous directed graph with temporal signals.


In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the two tables; both carry a "datetime" column that is used as the join key.
flow_df = pd.read_csv("data/flow/_combined.csv")
dap_df = pd.read_csv("data/dap/_combined.csv")

# Keep only the timestamps that are present in both tables.
intersect_datetime = np.intersect1d(flow_df["datetime"], dap_df["datetime"])
flow_df = flow_df[flow_df["datetime"].isin(intersect_datetime)]
dap_df = dap_df[dap_df["datetime"].isin(intersect_datetime)]

# The following cells turn both tables into arrays and pair their rows purely by
# position. Filtering alone does not guarantee that (different row order or
# duplicated timestamps would silently misalign features), so fail loudly here.
if not np.array_equal(flow_df["datetime"].to_numpy(), dap_df["datetime"].to_numpy()):
    raise ValueError(
        "flow and dap rows are not aligned on 'datetime' after filtering; "
        "check both CSVs for differing row order or duplicated timestamps"
    )

# Alphabetical column order, so node columns follow the same sorted order that
# np.unique produces when the node index mapping is built further down.
dap_df = dap_df.reindex(sorted(dap_df.columns), axis=1)

In [3]:
# Static edges of shape (2, n_edges): row 0 holds exporters, row 1 holds importers.
# Every non-datetime column of flow_df is named "<exporter>-><importer>".
# Select the columns by name instead of by position so the result does not
# depend on where "datetime" happens to sit in the CSV.
interconnectors = flow_df.columns.drop("datetime")

# Unpacking into exactly two names keeps the original failure mode: a column
# that does not split into exactly two parts raises ValueError.
endpoint_pairs = [name.split("->") for name in interconnectors]
exporters = np.array([source for source, _ in endpoint_pairs])
importers = np.array([target for _, target in endpoint_pairs])

edges = np.vstack([exporters, importers])
print(edges.shape)
print(edges[:, :10])

(2, 58)
[['BE' 'BE' 'BE' 'BE' 'BE' 'DE' 'DE' 'DE' 'DE' 'DE']
 ['DE' 'FR' 'LU' 'NL' 'UK' 'BE' 'DK' 'AT' 'CH' 'CZ']]


In [4]:
# Give every node name an integer id. Despite the `edge_` prefix, `edge_names`
# holds the distinct node names found at either end of an edge; np.unique
# returns them sorted, so ids follow alphabetical order.
edge_names = np.unique(edges)
edge_map = dict(zip(edge_names, range(len(edge_names))))

# Translate both rows of `edges` into ids, keeping the (2, n_edges) layout.
edge_indices = np.array([[edge_map[name] for name in row] for row in edges])
print(edge_indices.shape)
print(edge_indices[:, :10])
print(edge_map)

(2, 58)
[[ 1  1  1  1  1  4  4  4  4  4]
 [ 4  8 11 12 16  1  5  0  2  3]]
{'AT': 0, 'BE': 1, 'CH': 2, 'CZ': 3, 'DE': 4, 'DK': 5, 'ES': 6, 'FI': 7, 'FR': 8, 'IE': 9, 'IT': 10, 'LU': 11, 'NL': 12, 'NO': 13, 'PL': 14, 'SE': 15, 'UK': 16}


In [5]:
# Edge features: the flow_df value of each interconnector at each timestep,
# with a trailing axis of size 1 -> (n_timesteps_total, n_edges, 1).
flow_matrix = np.array(flow_df[interconnectors])
n_steps, n_edges = flow_matrix.shape
edge_features = flow_matrix.reshape(n_steps, n_edges, 1)
print(edge_features.shape)

(5136, 58, 1)


In [6]:
# Node features: one column of dap_df per node, one row per timestep.
node_frame = dap_df.drop(columns=["datetime"])

# The node names are the column labels (the row index only enumerates timesteps).
node_names = node_frame.columns

# Column j must describe the node whose integer id is j in `edge_map`;
# otherwise edge indices would point at the wrong feature column.
assert list(node_names) == list(edge_names), (
    "node feature columns do not match the node names used to build edge indices"
)

# Add a trailing feature axis of size 1 -> (n_timesteps_total, n_nodes, 1).
node_features = node_frame.values
node_features = np.reshape(node_features, (node_features.shape[0], node_features.shape[1], 1))
print(node_features.shape)

(5136, 17, 1)


In [7]:
# Labels are the edge features shifted forward in time:
# labels[i] is edge_features[i + n_timesteps].
n_timesteps = 24
labels = edge_features[n_timesteps:, :, :]
print(np.shape(labels))

(5112, 58, 1)


In [8]:
# Drop the trailing timesteps that have no label, so inputs and labels have
# the same length. Slicing to len(labels) rather than [:-n_timesteps] keeps
# this correct when n_timesteps == 0 (x[:-0] would be empty) and makes the
# cell safe to re-run, since a second run no longer trims the arrays again.
n_samples = len(labels)
edge_features = edge_features[:n_samples]
node_features = node_features[:n_samples]
print(edge_features.shape)
print(node_features.shape)

(5112, 58, 1)
(5112, 17, 1)


In [9]:
# Finally repeat the edge indices to match the number of timesteps (because edges are static).
# Always start from the 2-D (2, n_edges) index: if this cell has already run,
# edge_indices is 3-D and repeating it again would allocate a
# (T, T, 2, n_edges) array. All snapshots are identical, so the first one
# recovers the static index.
static_edge_index = edge_indices if edge_indices.ndim == 2 else edge_indices[0]
edge_indices = np.repeat(static_edge_index[np.newaxis, :, :], edge_features.shape[0], axis=0)
print(edge_indices.shape)

(5112, 2, 58)


In [10]:
# All four per-timestep arrays must contain the same number of snapshots.
assert len(
    {edge_indices.shape[0], edge_features.shape[0], node_features.shape[0], labels.shape[0]}
) == 1
# Print the per-snapshot shapes of the graph data for one arbitrary timestep.
i = 256
for caption, series in (
    ("Edge indices:", edge_indices),
    ("Edge features:", edge_features),
    ("Node features:", node_features),
    ("Labels:", labels),
):
    print(caption, series[i].shape)

Edge indices: (2, 58)
Edge features: (58, 1)
Node features: (17, 1)
Labels: (58, 1)


In [11]:
from torch_geometric_temporal.signal import DynamicGraphTemporalSignal

# Bundle the per-timestep arrays into one temporal graph signal. Keyword
# arguments make explicit that the edge features fill the edge-weight slot.
# NOTE(review): targets are per edge while features are per node -- confirm
# that the downstream model expects edge-level targets.
dataset = DynamicGraphTemporalSignal(
    edge_indices=edge_indices,
    edge_weights=edge_features,
    features=node_features,
    targets=labels,
)
print(dataset)

<torch_geometric_temporal.signal.dynamic_graph_temporal_signal.DynamicGraphTemporalSignal object at 0x0000021074C19150>
