In [93]:
import torch
from IPython.display import clear_output
pt_version = torch.__version__
print(pt_version)

1.12.1+cu113


In [125]:
# Run it for once before
!pip install torch-geometric-temporal
clear_output()

In [126]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import copy

In [127]:
import torch.nn.functional as F
from torch_geometric.data import Dataset, Data
from torch_geometric.utils import dense_to_sparse

## Hyperparameters

In [215]:
isEdgeLength=True
early_stopping=False
toDecay=False
nIter=20

## Loading the dataset

In [216]:
traffic_data=pd.read_csv("d1_X.csv",index_col=0)

In [217]:
traffic_data.head()

Unnamed: 0,288416374,288416379,288416380,288416386,288416399,314622896,314622918,314623160,314635488,314655436,...,8318574243,8318574244,8318574253,8320254523,8321458993,8321458994,8321459001,8321459093,8321552780,8321552782
2016-10-31 16:02:00,0,1,1,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
2016-10-31 16:07:00,0,1,3,0,0,1,8,3,7,0,...,0,0,0,1,0,0,0,0,0,0
2016-10-31 16:12:00,0,4,5,2,0,0,7,5,2,0,...,0,0,1,1,0,0,0,0,0,0
2016-10-31 16:17:00,6,5,3,7,1,4,12,3,6,0,...,0,0,1,4,0,0,0,0,0,0
2016-10-31 16:22:00,2,5,5,4,0,5,20,9,10,0,...,0,0,2,5,0,0,0,0,0,0


In [218]:
nodes=[str(i) for i in traffic_data]

In [219]:
print("No. of nodes: ",len(nodes))
print("No. of time steps: ",len(traffic_data[nodes[0]]))

No. of nodes:  3193
No. of time steps:  1613


In [220]:
X=[]
Y=[]
for i in range(traffic_data.shape[0]-1):
  X.append(traffic_data.iloc[i,:])
  Y.append(traffic_data.iloc[i+1,:])
X=np.asarray(X)
Y=np.asarray(Y)

In [221]:
print(X.shape)
print(Y.shape)

(1612, 3193)
(1612, 3193)


### Loading the train, test and validation nodes.

In [222]:
def get_train_test_validation_nodes(filename):
  data=np.load(filename)
  train_nodes=data["train_node_ids"].astype("str")
  test_nodes=data["test_node_ids"].astype("str")
  validation_nodes=data["val_node_ids"].astype("str")
  return train_nodes,test_nodes,validation_nodes

In [223]:
train_nodes,test_nodes,validation_nodes=get_train_test_validation_nodes("d1_graph_splits.npz")

In [224]:
train_nodes_mask=torch.zeros(len(nodes),dtype=torch.bool)
test_nodes_mask=torch.zeros(len(nodes),dtype=torch.bool)
validation_nodes_mask=torch.zeros(len(nodes),dtype=torch.bool)

node_mapping={}
inverse_node_mapping={}
for t in enumerate(nodes):
  node_mapping[str(t[1])]=t[0]
  inverse_node_mapping[t[0]]=str(t[1])

for node in train_nodes:
  if node in nodes:
    train_nodes_mask[node_mapping[node]]=True
  else:
    print(f"Node {node} not present as the column of nodes")

for node in test_nodes:
  if node in nodes:
    test_nodes_mask[node_mapping[node]]=True
  else:
    print(f"Node {node} not present as the column of nodes")

for node in validation_nodes:
  if node in nodes:
    validation_nodes_mask[node_mapping[node]]=True
  else:
    print(f"Node {node} not present as the column of nodes")


### Loading the graph dense adjacency matrix and making it sparse

In [225]:
data_adj=pd.read_csv("d1_adj_mx.csv",index_col=0)
data_adj.fillna(0,inplace=True)

In [226]:
print(data_adj.head())

           288416374  288416379  288416380  288416386  288416399  314622896  \
288416374        0.0        0.0        0.0        0.0        0.0        0.0   
288416379        0.0        0.0        0.0        0.0        0.0        0.0   
288416380        0.0        0.0        0.0        0.0        0.0        0.0   
288416386        0.0        0.0        0.0        0.0        0.0        0.0   
288416399        0.0        0.0        0.0        0.0        0.0        0.0   

           314622918  314623160  314635488  314655436  ...  8318574243  \
288416374        0.0        0.0        0.0     173.86  ...         0.0   
288416379        0.0        0.0        0.0       0.00  ...         0.0   
288416380        0.0        0.0        0.0       0.00  ...         0.0   
288416386        0.0        0.0        0.0       0.00  ...         0.0   
288416399        0.0        0.0        0.0       0.00  ...         0.0   

           8318574244  8318574253  8320254523  8321458993  8321458994  \
2884163

In [227]:
edge_index,edge_weight=dense_to_sparse(torch.from_numpy(data_adj.values))
"""
  Uncomment according to the dataset
"""
if not isEdgeLength:
  edge_weight=1/edge_weight

### Model Class

In [228]:
from torch_geometric.nn import SAGEConv, GATv2Conv
class Model(torch.nn.Module):
  def __init__(self):
    super().__init__()
    self.layer1=SAGEConv(1,32,'mean')
    self.layer2=SAGEConv(32,16,'mean')
    # self.layer1 = GATv2Conv(1, 32, edge_dim=1)
    # self.layer2 = GATv2Conv(32, 16, edge_dim=1)
    self.linear=torch.nn.Linear(16,1)
  
  def forward(self,d):
    X,edge_index,edge_weight=d.x,d.edge_index,d.edge_weight

    output1=F.relu(self.layer1(X,edge_index))
    output2=F.relu(self.layer2(output1,edge_index))

    output=self.linear(output2)

    return output

### Creating batches of data

In [229]:
final_data=[]
for i in range(X.shape[0]):
  x=torch.tensor(X[i],dtype=torch.double).reshape(-1,1)
  y=torch.tensor(Y[i],dtype=torch.double).reshape(-1,1)
  d=Data(x=x,y=y,edge_index=edge_index)
  d.train_nodes_mask=train_nodes_mask
  d.test_nodes_mask=test_nodes_mask
  d.validation_nodes_mask=validation_nodes_mask
  d.edge_weight=edge_weight
  final_data.append(d)

In [230]:
from torch_geometric.loader import DataLoader
dataloader=DataLoader(final_data,batch_size=32,shuffle=False)

In [231]:
def calculate_loss(Y_pred,Y,node_mask):
  return torch.mean(torch.abs(Y_pred[node_mask]-Y[node_mask])).item()

### Model Instance

In [232]:
model=Model().double()

### Training the model

In [None]:
last_error=1000000000000.0

decay=1.0
for iter in tqdm(range(nIter)):
  model.train()
  for i,data in enumerate(dataloader):
    Y_pred=model(data)
    error=F.mse_loss(Y_pred[data.train_nodes_mask],data.y[data.train_nodes_mask])
    if i%100==0:
      print("The current MSE is: ", error.item()/len(data))
    optimizer=torch.optim.Adam(model.parameters(), lr=0.001*decay)
    optimizer.zero_grad()
    error.backward()
    optimizer.step()
    
  model.eval()
  error=0
  n=len(dataloader.dataset)
  for data in dataloader.dataset:
    Y_pred=model(data)
    error+=calculate_loss(Y_pred,data.y,validation_nodes_mask)
  print(f"The error after {iter+1} iterations is: ",error/n)
  """
    Early stopping the training
  """
  if early_stopping:
    if abs(last_error-error/n)>=0.05:
      last_error=error/n
    else:
      break
  if toDecay and iter%5==0:
    decay=decay/100.0

  0%|          | 0/20 [00:00<?, ?it/s]

The current MSE is:  0.408698360315909


  5%|▌         | 1/20 [00:19<06:18, 19.92s/it]

The error after 1 iterations is:  2.5103462693511567
The current MSE is:  0.13916033206259168


 10%|█         | 2/20 [00:37<05:35, 18.66s/it]

The error after 2 iterations is:  1.8171315480226158
The current MSE is:  0.10330841918018435


 15%|█▌        | 3/20 [00:54<05:03, 17.86s/it]

The error after 3 iterations is:  1.7537641084927114
The current MSE is:  0.10456512996457856


 20%|██        | 4/20 [01:11<04:39, 17.49s/it]

The error after 4 iterations is:  1.7161641270967882
The current MSE is:  0.10363994023483246


 25%|██▌       | 5/20 [01:28<04:19, 17.32s/it]

The error after 5 iterations is:  1.720963288984428
The current MSE is:  0.10388697655726788


 30%|███       | 6/20 [01:45<04:00, 17.20s/it]

The error after 6 iterations is:  1.7140473351992758
The current MSE is:  0.10332820662032721


 35%|███▌      | 7/20 [02:03<03:46, 17.46s/it]

The error after 7 iterations is:  1.7536356321729571
The current MSE is:  0.10520982312946833


 40%|████      | 8/20 [02:20<03:28, 17.38s/it]

The error after 8 iterations is:  1.7496898727623593
The current MSE is:  0.10517500954837775


 45%|████▌     | 9/20 [02:39<03:16, 17.84s/it]

The error after 9 iterations is:  1.7500108031949413
The current MSE is:  0.10510338363886938


In [None]:
error=0
n=len(dataloader.dataset)
model.eval()
for data in dataloader.dataset:
  Y_pred=model(data)
  error+=calculate_loss(Y_pred,data.y,test_nodes_mask)
print("The testing accuracy is: ",error/n)

In [None]:
def plot_graph(Y,Y_pred):
  plt.plot(np.arange(1,len(Y)),Y[1:])
  plt.plot(np.arange(1,len(Y)),Y_pred[:-1], color = 'orange')
  plt.legend(['actual', 'predicted'])

In [None]:
node_id=test_nodes[10]

In [None]:
Y_pred=np.empty((traffic_data.shape[1],traffic_data.shape[0]))
with torch.no_grad():
  model.eval()
  for i,data in enumerate(final_data):
    Y_pred[:,i:i+1]=model(data)

In [None]:
y=np.array(traffic_data[node_id])
y_pred=Y_pred[node_mapping[node_id],:]

In [None]:
from matplotlib.pyplot import figure
figure(figsize=(20, 30), dpi=80)

In [None]:
plot_graph(y,y_pred)

### Saving the model

In [None]:
torch.save(model,"mcs212138_task1-1.model")