In [149]:
import torch
import random
import time
import numpy as np
import pandas as pd
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch_geometric.data import Data, InMemoryDataset
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pickle
import glob
import math
import pandas as pd
import collections
import random
from torch_geometric.nn import GCNConv
from torch_geometric.data import Batch

## Read demand data

In [150]:
def data_reader(filename):
    f = open(filename,'rb')
    data = pickle.load(f)
    return data

In [151]:
filename = glob.glob('../data/demand/*')
all_data = {}
node_feature = []
for i in filename:
    num = i.split('_')[1]
    data = data_reader(i)
    all_data[int(num)] = data
    node_feature.append(data)
sorted_data = collections.OrderedDict(sorted(all_data.items()))
node_feature = np.array(node_feature)

In [152]:
node_feature.shape

(262, 35064)

In [153]:
all_key = list(sorted_data.keys())

In [154]:
oldkey2newkey = {all_key[i] : i for i in range(len(all_key))}

## Read graph structure¶

In [155]:
file = open('../data/proxList','rb')
data = pickle.load(file)

In [156]:
edge_index = [[], []]
for item in data:
    edge_index[0].append(item[0])
    edge_index[1].append(item[1])

In [157]:
initial_feature = np.expand_dims(node_feature.mean(1), axis=0)

In [158]:
initial_feature.shape

(1, 262)

## Clean graph

In [159]:
new_graph = [[], []]
for i in range(len(edge_index[0])):
    if edge_index[0][i] in all_key and edge_index[1][i] in all_key:
        new_graph[0].append(oldkey2newkey[edge_index[0][i]])
        new_graph[1].append(oldkey2newkey[edge_index[1][i]])

In [160]:
edge_index = torch.tensor(new_graph, dtype=torch.long)
node_feature = torch.tensor(node_feature, dtype=torch.float32)

## Hyperparameters

In [161]:
class Args():
    def __init__(self):
        self.split_ratio = [0.7, 0.2, 0.1]
        self.hist_length = 24 
        self.pred_length = 12
        self.batch_size = 1
        self.num_node_features = 1
        self.gcn_hidden_dim = 16
        self.encoder_hidden_dim = 16
        self.decoder_hidden_dim = 16
        self.zone_num = 1
        # self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.device = torch.device("cpu")
        self.epoch_num = 30
args = Args()

## Dataset

In [162]:
class GraphDataset(Dataset):
    def __init__(self, node_feature, hist_length, pred_length):
        self.node_feature = node_feature
        self.total_len = hist_length + pred_length
        self.hist_len = hist_length
        self.pred_len = pred_length
        
    def __len__(self):
        return int(self.node_feature.shape[1] - self.total_len)
    
    def __getitem__(self, index):
        return (self.node_feature[:, index:index + self.hist_len], 
            self.node_feature[:, index + self.hist_len: index + self.total_len])

In [163]:
total_len = node_feature.shape[1]
train_st = 0
train_ed = math.floor(total_len * args.split_ratio[0])
valid_st = math.floor(total_len * args.split_ratio[0])
valid_ed = math.floor(total_len * (args.split_ratio[0] + args.split_ratio[1]))
test_st = math.floor(total_len * (args.split_ratio[0] + args.split_ratio[1]))
test_ed = total_len
train_dataset = GraphDataset(node_feature[:, train_st:train_ed], args.hist_length, args.pred_length)
val_dataset = GraphDataset(node_feature[:, valid_st:valid_ed], args.hist_length, args.pred_length)
test_dataset = GraphDataset(node_feature[:, test_st:test_ed], args.hist_length, args.pred_length)

In [164]:
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

In [165]:
len(train_dataset)

24508

In [166]:
train_dataset[24510][0].shape

torch.Size([262, 24])

## Model

In [167]:
class GCN(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.conv1 = GCNConv(args.num_node_features, args.gcn_hidden_dim)
        self.conv2 = GCNConv(args.gcn_hidden_dim, args.gcn_hidden_dim)

    def forward(self, node_feature, edge_index):
        x = self.conv1(node_feature, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return x

In [168]:
class GRU_Encoder(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.feature_extractor = GCN(args)
        self.gru1 = nn.GRU(input_size=args.gcn_hidden_dim, hidden_size=args.encoder_hidden_dim)
        self.gru2 = nn.GRU(input_size=args.encoder_hidden_dim, hidden_size=args.encoder_hidden_dim)
        
    def forward(self, batchx, edge_index):
        GCN_output = []
        print(batchx.shape, edge_index.shape)
        for i in range(batchx[0].shape[1]):
            gcn_feature = self.feature_extractor(torch.unsqueeze(batchx[0][:, i], 1), edge_index)
            GCN_output.append(gcn_feature)
        
        out = torch.stack(GCN_output)
        out, h1 = self.gru1(out)
        out, h2 = self.gru2(out)
        return h1, h2

In [169]:
class GRU_Decoder(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.gcn = GCN(args)
        self.gru1 = nn.GRUCell(input_size=args.gcn_hidden_dim, hidden_size=args.decoder_hidden_dim)
        self.gru2 = nn.GRUCell(input_size=args.decoder_hidden_dim, hidden_size=args.decoder_hidden_dim)
        self.pred = nn.Linear(args.decoder_hidden_dim, args.zone_num)
    
    def forward(self, input_graph, edge_index, h1, h2):
        print('- 1111111111', self.gcn, input_graph.shape, edge_index.shape)
        graph_embed = self.gcn(input_graph, edge_index)
        print('- 2222222222')
        h1 = self.gru1(graph_embed, h1)
        h2 = self.gru2(torch.squeeze(h1), h2)
        print('- 3333333333')
        out_graph = self.pred(torch.squeeze(h2))
        return (out_graph, h1, h2)

In [170]:
class Graph_Seq2Seq(torch.nn.Module):
    def __init__(self, args, initial_feature):
        super().__init__()
        self.initial_feature = initial_feature
        self.encoder = GRU_Encoder(args)
        self.decoder = GRU_Decoder(args)
        
    def forward(self, input_seq, edge_index, pred_len):
        graph_li = []
        print(111111111)
        h1, h2 = self.encoder(input_seq, edge_index)
        print(222222222)
        graph = self.initial_feature
        for i in range(pred_len):
            print(333333333, 'graph=', graph.shape, edge_index.shape, h1.shape, h2.shape)
            out_graph, h1, h2 = self.decoder(graph, edge_index, torch.squeeze(h1), torch.squeeze(h2))
            graph_li.append(out_graph)
        print(444444444)
        pred = torch.stack(graph_li)
        return pred

In [171]:
args.device

device(type='cpu')

In [172]:
def train(model, criterion, optimizer, edge_index, args):
    model.train()
    eval_loss = []
    edge_index = edge_index.to(args.device)
    for batchIdx, (batchx, batchy) in enumerate(train_loader):
        optimizer.zero_grad()
        batchx = batchx.to(args.device)
        batchy = batchy.to(args.device)
        predictions = model(batchx, edge_index, args.pred_length)
        loss = criterion(predictions, batchy.permute(2, 1, 0))
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        optimizer.step()
        eval_loss.append(loss.detach().numpy())
        end = time.time()
        print('train loss:', loss)
    return np.mean(eval_loss)

In [173]:
def valid(model, criterion, edge_index, args):
    model.eval()
    eval_loss = []
    edge_index = edge_index.to(args.device)
    for batchIdx, (batchx, batchy) in enumerate(train_loader):
        optimizer.zero_grad()
        batchx = batchx.to(args.device)
        batchy = batchy.to(args.device)
        predictions = model(batchx, edge_index, args.pred_length)
        loss = criterion(predictions, batchy.permute(2, 1, 0))
        eval_loss.append(loss.detach().numpy())
        end = time.time()
    return np.mean(eval_loss)

In [174]:
model = Graph_Seq2Seq(args, initial_feature).to(args.device)
criterion = F.mse_loss
optimizer = optim.Adam(model.parameters(), amsgrad=True)

for i in range(args.epoch_num):
    train_loss = train(model, criterion, optimizer, edge_index, args)
    print('----------------------------- epoch {} train loss {} ------------------------------'.format(i, train_loss))
    eval_loss = valid(model, criterion, edge_index, args)
    print('----------------------------- epoch {} eval loss {} ------------------------------'.format(i, eval_loss))

111111111
torch.Size([1, 262, 24]) torch.Size([2, 1310])
222222222
333333333 graph= (1, 262) torch.Size([2, 1310]) torch.Size([1, 262, 16]) torch.Size([1, 262, 16])
- 1111111111 GCN(
  (conv1): GCNConv(1, 16)
  (conv2): GCNConv(16, 16)
) (1, 262) torch.Size([2, 1310])


TypeError: 'int' object is not callable