In [26]:
import pandas as pd
import numpy as np
import pickle
import torch
from torch_geometric.data import Batch

from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from bokeh.models.formatters import DatetimeTickFormatter

# Directory holding the saved model weights (*.pt files). File paths are built
# by plain string concatenation, so the trailing slash is required.
model_path = './models/'
# Directory holding the pickled dataset splits; the trailing slash is required
# for the same reason.
data_path = './data/'

In [2]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GraphConv(torch.nn.Module):
    """One graph-convolution layer followed by a ReLU activation.

    Wraps a single ``GCNConv`` that maps ``dim_in`` input features to
    ``dim_out`` output features.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()

        self.dim_in, self.dim_out = dim_in, dim_out

        # the only trainable component of this module; caching is switched off
        self.conv = GCNConv(dim_in, dim_out, cached=False)

    def forward(self, data):
        """Convolve ``data`` and return the ReLU-activated result.

        ``data`` must expose ``x``, ``edge_index`` and ``edge_weight`` attributes.
        """
        activated = F.relu(
            self.conv(data.x, data.edge_index, edge_weight=data.edge_weight)
        )
        return activated

class Encoder(torch.nn.Module):
    """LSTM encoder that discards per-step outputs and keeps only the final state."""

    def __init__(self, input_dim, hid_dim, n_layers):
        super().__init__()

        self.hid_dim, self.n_layers = hid_dim, n_layers
        self.lstm = torch.nn.LSTM(input_dim, hid_dim, n_layers)

    def forward(self, input_tensor):
        """Encode a sequence and return its final ``(hidden, cell)`` LSTM state.

        ``input_tensor`` is laid out as
        (sequence length, batch size, feature vector size).
        """
        _, final_state = self.lstm(input_tensor)
        h_n, c_n = final_state
        return h_n, c_n
    
class Decoder_Lin(torch.nn.Module):
    """LSTM decoder whose last-step output is projected by a linear layer.

    Args:
        output_dim: feature size of the sequence fed into the decoder LSTM.
        hid_dim: hidden size of the LSTM; also the input width of the linear
            projection (previously hard-coded to 64, which broke any other
            hidden size).
        n_layers: number of stacked LSTM layers.
        pred_dim: width of the projected prediction. Defaults to 2, the value
            that used to be hard-coded.
    """

    def __init__(self, output_dim, hid_dim, n_layers, pred_dim=2):
        super().__init__()

        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.pred_dim = pred_dim

        self.lstm = torch.nn.LSTM(output_dim, hid_dim, n_layers)
        # the projection consumes the LSTM output, so its input width must
        # follow hid_dim rather than a fixed constant
        self.lin = torch.nn.Linear(hid_dim, pred_dim)

    def forward(self, input_tensor, hidden, cell):
        """Advance the decoder and project the last time step.

        Args:
            input_tensor: laid out as
                (sequence length, batch size, feature vector size).
            hidden: initial hidden state passed to the LSTM.
            cell: initial cell state passed to the LSTM.

        Returns:
            ``(output, hidden, cell)`` where ``output`` is the linear projection
            of the LSTM output at the final time step, and ``hidden`` / ``cell``
            are the updated LSTM states.
        """
        output, (hidden, cell) = self.lstm(input_tensor, (hidden, cell))
        # only the final time step is projected
        output = self.lin(output[-1, :, :])

        return output, hidden, cell
    
class FC(torch.nn.Module):
    """Four-layer fully connected head: ``dim_in -> 64 -> 32 -> 16 -> dim_out``.

    ReLU follows each of the first three layers; the last layer is left linear.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()

        # hidden widths were chosen arbitrarily and are a candidate for tuning
        widths = (dim_in, 64, 32, 16, dim_out)
        self.fc1, self.fc2, self.fc3, self.fc4 = (
            torch.nn.Linear(n_in, n_out)
            for n_in, n_out in zip(widths, widths[1:])
        )

    def forward(self, x):
        """Pass ``x`` through the three ReLU-activated layers and the linear output layer."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        return self.fc4(x)

In [3]:
def _add_series(p, x, y, color, label):
    """Draw one series on ``p`` as small markers joined by a line, under one legend entry."""
    p.circle(
        x = x,
        y = y,
        size = 2,
        color=color,
        alpha=0.5,
        legend_label=label,
        muted_alpha=0.2
    )
    p.line(
        x = x,
        y = y,
        color=color,
        legend_label=label,
        muted_alpha=0.2
    )


def plot_results(x, target, prediction):
    """Plot target and prediction series against ``x`` on a datetime axis.

    Args:
        x: x-axis values for both series (the figure uses a datetime x-axis).
        target: ground-truth values, one per entry of ``x``.
        prediction: predicted values, one per entry of ``x``.

    The figure is rendered inline in the notebook; nothing is returned.

    Note: earlier revisions ignored ``x`` and read the notebook-level
    ``timestamps`` variable instead, so the plot silently depended on global
    state. The argument is now honoured.
    """
    # NOTE(review): newer Bokeh releases renamed plot_width/plot_height to
    # width/height -- confirm against the installed Bokeh version.
    p = figure(
        plot_width = 1000,
        plot_height = 600,
        x_axis_label='time',
        x_axis_type="datetime",
        y_axis_label='# of entries',
        toolbar_location="left",
        aspect_scale=100
    )

    p.xaxis.formatter=DatetimeTickFormatter(
        days=["%y/%m/%d"],
        months=["%y/%m"],
        hours=["%m/%d %H:00"]
    )

    _add_series(p, x, target, 'navy', 'Target')
    _add_series(p, x, prediction, 'orange', 'Prediction')

    p.legend.location = "top_left"
    # clicking a legend entry mutes (fades) that series rather than hiding it
    p.legend.click_policy="mute"

    output_notebook()
    show(p)

In [8]:
def evaluate(graph_conv, encoder, fc, data_list):
    """Run sliding-window inference over ``data_list`` and collect the results.

    Reads the notebook-level settings ``WINDOW_SIZE``, ``NUM_NODE``, ``EMB_DIM``,
    ``NUM_WEATHER_FEATURE`` and ``device``; they must be defined before the call.
    All three modules are switched to eval mode and gradients are disabled.

    Args:
        graph_conv: graph-convolution module applied to each batched window.
        encoder: module returning ``(hidden, cell)`` for the reshaped features.
        fc: module mapping the concatenated state + weather features to the
            prediction.
        data_list: sequence of graph samples accepted by
            ``Batch.from_data_list``; each batch must provide ``y`` and
            ``target_weather``.

    Returns:
        ``(timestamps, targets, predictions)``: a list with one ``pd.Timestamp``
        per window, and two tensors (moved to the CPU) stacked along a new
        leading window axis. Each target slice has shape ``(NUM_NODE, 2)``.

    Raises:
        ValueError: if ``data_list`` is too short to produce a single window.
    """
    num_windows = len(data_list) - WINDOW_SIZE
    if num_windows <= 0:
        # without this guard torch.stack fails later on an empty list with an
        # error that does not point at the real cause
        raise ValueError(
            f'data_list has {len(data_list)} item(s); more than '
            f'WINDOW_SIZE={WINDOW_SIZE} are required'
        )

    graph_conv.eval()
    encoder.eval()
    fc.eval()

    # when plotting, specify the node we want to inspect, (employ the index-station-id conversion table)
    timestamps = []      # Pandas timestamps, one per window
    targets = []         # Targets (ground truths), each of shape [NUM_NODE, 2]
    predictions = []     # Predictions, one fc output per window

    # fixed start of the timestamp axis; parsed once instead of on every iteration
    base_ts = pd.Timestamp('2019-01-19 23:00:00')

    with torch.no_grad():
        # sliding window over the sample list
        # NOTE(review): this range stops one short of the last full window
        # (i == len(data_list) - WINDOW_SIZE is never visited) -- confirm intended.
        for i in range(num_windows):
            batch = Batch.from_data_list(data_list[i:i + WINDOW_SIZE])
            batch = batch.to(device)
            # target and weather belong to the last sample of the window
            y = batch.y.view(-1, NUM_NODE, 2)[-1, :, :]
            batch_weather = batch.target_weather.view(-1, NUM_WEATHER_FEATURE)[-1, :]
            # tile the weather vector so that every node gets a copy
            weather = batch_weather.repeat(NUM_NODE)

            # GCN
            conv_out = graph_conv(batch)
            batch = None # release memory

            # LSTM encoder
            encoder_in = conv_out.view(-1, NUM_NODE, EMB_DIM)
            hidden, cell = encoder(encoder_in)

            # FC
            # concat the final hidden and cell state of the LSTM
            cell_input = torch.cat((hidden.view(-1, EMB_DIM), cell.view(-1, EMB_DIM)), 1)
            # append the weather features to the final state
            fc_input = torch.cat((cell_input, weather.view(-1, NUM_WEATHER_FEATURE)), 1)
            # pass through the FC layers
            fc_output = fc(fc_input)

            # timestamp for the x axis
            timestamps.append(base_ts + pd.Timedelta(hours=i + WINDOW_SIZE - 1))
            targets.append(y.cpu())
            predictions.append(fc_output.cpu())

    targets = torch.stack(targets)
    predictions = torch.stack(predictions)

    return timestamps, targets, predictions
            

In [5]:
# Load the test partition of the dataset (the last 40 days).
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
test_data_file = data_path + 'test_data_list.pickle'
with open(test_data_file, 'rb') as pickled_split:
    test_data_list = pickle.load(pickled_split)

len(test_data_list)

960

In [23]:
# Evaluation settings read by evaluate()
WINDOW_SIZE = 6
EMB_DIM = 64
NUM_NODE = 960
NUM_WEATHER_FEATURE = 18

# Use the first GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Model definition
graph_conv = GraphConv(2, EMB_DIM)
encoder = Encoder(EMB_DIM, EMB_DIM, 1)
# the FC head consumes hidden state + cell state + weather features
fc = FC(EMB_DIM * 2 + NUM_WEATHER_FEATURE, 2)

# map_location lets checkpoints saved on a GPU load on whichever device was chosen
graph_conv.load_state_dict(torch.load(model_path + 'graph_conv.pt', map_location=device))
encoder.load_state_dict(torch.load(model_path + 'encoder.pt', map_location=device))
fc.load_state_dict(torch.load(model_path + 'fc.pt', map_location=device))

graph_conv.to(device)
encoder.to(device)
fc.to(device)

# define loss (mean squared error)
criterion = torch.nn.MSELoss()

timestamps, targets, predictions = evaluate(graph_conv, encoder, fc, test_data_list)

## Results

In [24]:
# inflow loss averaged over every window and node
criterion(targets[..., 0], predictions[..., 0]).item()

4.4707536697387695

In [25]:
# outflow loss averaged over every window and node
criterion(targets[..., 1], predictions[..., 1]).item()

4.499856472015381

In [27]:
# Inflow loss and time-series plot for the node at index 0.
# criterion is an MSELoss, so the value printed is the MSE (no square root is taken).
print(f'Inflow Loss (MSE): {(criterion(targets[:, 0, 0], predictions[:, 0, 0]).item()):.3f}\n')
plot_results(timestamps, targets[:, 0, 0].numpy(), predictions[:, 0, 0].numpy())

Inflow Loss (RMSE): 5.160



In [28]:
# Outflow loss and time-series plot for the node at index 0.
# criterion is an MSELoss, so the value printed is the MSE (no square root is taken).
print(f'Outflow Loss (MSE): {(criterion(targets[:, 0, 1], predictions[:, 0, 1]).item()):.3f}\n')
plot_results(timestamps, targets[:, 0, 1].numpy(), predictions[:, 0, 1].numpy())

Outflow Loss (RMSE): 6.282



In [None]:
# top stations

