Author: **Mathis Konarski** <br>
Date: **21/06/2022**

This notebook implements an image-based model on NYC bike and NYC taxi data. <br>
The model idea is based on *Revisiting Spatial-Temporal Similarity: A Deep Learning Framework for Traffic Prediction* by H. Yao, X. Tang, H. Wei, G. Zheng and Z. Li. <br>
However, there are implementation differences, and this model does not include an attention layer.

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

from torch import nn
import torch

import Training_functions as t_func

In [None]:
# Grid dimensions handed to t_func.flow_data
GRID_SIZE = (10, 20)
# Number of leading periods used as the training set
# (presumably 70 days of half-hour periods -- TODO confirm)
TRAIN_PERIOD = 70 * 24 * 2
# Batch size of the training DataLoader
BATCH_SIZE = 4
# Length of previous data considered for LSTM
# (presumably 3 days of half-hour periods -- TODO confirm)
WINDOW_SIZE = 3 * 24 * 2
# Threshold forwarded to t_func.model_training
MIN_VOL_METRICS = 10
# Threshold forwarded to t_func.flow_data
MIN_FLOW = 0

# Volume visualization

In [None]:
def volume_show(period):
    '''
    Plot the start and stop volume grids of one period side by side

    Reads the module-level train_df and uses its columns period, lat, lon,
    volume_start and volume_stop. lat and lon are used as 1-based grid indices.
    Cells without a row for the period stay at 0; if several rows map to the
    same cell only one of them is kept.

    Parameters
    ----------
    period : value of the period column to display
    '''
    train_period_df = train_df[train_df.period == period]

    # Grids are indexed [lon-1, lat-1]; (20, 10) mirrors GRID_SIZE = (10, 20)
    start = np.zeros((20, 10))
    stop = np.zeros((20, 10))

    # One vectorised assignment per grid: avoids re-filtering the frame for
    # every cell and never assigns a pandas Series into a scalar array slot.
    lon_idx = train_period_df.lon.to_numpy() - 1
    lat_idx = train_period_df.lat.to_numpy() - 1
    start[lon_idx, lat_idx] = train_period_df.volume_start.to_numpy()
    stop[lon_idx, lat_idx] = train_period_df.volume_stop.to_numpy()

    f, (ax1, ax2) = plt.subplots(1, 2)
    ax1.axis('off')
    ax2.axis('off')
    # Shared fixed colour scale so both panels are directly comparable
    ax1.matshow(start, vmin=0, vmax=350, cmap='gist_stern', origin='lower')
    ax2.matshow(stop, vmin=0, vmax=350, cmap='gist_stern', origin='lower')

# Load Dataset

In [None]:
# Load the prepared dataset; the first CSV column is used as the index
data_df = pd.read_csv("Datasets/ytaxi_prepared.csv", index_col=0)
# Flow and volume arrays are built by the helpers of Training_functions
data_flow_np = t_func.flow_data(data_df, GRID_SIZE, MIN_FLOW)
data_vol_np = t_func.volume_data(data_df)

In [None]:
class TimeSeriesDataset(torch.utils.data.Dataset):
    '''
    Sliding-window pyTorch dataset over time series with flow and volume information

    Item i holds the window of periods [i, i+window) for volume and flow, and
    the volume window shifted by one period (divided by norm_y) as target.

    Parameters
    ----------
    data_vol_ten : torch tensor with start and end volume for each period and area
    data_flow_ten : flow data, sliced along its first axis like data_vol_ten
    window : length of previous data considered for LSTM
    norm_y : maximal volume value for the training set
    '''
    def __init__(self, data_vol_ten, data_flow_ten, window, norm_y):
        self.data_vol = data_vol_ten
        self.data_flow = data_flow_ten
        self.window = window
        self.norm_y = norm_y
        # Cached once at construction; both rely on the attributes set above
        self.shape = self.__getshape__()
        self.size = self.__getsize__()

    def __getitem__(self, index):
        window_end = index + self.window
        volume = self.data_vol[index:window_end]
        flow = self.data_flow[index:window_end]
        # Target: the same window moved one period forward, normalised
        target = self.data_vol[index + 1:window_end + 1] / self.norm_y
        return volume, flow, target

    def __len__(self):
        # The last window needs one extra period after it for the target
        return len(self.data_vol) - self.window

    def __getshape__(self):
        first_volume = self.__getitem__(0)[0]
        return (self.__len__(),) + tuple(first_volume.shape)

    def __getsize__(self):
        return self.__len__()

def create_dataloader(flow_np, vol_np, train_period, batch_size, window_size):
    '''
    Build the train and test pyTorch DataLoaders on top of TimeSeriesDataset

    Parameters
    ----------
    flow_np : flow informations based on flow_data function
    vol_np : volume informations based on volume_data function
    train_period : length of the training period
    batch_size : batch size of the training loader
    window_size : length of previous data considered for LSTM

    Returns
    -------
    train_dataloader : torch.utils.data.DataLoader, shuffled, over the first train_period periods
    test_dataloader : torch.utils.data.DataLoader with default settings over the remaining periods
    norm_y : maximal volume value for the training set
    '''
    # The test split starts window_size periods before the end of the training
    # split, so its first window ends where the training data ends.
    test_start = train_period - window_size
    vol_train, flow_train = vol_np[:train_period], flow_np[:train_period]
    vol_test, flow_test = vol_np[test_start:], flow_np[test_start:]

    # Normalisation constant comes from the training volumes only
    norm_y = vol_train.max()

    def build_dataset(vol_part, flow_part):
        return TimeSeriesDataset(torch.Tensor(vol_part),
                                 torch.tensor(flow_part, dtype=torch.uint8),
                                 window_size, norm_y)

    train_dataloader = torch.utils.data.DataLoader(build_dataset(vol_train, flow_train),
                                                   batch_size=batch_size, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(build_dataset(vol_test, flow_test))
    return train_dataloader, test_dataloader, norm_y

In [None]:
# Build both loaders and the volume normaliser from the prepared arrays
train_loader, test_loader, norm_y = create_dataloader(
    flow_np=data_flow_np,
    vol_np=data_vol_np,
    train_period=TRAIN_PERIOD,
    batch_size=BATCH_SIZE,
    window_size=WINDOW_SIZE,
)

# PyTorch

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
class YaoNeuralNetwork(nn.Module):
    '''
    Image-based model: a volume branch and a flow branch (batch norm, three
    3x3 convolutions, dense layer) are multiplied together and fed to an LSTM

    Parameters
    ----------
    norm : value the flow input is divided by before entering the flow branch
    window_size : number of time steps per sample, used as channel count of the
        batch normalisation layers; defaults to the module-level WINDOW_SIZE
    '''
    def __init__(self, norm, window_size=None):
        super().__init__()
        if window_size is None:
            window_size = WINDOW_SIZE
        self.dropout_rate = 0.5
        self.conv_vol_size = 2
        self.in_flow_size = 2
        self.conv_flow_size = 2
        self.out_flow_vol_size = 4
        self.lstm_input_size = 4
        self.lstm_output_size = 32
        self.norm = norm
        self.window_size = window_size

        # Volume branch. Three unpadded 3x3 convolutions shrink a 10x20 grid
        # to 4x14, which is where the 4*14 of the dense layer comes from.
        self.norm_vol = nn.BatchNorm3d(window_size)
        self.conv_vol = nn.Sequential(
            nn.Conv3d(2, self.conv_vol_size, (1,3,3)), nn.ReLU(),
            nn.Conv3d(self.conv_vol_size, self.conv_vol_size, (1,3,3)), nn.ReLU(),
            nn.Conv3d(self.conv_vol_size, self.conv_vol_size, (1,3,3)), nn.ReLU() )
        self.dense_vol = nn.Sequential(
            nn.Flatten(2), nn.Dropout(self.dropout_rate), nn.Linear(self.conv_vol_size*4*14, self.out_flow_vol_size), nn.ReLU() )

        # Flow branch. Everything from dim 4 on is flattened to 200 features
        # and reduced to in_flow_size channels before the convolutions.
        self.norm_flow = nn.Sequential(
            nn.Flatten(4), nn.Linear(200, self.in_flow_size), nn.ReLU(), nn.BatchNorm3d(window_size) )
        self.conv_flow = nn.Sequential(
            nn.Conv3d(self.in_flow_size, self.conv_flow_size, (1,3,3)), nn.ReLU(),
            nn.Conv3d(self.conv_flow_size, self.conv_flow_size, (1,3,3)), nn.ReLU(),
            nn.Conv3d(self.conv_flow_size, self.conv_flow_size, (1,3,3)), nn.ReLU() )
        self.dense_flow = nn.Sequential(
            nn.Flatten(2), nn.Dropout(self.dropout_rate), nn.Linear(self.conv_flow_size*56, self.out_flow_vol_size), nn.ReLU() )

        # Temporal part and output head (400 = 10*20*2 values per time step)
        self.pre_lstm = nn.Sequential(
            nn.Dropout(self.dropout_rate), nn.Linear(self.out_flow_vol_size, self.lstm_input_size), nn.ReLU() )
        self.lstm = nn.LSTM(input_size=self.lstm_input_size, hidden_size=self.lstm_output_size, num_layers=1, dropout=0, batch_first=True)
        self.end = nn.Sequential(
            nn.Dropout(self.dropout_rate), nn.Linear(self.lstm_output_size, 400), nn.Tanh(),
            nn.Unflatten(2, (10,20,2)) )

    def forward(self, v, f):
        '''
        Compute the prediction for a batch of volume and flow windows

        Parameters
        ----------
        v : volume input; the layer sizes imply (batch, window, 10, 20, 2)
        f : flow input; the layer sizes imply that its dims from index 4 on
            flatten to 200 values, e.g. (batch, window, 10, 20, 10, 20)

        Returns
        -------
        tensor of shape (batch, window, 10, 20, 2) with values in [-1, 1]
        '''
        v = self.norm_vol(v)
        # Move the last dim to the channel position for Conv3d, then back
        v = self.conv_vol(v.permute(0,4,1,2,3))
        v = self.dense_vol(v.permute(0,2,3,4,1))
        f = self.norm_flow(f/self.norm)
        f = self.conv_flow(f.permute(0,4,1,2,3))
        f = self.dense_flow(f.permute(0,2,3,4,1))
        x = torch.mul(v, f)
        x = self.pre_lstm(x)
        # nn.LSTM starts from a zero state when none is given; letting it do so
        # keeps the state on the same device and dtype as x (the previous
        # hand-built torch.zeros state always lived on the CPU).
        x, _ = self.lstm(x)
        logits = self.end(x)
        return logits
    
# norm is a required constructor argument (the flow input is divided by it).
# NOTE(review): norm_y, the training-set normaliser returned by
# create_dataloader, is assumed to be the intended value -- confirm.
YaoModel = YaoNeuralNetwork(norm_y).to(device)

In [None]:
# Training hyper-parameters
epochs = 100
patience = 5  # forwarded to t_func.model_training (presumably early stopping -- TODO confirm)
learning_rate = 1e-3

model = YaoModel
loader_tuple = (train_loader, test_loader, norm_y)

# reduction='none' keeps the element-wise losses instead of reducing them
loss_mse = torch.nn.MSELoss(reduction='none')
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, eps=1e-7)

t_func.model_training(loader_tuple, model, loss_mse, optimizer, epochs, MIN_VOL_METRICS, patience)