In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

In [2]:
# The code in this cell implements the model proposed in "Spatio-Temporal Graph Convolutional Networks: A Deep Learning Framework for Traffic Forecasting" (Bing Yu, Haoteng Yin and Zhanxing Zhu, IJCAI 2018).
# The key structure of spatio-temporal graph convolutional networks.
# sources: https://github.com/hazdzz/STGCN/models/layers.py

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

class Align(nn.Module):
    """Bring the channel axis of a feature map to ``c_out`` channels.

    The input is indexed as [batch, channel, timestep, vertex].
    * ``c_in > c_out``: a 1x1 convolution reduces the channel count.
    * ``c_in < c_out``: zero-valued channels are appended.
    * ``c_in == c_out``: the input is returned untouched.
    """

    def __init__(self, c_in, c_out):
        super().__init__()
        self.c_in = c_in
        self.c_out = c_out
        # The 1x1 convolution is always created, even if forward never uses it.
        self.align_conv = nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=(1, 1))

    def forward(self, x):
        if self.c_in == self.c_out:
            return x
        if self.c_in > self.c_out:
            return self.align_conv(x)

        # Fewer input channels than requested: pad the channel axis with zeros
        # that share x's dtype and device.
        n_batch, _, n_step, n_node = x.shape
        filler = torch.zeros([n_batch, self.c_out - self.c_in, n_step, n_node]).to(x)
        return torch.cat([x, filler], dim=1)

class CausalConv1d(nn.Conv1d):
    """1-D convolution that can be made causal.

    With ``enable_padding`` the input is padded by ``(kernel_size - 1) * dilation``
    on both sides by the parent convolution and the same number of trailing
    outputs is cut off, so every output only depends on current and earlier
    inputs. Without it this is a plain unpadded ``nn.Conv1d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, enable_padding=False, dilation=1, groups=1, bias=True):
        # Computed before the parent constructor because it is handed to it as
        # the ``padding`` argument.
        self.__padding = (kernel_size - 1) * dilation if enable_padding == True else 0
        super().__init__(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.__padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )

    def forward(self, input):
        out = super().forward(input)
        trim = self.__padding
        if trim == 0:
            return out
        # Drop the outputs produced by the right-hand padding.
        return out[:, :, :-trim]

class CausalConv2d(nn.Conv2d):
    """2-D convolution with optional one-sided (top/left) zero padding.

    When ``enable_padding`` is set, each spatial axis is padded in front by
    ``(kernel_size - 1) * dilation`` before the (unpadded) parent convolution
    runs, so the output keeps the input size along that axis when stride is 1.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, enable_padding=False, dilation=1, groups=1, bias=True):
        kernel_size = nn.modules.utils._pair(kernel_size)
        stride = nn.modules.utils._pair(stride)
        dilation = nn.modules.utils._pair(dilation)
        if enable_padding == True:
            self.__padding = [int((k - 1) * d) for k, d in zip(kernel_size, dilation)]
        else:
            self.__padding = 0
        self.left_padding = nn.modules.utils._pair(self.__padding)
        # The parent convolution itself never pads; padding is applied in forward.
        super().__init__(in_channels, out_channels, kernel_size, stride=stride, padding=0, dilation=dilation, groups=groups, bias=bias)

    def forward(self, input):
        if self.__padding != 0:
            pad_first_axis, pad_second_axis = self.left_padding[0], self.left_padding[1]
            # F.pad lists the last axis first: (left, right, top, bottom).
            input = F.pad(input, (pad_second_axis, 0, pad_first_axis, 0))
        return super().forward(input)

class TemporalConvLayer(nn.Module):
    """Temporal convolution along the time axis with a residual connection.

    Temporal Convolution Layer (GLU)

           |--------------------------------| * Residual Connection *
           |                                |
           |    |--->--- CausalConv2d ----- + -------|
    -------|----|                                   ⊙ ------>
                |--->--- CausalConv2d --- Sigmoid ---|

    Input x: tensor, [bs, c_in, ts, n_vertex]. The convolution uses a
    (Kt, 1) kernel without padding, so the output has ``ts - Kt + 1`` time
    steps; the residual is cropped to the same length.

    Supported ``act_func`` values: 'glu', 'gtu', 'relu', 'leaky_relu', 'silu'.
    Any other value raises NotImplementedError when forward is called.
    """

    def __init__(self, Kt, c_in, c_out, n_vertex, act_func):
        super().__init__()
        self.Kt = Kt
        self.c_in = c_in
        self.c_out = c_out
        self.n_vertex = n_vertex
        self.align = Align(c_in, c_out)
        # Gated activations need two halves: one for the value, one for the gate.
        gated = act_func == 'glu' or act_func == 'gtu'
        self.causal_conv = CausalConv2d(
            in_channels=c_in,
            out_channels=2 * c_out if gated else c_out,
            kernel_size=(Kt, 1),
            enable_padding=False,
            dilation=1,
        )
        self.act_func = act_func
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.leaky_relu = nn.LeakyReLU()
        self.silu = nn.SiLU()

    def forward(self, x):
        # Residual branch: align channels, then drop the first Kt - 1 steps so
        # it lines up with the unpadded convolution output.
        residual = self.align(x)[:, :, self.Kt - 1:, :]
        conv_out = self.causal_conv(x)
        kind = self.act_func

        if kind == 'glu' or kind == 'gtu':
            # GLU was first proposed in
            # *Language Modeling with Gated Convolutional Networks*
            # (https://arxiv.org/abs/1612.08083): the tensor is split into
            # X_a and X_b and combined as X_a ⊙ Sigmoid(X_b).
            value = conv_out[:, : self.c_out, :, :] + residual
            gate = self.sigmoid(conv_out[:, -self.c_out:, :, :])
            if kind == 'gtu':
                # GTU: Tanh(x_p + x_in) ⊙ Sigmoid(x_q)
                value = self.tanh(value)
            # GLU: (x_p + x_in) ⊙ Sigmoid(x_q)
            return torch.mul(value, gate)

        if kind == 'relu':
            return self.relu(conv_out + residual)
        if kind == 'leaky_relu':
            return self.leaky_relu(conv_out + residual)
        if kind == 'silu':
            return self.silu(conv_out + residual)

        raise NotImplementedError(f'ERROR: The activation function {self.act_func} is not implemented.')

class ChebGraphConv(nn.Module):
    """Chebyshev-polynomial graph convolution of order ``Ks - 1``.

    ``gso`` is a 2-D matrix applied along the vertex axis (presumably the
    graph shift operator). It is stored as a plain attribute, not as a
    buffer, so ``module.to(device)`` does not move it.

    forward takes x as [bs, c_in, ts, n_vertex] and returns
    [bs, ts, n_vertex, c_out] (channels last).
    """

    def __init__(self, c_in, c_out, Ks, gso, bias):
        super().__init__()
        self.c_in = c_in
        self.c_out = c_out
        self.Ks = Ks
        self.gso = gso
        # One [c_in, c_out] weight matrix per polynomial term.
        self.weight = nn.Parameter(torch.FloatTensor(Ks, c_in, c_out))
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(c_out))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the weight (Kaiming uniform) and the bias (uniform in +-1/sqrt(fan_in))."""
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is None:
            return
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        limit = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        init.uniform_(self.bias, -limit, limit)

    def forward(self, x):
        # [bs, c_in, ts, n_vertex] -> [bs, ts, n_vertex, c_in]
        feats = torch.permute(x, (0, 2, 3, 1))

        if self.Ks - 1 < 0:
            raise ValueError(f'ERROR: the graph convolution kernel size Ks has to be a positive integer, but received {self.Ks}.')

        # T_0 = x, T_1 = gso @ x, T_k = 2 * gso @ T_{k-1} - T_{k-2}
        terms = [feats]
        if self.Ks >= 2:
            terms.append(torch.einsum('hi,btij->bthj', self.gso, feats))
        for order in range(2, self.Ks):
            terms.append(torch.einsum('hi,btij->bthj', 2 * self.gso, terms[order - 1]) - terms[order - 2])

        stacked = torch.stack(terms, dim=2)
        # Contract the polynomial-order axis and the input-channel axis.
        result = torch.einsum('btkhi,kij->bthj', stacked, self.weight)

        if self.bias is not None:
            result = torch.add(result, self.bias)
        return result

class GraphConv(nn.Module):
    """Single-hop graph convolution: ``gso @ x @ weight (+ bias)``.

    ``gso`` is a 2-D matrix applied along the vertex axis (presumably the
    graph shift operator). It is kept as a plain attribute, not a buffer.

    forward takes x as [bs, c_in, ts, n_vertex] and returns
    [bs, ts, n_vertex, c_out] (channels last).
    """

    def __init__(self, c_in, c_out, gso, bias):
        super().__init__()
        self.c_in = c_in
        self.c_out = c_out
        self.gso = gso
        self.weight = nn.Parameter(torch.FloatTensor(c_in, c_out))
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(c_out))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the weight (Kaiming uniform) and the bias (uniform in +-1/sqrt(fan_in))."""
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is None:
            return
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        limit = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        init.uniform_(self.bias, -limit, limit)

    def forward(self, x):
        # [bs, c_in, ts, n_vertex] -> [bs, ts, n_vertex, c_in]
        feats = torch.permute(x, (0, 2, 3, 1))

        # Mix along the vertex axis, then along the channel axis.
        propagated = torch.einsum('hi,btij->bthj', self.gso, feats)
        projected = torch.einsum('bthi,ij->bthj', propagated, self.weight)

        if self.bias is None:
            return projected
        return torch.add(projected, self.bias)

class GraphConvLayer(nn.Module):
    """Graph convolution with channel alignment and a residual connection.

    Args:
        graph_conv_type: 'cheb_graph_conv' (ChebGraphConv) or 'graph_conv'
            (GraphConv).
        c_in: number of input channels.
        c_out: number of output channels; the inner convolution maps
            c_out -> c_out after the input has been aligned to c_out.
        Ks: Chebyshev kernel size (only used by 'cheb_graph_conv').
        gso: matrix handed to the graph convolution.
        bias: whether the graph convolution has a bias term.

    Raises:
        NotImplementedError: if ``graph_conv_type`` is not one of the two
            supported values. Previously such a value was accepted silently
            and only failed inside forward with an UnboundLocalError.

    forward takes and returns tensors laid out as
    [bs, channel, ts, n_vertex].
    """

    def __init__(self, graph_conv_type, c_in, c_out, Ks, gso, bias):
        super(GraphConvLayer, self).__init__()
        self.graph_conv_type = graph_conv_type
        self.c_in = c_in
        self.c_out = c_out
        self.align = Align(c_in, c_out)
        self.Ks = Ks
        self.gso = gso
        if self.graph_conv_type == 'cheb_graph_conv':
            self.cheb_graph_conv = ChebGraphConv(c_out, c_out, Ks, gso, bias)
        elif self.graph_conv_type == 'graph_conv':
            self.graph_conv = GraphConv(c_out, c_out, gso, bias)
        else:
            raise NotImplementedError(f'ERROR: The graph convolution type {graph_conv_type} is not implemented.')

    def forward(self, x):
        x_gc_in = self.align(x)
        if self.graph_conv_type == 'cheb_graph_conv':
            x_gc = self.cheb_graph_conv(x_gc_in)
        elif self.graph_conv_type == 'graph_conv':
            x_gc = self.graph_conv(x_gc_in)
        else:
            # Only reachable if graph_conv_type was changed after construction.
            raise NotImplementedError(f'ERROR: The graph convolution type {self.graph_conv_type} is not implemented.')
        # The convolutions return channels last; restore [bs, c, ts, n_vertex]
        # so the residual can be added.
        x_gc = x_gc.permute(0, 3, 1, 2)
        x_gc_out = torch.add(x_gc, x_gc_in)

        return x_gc_out

class STConvBlock(nn.Module):
    """Spatio-temporal convolution block with a 'TGTND' structure.

    T: Gated Temporal Convolution Layer (GLU or GTU)
    G: Graph Convolution Layer (ChebGraphConv or GraphConv)
    T: Gated Temporal Convolution Layer (GLU or GTU)
    N: Layer Normalization
    D: Dropout

    ``channels`` supplies the output widths of the three convolutions in
    order. Input and output are laid out as [bs, channel, ts, n_vertex]; each
    temporal layer shortens the time axis by ``Kt - 1``.
    """

    def __init__(self, Kt, Ks, n_vertex, last_block_channel, channels, act_func, graph_conv_type, gso, bias, droprate):
        super().__init__()
        first_width, graph_width, last_width = channels[0], channels[1], channels[2]
        self.tmp_conv1 = TemporalConvLayer(Kt, last_block_channel, first_width, n_vertex, act_func)
        self.graph_conv = GraphConvLayer(graph_conv_type, first_width, graph_width, Ks, gso, bias)
        self.tmp_conv2 = TemporalConvLayer(Kt, graph_width, last_width, n_vertex, act_func)
        self.tc2_ln = nn.LayerNorm([n_vertex, last_width])
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=droprate)

    def forward(self, x):
        hidden = self.tmp_conv1(x)
        hidden = self.relu(self.graph_conv(hidden))
        hidden = self.tmp_conv2(hidden)
        # LayerNorm works on the trailing [n_vertex, channel] axes, so move the
        # channels last for the normalisation and back afterwards.
        normed = self.tc2_ln(hidden.permute(0, 2, 3, 1))
        restored = normed.permute(0, 3, 1, 2)
        return self.dropout(restored)

class OutputBlock(nn.Module):
    """Output head with a 'TNFF' structure.

    T: Gated Temporal Convolution Layer (GLU or GTU)
    N: Layer Normalization
    F: Fully-Connected Layer
    F: Fully-Connected Layer

    Input and output are laid out as [bs, channel, ts, n_vertex]; the output
    has ``end_channel`` channels.

    NOTE: ``dropout``, ``leaky_relu`` and ``silu`` are constructed but not
    applied in forward.
    """

    def __init__(self, Ko, last_block_channel, channels, end_channel, n_vertex, act_func, bias, droprate):
        super().__init__()
        conv_width, hidden_width = channels[0], channels[1]
        self.tmp_conv1 = TemporalConvLayer(Ko, last_block_channel, conv_width, n_vertex, act_func)
        self.fc1 = nn.Linear(in_features=conv_width, out_features=hidden_width, bias=bias)
        self.fc2 = nn.Linear(in_features=hidden_width, out_features=end_channel, bias=bias)
        self.tc1_ln = nn.LayerNorm([n_vertex, conv_width])
        self.relu = nn.ReLU()
        self.leaky_relu = nn.LeakyReLU()
        self.silu = nn.SiLU()
        self.dropout = nn.Dropout(p=droprate)

    def forward(self, x):
        hidden = self.tmp_conv1(x)
        # Channels last for LayerNorm and the two linear layers.
        hidden = self.tc1_ln(hidden.permute(0, 2, 3, 1))
        hidden = self.relu(self.fc1(hidden))
        projected = self.fc2(hidden)
        # Back to [bs, channel, ts, n_vertex].
        return projected.permute(0, 3, 1, 2)

In [3]:
def time2vec(x, f, w0, b0, w, b):
    """Build a Time2Vec embedding from ``x``.

    The linear term ``x @ w0 + b0`` gives the non-periodic feature(s) and
    ``f(x @ w + b)`` gives the periodic ones; both are concatenated along
    dim 2, so all operands must be 3-D.
    """
    trend = torch.matmul(x, w0) + b0
    periodic = f(torch.matmul(x, w) + b)
    return torch.cat((trend, periodic), dim=2)

class Time2Vec(nn.Module):
    '''
    Sine-based Time2Vec embedding made of two parts:
    1. a non-periodic (linear) part with ``in_features`` columns
    2. a periodic part with ``out_features - 1`` columns

    The parameter shapes fix what forward accepts: x must be
    [bz, stations, days] so that ``x @ w0`` is defined.
    :return [bz, stations, in_features + out_features - 1]

    NOTE: every parameter carries a leading ``bz`` axis, so the module is
    tied to the batch size it was constructed with.
    '''
    def __init__(self, in_features, out_features, bz, stations, days):
        super().__init__()
        periodic_features = out_features - 1
        # Creation order (w0, b0, w, b) is kept so seeded runs draw the same values.
        self.w0 = nn.Parameter(torch.randn(bz, days, in_features))
        self.b0 = nn.Parameter(torch.randn(bz, stations, in_features))
        self.w = nn.Parameter(torch.randn(bz, days, periodic_features))
        self.b = nn.Parameter(torch.randn(bz, stations, periodic_features))
        self.f = torch.sin

    def forward(self, x):
        return time2vec(x, self.f, self.w0, self.b0, self.w, self.b)


class Transformer(nn.Module):
    '''
    Transformer encoder applied to each station separately, to model the same
    stop across different days.
    :x [bz, days, stations]
    :return [bz, stations, out_features * 2]

    ``time2vector`` is a factory called as
    ``time2vector(in_features, out_features, bz, stations, days)`` and used as
    the positional embedding. ``out_features`` must be divisible by the fixed
    head count of 8.
    '''
    def __init__(self, time2vector, in_features, out_features, bz, stations, days):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.bz = bz
        self.stations = stations
        self.embedding = nn.Linear(days, out_features)
        self.T2V = time2vector(in_features, out_features, bz, stations, days)
        # Same module registered under a second name.
        self.positional_embedding = self.T2V
        self.encoder = nn.TransformerEncoderLayer(d_model=out_features, nhead=8, dropout=0.1)
        # Despite its name this is a stack of three encoder layers.
        self.decoder = nn.TransformerEncoder(self.encoder, num_layers=3)
        self.linear = nn.Linear(out_features, out_features * 2)
        nn.init.kaiming_normal_(self.linear.weight, mode='fan_in')

    def forward(self, x):
        # [bz, days, stations] -> [bz, stations, days]
        per_station = x.permute(0, 2, 1)
        value_part = self.embedding(per_station)               # bz, stations, out_features
        position_part = self.positional_embedding(per_station)
        tokens = value_part + position_part                     # bz, stations, out_features

        encoded = torch.zeros_like(tokens)
        # Each station slice is a 2-D [bz, out_features] tensor handed to the
        # encoder stack on its own.
        for station in range(self.stations):
            encoded[:, station, :] = self.decoder(tokens[:, station, :])
        return self.linear(encoded)                             # bz, stations, out_features*2


class LSTM(nn.Module):
    """Three-layer LSTM unrolled step by step with a fixed 0.5 teacher-forcing mix.

    ``hidden_dim`` must equal ``out_dim`` (asserted) because every step's
    output is written into a buffer shaped like the input. ``days`` is
    accepted but not used.

    forward maps x of shape [d0, d1, hidden_dim] to [d0, d1, 1]. Each step
    passes a 2-D [d0, hidden_dim] slice to ``nn.LSTM`` and carries no hidden
    state over to the next step.
    """

    def __init__(self, hidden_dim, out_dim, days):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim
        assert self.hidden_dim == self.out_dim
        self.lstm = nn.LSTM(self.hidden_dim, self.out_dim, num_layers=3, batch_first=True)
        self.linear = nn.Linear(self.out_dim, 1)
        nn.init.kaiming_normal_(self.linear.weight, mode='fan_in')

    def forward(self, x):
        teacher_forcing_ratio = 0.5
        step_input = x[:, 0, :]
        states = torch.zeros_like(x)
        for step in range(x.shape[1]):
            step_output, _ = self.lstm(step_input)
            states[:, step, :] = step_output
            # Next input: the prediction pulled half-way towards the true
            # value of the current step.
            step_input = states[:, step, :] + (x[:, step, :] - states[:, step, :]) * teacher_forcing_ratio
        return self.linear(states)


class CausalConv1d(torch.nn.Conv1d):
    """Causal 1-D convolution that always left-pads its input.

    The input is padded in front by ``(kernel_size - 1) * dilation`` zeros and
    then convolved without further padding, so with stride 1 the output has
    the same length as the input and never looks ahead.

    NOTE: this definition replaces any earlier ``CausalConv1d`` in the same
    namespace and has no ``enable_padding`` argument.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=0,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        self.__padding = (kernel_size - 1) * dilation

    def forward(self, input):
        shifted = F.pad(input, (self.__padding, 0))
        return super().forward(shifted)


class TemperalModel(nn.Module):
    """Temporal model: time embedding -> LayerNorm -> recurrent head.

    ``time2vector`` and ``LSTM`` are factories, called as
    ``time2vector(in_features, out_features, bz, stations, days)`` and
    ``LSTM(hidden_dim, out_dim, days)``. ``Transformer`` is accepted but not
    used, and ``self.embedding`` is created but not used by forward.

    forward takes x as [bz, days, stations].
    """

    def __init__(self, Transformer, time2vector, LSTM, bz, stations, days, in_features, out_features, hidden_dim, out_dim):
        super().__init__()
        self.embedding = nn.Linear(days, out_features)
        self.T = time2vector(in_features, out_features, bz, stations, days)
        self.L = LSTM(hidden_dim, out_dim, days)
        self.layer_norm = nn.LayerNorm(out_features)

    def count_parameters(self):
        """Return the number of trainable parameters in ``T`` and ``L`` only."""
        def trainable(module):
            return sum(p.numel() for p in module.parameters() if p.requires_grad)

        return trainable(self.T) + trainable(self.L)

    def forward(self, x):
        # [bz, days, stations] -> [bz, stations, days]
        per_station = x.permute(0, 2, 1)
        embedded = self.layer_norm(self.T(per_station))   # bz, stations, out_features
        return self.L(embedded)

In [34]:
import torch
import torch.utils as utils
import torch.nn as nn
import numpy as np
import os
import random
import tqdm
import torch.optim as optim
import argparse
import json
from torch.utils.tensorboard import SummaryWriter


# Custom TensorBoard layout: one multi-line chart for the train/validation
# loss tags and one for the MAE and learning-rate tags.
layout = {
    'PLOT': {
        "loss": ["Multiline", ['Loss/train', 'Loss/val']],
        "Metrics": ["Multiline", ['Metrics/mae', 'Metrics/lr']],
    },
}
# Module-level writer used by train() and val() for all scalar logging.
# NOTE: it is created as a side effect of running this cell.
writer = SummaryWriter()
writer.add_custom_scalars(layout)

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, patience=30, verbose=False, delta=0, path="./checkpoint/A-best_model.pth", trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 30
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum decrease of the monitored loss that qualifies as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: './checkpoint/A-best_model.pth'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one validation result (lower is better).

        Saves a checkpoint and resets the counter when ``val_loss`` is the
        first value seen or is at least ``delta`` below the best value so far;
        otherwise increments the counter and sets ``early_stop`` once it
        reaches ``patience``.
        """
        score = val_loss

        improved = self.best_score is None or score <= self.best_score - self.delta
        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
            return

        self.counter += 1
        self.trace_func(f'Early stopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        # Make sure the target directory exists; torch.save does not create it.
        checkpoint_dir = os.path.dirname(self.path)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
    
def read_data(filename):
    """
    Return all lines of the text file ``filename`` as a list of strings
    (line endings are kept).
    """
    with open(filename, 'r') as handle:
        return list(handle)

def z_score(x):
    r'''
    Z-score normalization function: $z = (X - \mu) / \sigma $,
    where $\mu$ and $\sigma$ are the mean and standard deviation of the data.

    The statistics are taken over all elements of ``x``.

    Returns:
        (x_out, mean, std, zero_value), where ``zero_value`` is the minimum of
        the normalised data. Callers compare against it to recognise padding;
        that only identifies zero padding if 0 is the smallest raw value.
    '''
    # torch.std_mean returns (std, mean) in that order.
    std, mean = torch.std_mean(x)
    x_out = (x - mean) / std
    zero_value = torch.min(x_out)
    return x_out, mean, std, zero_value

def convert_str_to_list(line):
    """
    Parse a JSON-encoded line and return its content as a NumPy array
    (despite the name, the result is an ``ndarray``, not a list).
    """
    parsed = json.loads(line)
    return np.array(parsed)

def generate_data(datafile, max_len=50):
    """
    Load every record of ``datafile`` into one zero-padded tensor.
    txt --> array --> tensor (padded to max_len)

    Each line holds a flat JSON list that is reshaped to 30 rows; the row
    length is the route length of that record.
    return: [len(data), 30, max_len]
    """
    lines = read_data(datafile)
    data_input = torch.zeros(len(lines), 30, max_len)
    for row, raw_line in enumerate(lines):
        values = convert_str_to_list(raw_line)
        route_len = int(len(values) / 30)
        route = torch.from_numpy(values.reshape(30, route_len))
        # Row 25 is overwritten with the mean of its two neighbours
        # (presumably to patch a faulty day -- TODO confirm).
        route[25, :] = (route[24, :] + route[26, :]) / 2
        # Right-pad the route axis with zeros up to max_len.
        data_input[row] = F.pad(input=route, pad=(0, max_len - route_len, 0, 0), mode='constant', value=0)
    return data_input

def data_transform(datafile, max_len, batch_size):
    """
    Load ``datafile``, z-score normalise it and wrap it in a shuffled DataLoader.

    The first 29 rows of every record are the model input and row 29 is the
    target.

    :param datafile: path of the text file read by ``generate_data``
    :param max_len: length every route is padded to
    :param batch_size: loader batch size; incomplete final batches are dropped
    :return: (iter_data, zero_value, mean, std) where iter_data yields
             x: [batch_size, 29, max_len] and y: [batch_size, 1, max_len], and
             the other three values come from ``z_score``
    """
    data = generate_data(datafile, max_len)
    data, mean, std, zero_value = z_score(data)

    # Slice instead of copying record by record; 29:30 keeps the target's
    # singleton axis.
    x = data[:, :29, :].contiguous()
    y = data[:, 29:30, :].contiguous()

    # Use the GPU when there is one, but do not crash on CPU-only machines.
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = utils.data.TensorDataset(x.to(target_device), y.to(target_device))
    iter_data = utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    return iter_data, zero_value, mean, std

def _str2bool(value):
    """Parse a command-line boolean such as 'True', 'false', '1' or 'no'.

    ``type=bool`` cannot be used for this: argparse would call ``bool()`` on
    the raw string, and every non-empty string (including 'False') is truthy.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


# Command-line / notebook configuration for the experiment.
parser = argparse.ArgumentParser(description='BUS')
parser.add_argument('--enable_cuda', type=_str2bool, default='True', help='enable CUDA, default as True')
parser.add_argument('--seed', type=int, default=42, help='set the random seed for stabilize experiment results')
parser.add_argument('--kt', type=int, default=3, help='kenel size of temporal convolution')
parser.add_argument('--ks', type=int, default=3, help='kenel size of spatial convolution')
parser.add_argument('--epoches', type=int, default=2000)
parser.add_argument('--step_size', type=int, default=10)
parser.add_argument('--patience', type=int, default=50, help='early stop,How long to wait after last time validation loss improved.')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--loss', type=str, default='mse', choices=['mse'])
parser.add_argument('--bz', type=int, default=10, help='batchsize')
parser.add_argument('--max_len', type=int, default=50, help='the max length of route')
parser.add_argument('--days', type=int, default=29)
parser.add_argument('--in_f', type=int, default=1)
parser.add_argument('--out_f', type=int, default=256)
parser.add_argument('--hidden_dim', type=int, default=256)
parser.add_argument('--out_dim', type=int, default=256)
# Porting this code from PyCharm to Jupyter raises an argument error
# ("An exception has occurred, use %tb to see the full traceback.") because
# the kernel passes an extra -f option; accept and ignore it here.
parser.add_argument('-f',type=str, default="读取额外的参数")
args = parser.parse_args()

Import the dataset and set up the environment:

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print('Using GPU')
def set_env(seed):
    """Seed every random number generator in use and make cuDNN deterministic.

    Also sets the ``CUDA_VISIBLE_DEVICES`` ('0') and ``PYTHONHASHSEED``
    environment variables.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)
    # Disable cuDNN auto-tuning and force deterministic kernels so repeated
    # runs give the same results.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
# Seed everything before the model is built so its random initialisation is
# reproducible.
set_env(seed=args.seed)

# The Transformer class is passed in but TemperalModel does not use it; the
# model is Time2Vec -> LayerNorm -> LSTM. max_len doubles as the station count.
model = TemperalModel(Transformer, Time2Vec, LSTM, bz=args.bz, stations=args.max_len, days=args.days,
                      in_features=args.in_f, out_features=args.out_f, hidden_dim=args.hidden_dim, out_dim=args.out_dim).to(device)
print(model.count_parameters()) # trainable parameters of T and L only; the author recorded 1781505

trainFile = './data_used/train.txt'
valFile = './data_used/val.txt'
testFile = './data_used/test.txt'
# Each split is normalised with its own statistics (mean1/std1, mean2/std2,
# mean3/std3), not with the training statistics.
train_iter, zero_value1, mean1, std1 = data_transform(trainFile, max_len=args.max_len, batch_size=args.bz)
val_iter, zero_value2, mean2, std2 = data_transform(valFile, max_len=args.max_len, batch_size=args.bz)
test_iter, zero_value3, mean3, std3 = data_transform(testFile, max_len=args.max_len, batch_size=args.bz)

In [None]:
# Optimiser, learning-rate schedulers and loss used by train() / val().
# optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=1e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# NOTE(review): eleven StepLR schedulers are attached to the same optimiser,
# alternating between decay (gamma=0.9) and growth (gamma=1.2). train() only
# ever steps scheduler1, scheduler2 and scheduler3 -- confirm whether
# scheduler4..scheduler11 are still needed.
scheduler1 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
scheduler2 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=1.2)
scheduler3 = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9)
scheduler4 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=1.2)
scheduler5 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
scheduler6 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=1.2)
scheduler7 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
scheduler8 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=1.2)
scheduler9 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
scheduler10 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=1.2)
scheduler11 = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
# args.step_size and args.loss are not consulted here; MSE is hard-wired.
loss = nn.MSELoss()

train:

In [None]:
def train(epochs, optimizer, scheduler, loss, early_stopping, model, train_iter, val_iter):
    """Run the training loop, validating and checkpointing after every epoch.

    Args:
        epochs: number of epochs to run.
        optimizer: optimiser stepped once per batch.
        scheduler: NOTE(review): not used -- the body steps the module-level
            scheduler1 / scheduler2 / scheduler3 depending on the epoch.
        loss: callable applied to the unpadded predictions and targets.
        early_stopping: object whose ``early_stop`` flag ends training.
            NOTE(review): the call that would update it is commented out, so
            the flag only changes if the caller sets it.
        model: network to train.
        train_iter: iterable of (x, y) training batches.
        val_iter: loader passed on to ``val``.

    Relies on the module-level names ``zero_value1``, ``writer``, ``val`` and
    ``scheduler1``..``scheduler3``. Writes one checkpoint per epoch under
    ``./model_mid/`` and returns nothing.
    """
    min_val_loss = np.inf
    for epoch in range(epochs):
        model.train()
        l_sum = []
        for x, y in tqdm.tqdm(train_iter):
            y_pred = model(x).permute(0, 2, 1)
            # Positions equal to the normalised padding value are excluded
            # from the loss.
            mask = torch.logical_not(y == zero_value1)
            y_pred_without_padding = torch.masked_select(y_pred, mask)
            y_without_padding = torch.masked_select(y, mask)
            l = loss(y_pred_without_padding, y_without_padding)
            # Logged once per batch, but with the epoch as the step index.
            writer.add_scalar("Loss/train", np.mean(l.item()), epoch)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            l_sum.append(np.mean(l.item()))
        # Epoch-dependent schedule: scheduler1 up to epoch 100, scheduler2 up
        # to epoch 200, scheduler3 afterwards.
        if epoch <= 100:
            scheduler1.step()
        elif epoch > 100 and epoch <= 200:
            scheduler2.step()
        else:
            scheduler3.step()
        val_loss, mae = val(model, val_iter, epoch=epoch)
        writer.add_scalar("Metrics/lr", optimizer.param_groups[0]['lr'], epoch)
        writer.add_scalar("Metrics/mae", mae, epoch)
        # min_val_loss is tracked but not read anywhere else in this function.
        if val_loss < min_val_loss:
            min_val_loss = val_loss
        # early_stopping(mae, model)
        # The value is bytes / 1e6, i.e. megabytes, although the label below says MiB.
        gpu_mem_alloc = torch.cuda.max_memory_allocated() / 1000000 if torch.cuda.is_available() else 0
        print('Epoch: {:03d} | Lr: {:.6f} |Train loss: {:.6f} | Val loss: {:.6f} | MAE: {:.4f} |GPU occupy: {:.6f} MiB'. \
              format(epoch + 1, optimizer.param_groups[0]['lr'], np.mean(l_sum), val_loss, mae, gpu_mem_alloc))

        # One checkpoint per epoch, named "<epoch>-<mae>_model.pth".
        torch.save(model.state_dict(), './model_mid/'+str(epoch+1)+'-'+str(mae)+'_model.pth')
        if early_stopping.early_stop:
            print("Early stopping.")
            break

    print('\nTraining finished.\n')
    
def val(model, val_loader, mean=mean2, std=std2, epoch=0):
    """Evaluate ``model`` on ``val_loader``.

    Args:
        model: network to evaluate (switched to eval mode).
        val_loader: iterable of (x, y) batches.
        mean, std: statistics used to undo the normalisation. The defaults
            are bound to the module-level ``mean2`` / ``std2`` when this
            function is defined, so later changes to those names have no effect.
        epoch: step index used for the TensorBoard scalar.

    Returns:
        (mean batch loss in normalised units, MAE of the de-normalised values
        divided by 60).

    Relies on the module-level names ``loss``, ``zero_value2`` and ``writer``.
    RMSE and WMAPE are computed but not returned.
    """
    model.eval()
    l_sum = []
    total_y_pred, total_y = [], []  # not used below
    with torch.no_grad():
        mae, sum_y, mape, mse = [], [], [], []
        for x, y in val_loader:
            y_pred = model(x).permute(0, 2, 1)
            # Drop positions equal to the normalised padding value.
            mask = torch.logical_not(y == zero_value2)
            y_pred_without_padding = torch.masked_select(y_pred, mask).cpu()
            y_without_padding = torch.masked_select(y, mask).cpu()
            l = loss(y_pred_without_padding, y_without_padding)
            l_sum.append(np.mean(l.item()))
            # Logged once per batch, but with the epoch as the step index.
            writer.add_scalar("Loss/val", np.mean(l.item()), epoch)
            ### score
            # Undo the normalisation before computing the error metrics.
            y_without_padding = mean + std * y_without_padding
            y_pred_without_padding = mean + std * y_pred_without_padding
            # Absolute error divided by 60 (presumably seconds -> minutes; TODO confirm).
            d = np.abs(y_without_padding - y_pred_without_padding)/60
            mae += d.tolist()
            sum_y += y_without_padding.tolist()
            mape += (d / y_without_padding).tolist()
            mse += (d ** 2).tolist()
        MAE = np.array(mae).mean()
        RMSE = np.sqrt(np.array(mse).mean())
        WMAPE = np.sum(np.array(mae)) / np.sum(np.array(sum_y))
        return np.mean(l_sum), MAE

def test(model_save_path, model, test_loader, score):
    """Restore the weights of ``model`` from the checkpoint at ``model_save_path``.

    ``test_loader`` and ``score`` are accepted but not used: no evaluation is
    performed and nothing is returned.
    """
    checkpoint = torch.load(model_save_path)
    model.load_state_dict(checkpoint)

In [None]:
# Re-seed, then launch training. scheduler1 is passed in, but train() picks
# its schedulers from the module-level names by itself.
set_env(args.seed)
train(args.epoches, optimizer, scheduler1, loss, EarlyStopping(patience=args.patience), model, train_iter, val_iter)
# Make sure all pending TensorBoard events are written out.
writer.flush()

KNN train:

In [None]:
from sklearn.model_selection import train_test_split, KFold

def euclidean_dist(o1, o2):
    '''
    Mean point-wise Euclidean distance between two point sequences.
    Only the first two components of every point are used, and ``o2`` must
    have at least as many points as ``o1``.
    :param o1: [[x1,y1],[x2,y2],[x3,y3]...]
    :param o2: same layout as o1
    :return: mean of the per-point distances
    '''
    gaps = [
        np.sqrt((o1[idx][0] - o2[idx][0]) ** 2 + (o1[idx][1] - o2[idx][1]) ** 2)
        for idx in range(len(o1))
    ]
    return np.mean(gaps)


def knn_model(data, k, order, trip):
    """Return the row labels of the ``k`` trips closest to ``trip``.

    Every stored trip (``data.coord``) is cut to its first ``order`` points
    and compared with ``trip`` via ``euclidean_dist``. Rows are accessed as
    ``coord[0] .. coord[len(data) - 1]``, so ``data`` needs a 0..n-1 index.
    The input frame itself is not modified.
    """
    truncated = data.copy()
    truncated.coord = truncated.coord.apply(lambda points: points[:order])
    distances = [euclidean_dist(truncated.coord[row], trip) for row in range(len(truncated))]
    # Ascending sort: the smallest distances come first.
    return np.argsort(distances)[:k]


def knn_predict(data, dist_top_k, order):
    """Average the second component of point ``order`` over the selected trips.

    ``dist_top_k`` holds the row labels of the neighbours to average.
    """
    return np.mean([data.coord[neighbour][order][1] for neighbour in dist_top_k])


# Cumulative offsets: 37 raw values, each divided by 10, with a leading 0 and
# then cumulatively summed (38 entries). What the raw values measure is not
# stated in this file.
l = np.cumsum([0] + [int(raw / 10) for raw in (
    380, 200, 280, 470, 2890, 780, 750, 560, 560, 320,
    280, 240, 1460, 460, 690, 380, 570, 320, 360, 620,
    440, 510, 1330, 1270, 1320, 550, 350, 380, 410, 340,
    640, 220, 360, 410, 780, 140, 70,
)])

# Load the KNN dataset; the 'coord' column stores each trip as the text of a
# Python literal.
# NOTE(review): `pd` is not imported in any cell visible here -- confirm.
data = pd.read_csv('./data_used/knn_dataset/train.csv')
# SECURITY NOTE: eval() executes arbitrary code found in the CSV. Only use it
# on trusted files; ast.literal_eval is the safer choice if the column holds
# plain literals.
data['coord'] = data['coord'].apply(eval)
# NOTE(review): this split is overwritten by the K-fold loop below, and the
# names `train` / `test` replace the functions of the same name defined earlier.
train, test = train_test_split(data, test_size=0.1, random_state=42)
kf5 = KFold(n_splits=5, shuffle=False)

# NOTE(review): the loop only reassigns train/test, so after it finishes just
# the LAST of the five folds is kept -- no per-fold evaluation takes place.
for train_index, test_index in kf5.split(data):
    train, test = data.iloc[train_index], data.iloc[test_index]
# knn_model indexes rows by 0..n-1, hence the index reset.
train, test = train.reset_index(drop=True), test.reset_index(drop=True)
# Evaluate the KNN baseline on the held-out fold: one row per test trip and
# one column per prediction point.
predict_list = []
error_list = []
truth_list = []
for sample in tqdm.tqdm(range(len(test))):
    predict_sample = []
    error_sample = []
    truth_sample = []
    # Only the length of `l` matters here: i runs from 2 to len(l) - 1 and the
    # point being predicted is at index 4 * i.
    for i in range(2, len(l)):
        trip = test['coord'][sample]
        # Match on the first 4*i points, then predict the second component of
        # point 4*i from the 25 nearest training trips.
        dist_top_k = knn_model(train, k=25, order=4*i, trip=trip[:4*i])
        predict = knn_predict(train, dist_top_k, 4*i)
        predict_sample.append(predict)
        # Absolute error divided by 60 (presumably seconds -> minutes; TODO confirm).
        error = np.abs(trip[4*i][1] - predict)/60
        error_sample.append(error)
        truth_sample.append(trip[4*i][1])
    predict_list.append(predict_sample)
    error_list.append(error_sample)
    truth_list.append(truth_sample)

# Persist predictions, errors and ground truth. The files carry a .txt
# suffix but contain binary pickle data.
# NOTE(review): `pickle` and `plt` are not imported in any cell visible here -- confirm.
with open('./result/knn-predict.txt', 'wb') as handle:
    pickle.dump(predict_list, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('./result/knn-mae.txt', 'wb') as handle:
    pickle.dump(error_list, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('./result/knn-truth.txt', 'wb') as handle:
    pickle.dump(truth_list, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
# Average the error over all test trips, leaving one value per prediction point.
error_list = np.array(error_list)
error_list = np.mean(error_list, axis=0)
# plot
plt.stem(error_list, linefmt='b-', markerfmt='bo', basefmt='r-')
plt.show()